tachibot-mcp 2.19.2 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,13 @@ All notable changes to TachiBot MCP will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.19.3] - 2026-03-21
9
+
10
+ ### Fixed
11
+ - **Section header regex** — now matches mixed case + optional dashes (works across all providers, not just Gemini)
12
+ - **Planner 5/1 bug** — `parsePlanSteps` now matches `### Task [T-ID]:` format (was only matching `### Step N:`)
13
+ - **Planner mismatch warning** — surfaces parse failures instead of masking with `Math.max`
14
+
8
15
  ## [2.19.2] - 2026-03-21
9
16
 
10
17
  ### Added
@@ -4,6 +4,7 @@ import { sessionLogger } from "./session/session-logger.js";
4
4
  import { sessionManager } from "./session/session-manager.js";
5
5
  import { ToolRouter } from "./tools/tool-router.js";
6
6
  import { getMemoryManager } from "./memory/index.js";
7
+ import { GROK_MODELS } from "./config/model-constants.js";
7
8
  import { VisualizationService } from "./orchestrators/collaborative/services/visualization/VisualizationService.js";
8
9
  import { ToolExecutionService } from "./orchestrators/collaborative/services/tool-execution/ToolExecutionService.js";
9
10
  export class CollaborativeOrchestrator {
@@ -229,12 +230,11 @@ export class CollaborativeOrchestrator {
229
230
  this.modelPreferences = { ...this.modelPreferences, ...preferences };
230
231
  }
231
232
  /**
232
- * Use Grok 4 Heavy for all Grok operations
233
+ * Use Grok 4.20 Multi-Agent for all Grok operations
233
234
  */
234
235
  useGrok4Heavy() {
235
- // Use GROK_4_0709 as the "heavy" model (reasoning model)
236
- this.modelPreferences['grok'] = 'grok-4-0709';
237
- this.modelPreferences['grok-4'] = 'grok-4-0709';
236
+ this.modelPreferences['grok'] = GROK_MODELS._4_20_MULTI_AGENT;
237
+ this.modelPreferences['grok-4'] = GROK_MODELS._4_20_MULTI_AGENT;
238
238
  }
239
239
  /**
240
240
  * Generate session ID
@@ -59,10 +59,14 @@ export const PERPLEXITY_MODELS = {
59
59
  SONAR_PRO: "sonar-pro", // Advanced search
60
60
  SONAR_REASONING: "sonar-reasoning-pro", // Reasoning model (expensive - avoid)
61
61
  };
62
- // Grok Models (xAI) - Updated 2025-11-22 with correct API model names
62
+ // Grok Models (xAI) - Updated 2026-04-10 with Grok 4.20 (Mar 2026)
63
63
  export const GROK_MODELS = {
64
- // Grok 4.1 models (Nov 2025) - LATEST & BEST
65
- _4_1_FAST_REASONING: "grok-4-1-fast-reasoning", // Latest: 2M context, $0.20/$0.50, enhanced reasoning
64
+ // Grok 4.20 models (Mar 10, 2026) - FLAGSHIP
65
+ _4_20_REASONING: "grok-4.20-0309-reasoning", // Flagship: 2M context, $2/$6, low hallucination
66
+ _4_20_NON_REASONING: "grok-4.20-0309-non-reasoning", // Standard: 2M context, $2/$6
67
+ _4_20_MULTI_AGENT: "grok-4.20-multi-agent-0309", // Multi-agent: 4-16 agents via reasoning.effort, $2/$6
68
+ // Grok 4.1 fast models (Nov 2025) - BEST VALUE (10x cheaper)
69
+ _4_1_FAST_REASONING: "grok-4-1-fast-reasoning", // Fast reasoning: 2M context, $0.20/$0.50
66
70
  _4_1_FAST_NON_REASONING: "grok-4-1-fast-non-reasoning", // Tool-calling optimized: 2M context, $0.20/$0.50
67
71
  // Grok 4 fast models (2025) - Still good
68
72
  CODE_FAST: "grok-code-fast-1", // Coding specialist: 256K→2M, $0.20/$1.50, 92 tok/sec
@@ -153,12 +157,12 @@ export const CURRENT_MODELS = {
153
157
  premium: OPENAI_MODELS.PRO, // Expert mode (gpt-5.4-pro - higher compute)
154
158
  },
155
159
  grok: {
156
- reason: GROK_MODELS._4_1_FAST_REASONING,
157
- code: GROK_MODELS._4_1_FAST_NON_REASONING,
158
- debug: GROK_MODELS._4_1_FAST_NON_REASONING,
159
- brainstorm: GROK_MODELS._4_1_FAST_REASONING,
160
- search: GROK_MODELS._4_1_FAST_REASONING,
161
- architect: GROK_MODELS._4_1_FAST_REASONING,
160
+ reason: GROK_MODELS._4_20_REASONING, // grok-4.20-0309-reasoning (flagship, low hallucination)
161
+ code: GROK_MODELS._4_20_NON_REASONING, // grok-4.20 non-reasoning (flagship quality, tool-calling)
162
+ debug: GROK_MODELS._4_20_NON_REASONING, // grok-4.20 non-reasoning (low hallucination for debugging)
163
+ brainstorm: GROK_MODELS._4_20_NON_REASONING, // grok-4.20-0309-non-reasoning (2M context)
164
+ search: GROK_MODELS._4_20_REASONING, // grok-4.20 LOW HALLUCINATION - critical for search
165
+ architect: GROK_MODELS._4_20_MULTI_AGENT, // grok-4.20-multi-agent-0309 (4-16 agent swarm)
162
166
  },
163
167
  gemini: {
164
168
  default: GEMINI_MODELS.GEMINI_3_PRO,
@@ -331,6 +335,9 @@ export const MODEL_DISPLAY_NAMES = {
331
335
  "gemini-3-flash-preview": "gemini-3-flash",
332
336
  "gemini-3.1-flash-lite": "gemini-3.1-flash-lite",
333
337
  // Grok (xAI)
338
+ "grok-4.20-0309-reasoning": "grok-4.20",
339
+ "grok-4.20-0309-non-reasoning": "grok-4.20-fast",
340
+ "grok-4.20-multi-agent-0309": "grok-4.20-multi",
334
341
  "grok-4-1-fast-reasoning": "grok-4.1",
335
342
  "grok-4-1-fast-non-reasoning": "grok-4.1-fast",
336
343
  "grok-4-fast-reasoning": "grok-4",
@@ -370,7 +377,10 @@ export const MODEL_PRICING = {
370
377
  "gemini-3.1-pro-preview": 0.007, // ($2 + $12) / 2 / 1000
371
378
  "gemini-3-flash-preview": 0.00175, // ($0.50 + $3) / 2 / 1000
372
379
  "gemini-3.1-flash-lite": 0.001, // Cheapest/fastest in 3.1 series (Mar 2026)
373
- // Grok - all cheap!
380
+ // Grok
381
+ "grok-4.20-0309-reasoning": 0.004, // ($2 + $6) / 2 / 1000
382
+ "grok-4.20-0309-non-reasoning": 0.004, // ($2 + $6) / 2 / 1000
383
+ "grok-4.20-multi-agent-0309": 0.004, // ($2 + $6) / 2 / 1000
374
384
  "grok-4-1-fast-reasoning": 0.00035,
375
385
  "grok-4-1-fast-non-reasoning": 0.00035,
376
386
  "grok-4-fast-reasoning": 0.00035,
@@ -21,7 +21,7 @@ const MODELS = {
21
21
  OPENAI: OPENAI_MODELS.THINKING, // gpt-5.4 (default - most capable)
22
22
  OPENAI_REASON: OPENAI_MODELS.THINKING, // gpt-5.4 (deep reasoning)
23
23
  // xAI Grok
24
- GROK: GROK_MODELS._4_1_FAST_REASONING, // grok-4-1-fast-reasoning
24
+ GROK: GROK_MODELS._4_20_REASONING, // grok-4.20-0309-reasoning
25
25
  // Perplexity
26
26
  PERPLEXITY: PERPLEXITY_MODELS.SONAR, // sonar (cheapest)
27
27
  PERPLEXITY_REASON: PERPLEXITY_MODELS.SONAR_REASONING, // sonar-reasoning-pro ($2/$8 per M)
@@ -112,12 +112,12 @@ export const SMART_TIMEOUT_DEFAULTS = {
112
112
  max: 90000 // 90 seconds
113
113
  },
114
114
  grok: {
115
- base: 30000, // 30 seconds
116
- max: 90000 // 90 seconds
115
+ base: 30000, // 30 seconds - 4.1 fast models are quick
116
+ max: 120000 // 2 minutes - 4.20 reasoning models need more
117
117
  },
118
118
  openai: {
119
- base: 20000, // 20 seconds
120
- max: 60000 // 60 seconds
119
+ base: 60000, // 60 seconds - GPT-5.4 reasoning needs more time
120
+ max: 180000 // 3 minutes - high/xhigh reasoning effort
121
121
  },
122
122
  anthropic: {
123
123
  base: 20000, // 20 seconds
@@ -128,7 +128,7 @@ export function getAvailableModels(config) {
128
128
  models.push('sonar-pro', 'sonar-reasoning-pro', 'sonar-deep-research');
129
129
  }
130
130
  if (config.apiKeys.grok) {
131
- models.push('grok-3', 'grok-3-fast', 'grok-4-0709');
131
+ models.push('grok-3', 'grok-4.20-0309-reasoning', 'grok-4.20-multi-agent-0309');
132
132
  }
133
133
  if (config.apiKeys.openrouter) {
134
134
  models.push('qwen3-coder', 'qwq-32b', 'qwen3-32b');
@@ -17,8 +17,8 @@ export class Architect {
17
17
  },
18
18
  specialized_verification: {
19
19
  models: {
20
- 'syntax_error': 'gpt-4-mini',
21
- 'type_error': 'gpt-4-mini',
20
+ 'syntax_error': 'gpt-5.4-mini',
21
+ 'type_error': 'gpt-5.4-mini',
22
22
  'algorithmic_complexity': 'qwq-32b',
23
23
  'performance_issue': 'qwq-32b',
24
24
  'architectural_smell': 'claude-opus-4.1',
@@ -28,7 +28,7 @@ export class Architect {
28
28
  'design_pattern_violation': 'claude-opus-4.1',
29
29
  'memory_leak': 'qwq-32b',
30
30
  'race_condition': 'claude-opus-4.1',
31
- 'code_duplication': 'gpt-4-mini',
31
+ 'code_duplication': 'gpt-5.4-mini',
32
32
  'circular_dependency': 'claude-opus-4.1'
33
33
  },
34
34
  dynamicTokens: {
@@ -269,7 +269,7 @@ export class Architect {
269
269
  'claude-opus-4.1': 10,
270
270
  'qwq-32b': 8,
271
271
  'perplexity-reasoning': 7,
272
- 'gpt-4-mini': 5
272
+ 'gpt-5.4-mini': 5
273
273
  };
274
274
  return priorities[model] || 5;
275
275
  }
@@ -1,5 +1,6 @@
1
1
  import { ModelRouter } from '../workflows/model-router.js';
2
2
  import { getScoutModels, getDefaultModels } from '../config/model-defaults.js';
3
+ import { GROK_MODELS } from '../config/model-constants.js';
3
4
  import { getGrokApiKey } from '../utils/api-keys.js';
4
5
  import { createProgressStream } from '../utils/progress-stream.js';
5
6
  import { providerRouter } from '../utils/provider-router.js';
@@ -530,7 +531,7 @@ export class Scout {
530
531
  const { callGrokEnhanced } = await import('../tools/grok-enhanced.js');
531
532
  const messages = [{ role: 'user', content: query }];
532
533
  const result = await callGrokEnhanced(messages, {
533
- model: 'grok-4-0709',
534
+ model: GROK_MODELS._4_20_REASONING,
534
535
  maxTokens,
535
536
  enableLiveSearch: options?.enableLiveSearch ?? true,
536
537
  searchSources: options?.maxSources ?? 100,
@@ -56,7 +56,7 @@ export class CostMonitor extends EventEmitter {
56
56
  "claude-3.5-sonnet",
57
57
  { model: "claude-3.5-sonnet", inputCost: 0.003, outputCost: 0.015 },
58
58
  ],
59
- ["grok-4", { model: "grok-4", inputCost: 0.005, outputCost: 0.015 }],
59
+ ["grok-4.20-0309-reasoning", { model: "grok-4.20-0309-reasoning", inputCost: 0.002, outputCost: 0.006 }],
60
60
  ]);
61
61
  // Clean up old records periodically
62
62
  setInterval(() => this.cleanupOldRecords(), 60 * 60 * 1000); // Every hour
@@ -82,7 +82,7 @@ modelProviderRegistry.registerMany([
82
82
  { modelName: "qwq", toolName: "qwq_reason", provider: "openrouter" },
83
83
  // Grok models
84
84
  { modelName: "grok", toolName: "grok_reason", provider: "x.ai" },
85
- { modelName: "grok-4", toolName: "grok_reason", provider: "x.ai", aliases: ["grok-4-0709"] },
85
+ { modelName: "grok-4.20", toolName: "grok_reason", provider: "x.ai", aliases: ["grok-4.20-0309-reasoning", "grok-4.20-multi-agent-0309"] },
86
86
  // Claude models
87
87
  { modelName: "claude", toolName: "think", provider: "anthropic", aliases: ["claude-code", "reasoning", "analysis"] },
88
88
  // Gemini models (all use gemini-3.1-pro-preview for RAW POWER)
@@ -75,6 +75,7 @@ import { isGeminiAvailable, geminiBrainstormTool, geminiAnalyzeCodeTool } from "
75
75
  import { isOpenRouterAvailable } from "./tools/openrouter-tools.js";
76
76
  import { getTachiTools } from "./tools/tachi-tool.js";
77
77
  import { getPromptTechniqueTools } from "./tools/prompt-technique-tools.js";
78
+ import { withParamAliases } from "./utils/param-aliases.js";
78
79
  // import { registerGPT5Tools, isGPT5Available } from "./tools/openai-gpt5-fixed.js"; // DISABLED - using regular openai-tools.ts
79
80
  import { initializeOptimizations } from "./optimization/index.js";
80
81
  import { FocusModeRegistry } from "./application/services/focus/FocusModeRegistry.js";
@@ -117,6 +118,9 @@ function safeAddTool(tool) {
117
118
  if (!isToolEnabled(tool.name)) {
118
119
  return; // Skip disabled tools silently (logging handled by isToolEnabled)
119
120
  }
121
+ // Auto-alias common param names (query/problem/prompt/question/topic)
122
+ // so LLMs can use any synonym and the tool still works
123
+ tool = withParamAliases(tool);
120
124
  if (!registeredTools.has(tool.name)) {
121
125
  // Wrap execute with usage tracking
122
126
  const originalExecute = tool.execute;
@@ -19,23 +19,9 @@ config({ path: path.resolve(__dirname, '../../../.env') });
19
19
  const GROK_API_KEY = getGrokApiKey();
20
20
  const GROK_API_URL = "https://api.x.ai/v1/chat/completions";
21
21
  const GROK_RESPONSES_URL = "https://api.x.ai/v1/responses"; // New Agent Tools API endpoint (Jan 2025)
22
- // Grok models - Updated 2025-11-22 with correct API model names
23
- export var GrokModel;
24
- (function (GrokModel) {
25
- // Grok 4.1 models (Nov 2025) - LATEST & BEST (verified working)
26
- GrokModel["GROK_4_1_FAST_REASONING"] = "grok-4-1-fast-reasoning";
27
- GrokModel["GROK_4_1_FAST"] = "grok-4-1-fast-non-reasoning";
28
- // Grok 4 fast models (2025) - Still good
29
- GrokModel["CODE_FAST"] = "grok-code-fast-1";
30
- GrokModel["GROK_4_FAST_REASONING"] = "grok-4-fast-reasoning";
31
- GrokModel["GROK_4_FAST"] = "grok-4-fast-non-reasoning";
32
- // Expensive/specialized (use sparingly)
33
- GrokModel["GROK_4_HEAVY"] = "grok-4-0709";
34
- GrokModel["GROK_3"] = "grok-3";
35
- // Beta/experimental (deprecated)
36
- GrokModel["GROK_BETA"] = "grok-beta";
37
- GrokModel["GROK_VISION_BETA"] = "grok-vision-beta";
38
- })(GrokModel || (GrokModel = {}));
22
+ // Unified GrokModel enum - single source of truth in grok-tools.ts
23
+ import { GrokModel } from './grok-tools.js';
24
+ export { GrokModel };
39
25
  /**
40
26
  * Enhanced Grok API call with live search support
41
27
  */
@@ -45,7 +31,7 @@ export async function callGrokEnhanced(messages, options = {}) {
45
31
  content: `[Grok API key not configured. Add GROK_API_KEY or XAI_API_KEY to .env file]`
46
32
  };
47
33
  }
48
- const { model = GrokModel.GROK_4_1_FAST_REASONING, // Updated 2025-11-22: Use latest Grok 4.1 by default
34
+ const { model = GrokModel.GROK_4_20_REASONING, // Updated: Use Grok 4.20 by default
49
35
  temperature = 0.7, maxTokens = options.useHeavy ? 100000 : 4000, enableLiveSearch = false, searchSources = 100, // Default to 100 sources for cost control
50
36
  searchDomains = [], structuredOutput = false } = options;
51
37
  try {
@@ -54,7 +40,7 @@ export async function callGrokEnhanced(messages, options = {}) {
54
40
  // NEW Agent Tools API (Jan 2025) - uses /v1/responses endpoint
55
41
  // with 'input' instead of 'messages' and tools array
56
42
  const searchRequestBody = {
57
- model: GrokModel.GROK_4_1_FAST, // Tool-calling optimized model for agentic search
43
+ model: GrokModel.GROK_4_20_NON_REASONING, // 4.20 standard is better for tool-calling search
58
44
  input: messages.map(m => ({ role: m.role, content: m.content })),
59
45
  tools: [
60
46
  { type: "web_search" },
@@ -187,9 +173,9 @@ ${FORMAT_INSTRUCTION}`
187
173
  content: query
188
174
  }
189
175
  ];
190
- log?.info(`Grok Scout: ${variant} research with ${enableLiveSearch ? 'live search' : 'knowledge base'} (using grok-4-1-fast-reasoning with enhanced reasoning)`);
176
+ log?.info(`Grok Scout: ${variant} research with ${enableLiveSearch ? 'live search' : 'knowledge base'} (using grok-4.20 reasoning)`);
191
177
  const result = await callGrokEnhanced(messages, {
192
- model: GrokModel.GROK_4_1_FAST_REASONING, // Updated 2025-11-21: Use latest Grok 4.1
178
+ model: GrokModel.GROK_4_20_REASONING, // 4.20 for low hallucination research
193
179
  enableLiveSearch,
194
180
  searchSources,
195
181
  searchDomains,
@@ -255,7 +241,7 @@ ${FORMAT_INSTRUCTION}`
255
241
  const costInfo = useHeavy ? '$3/$15 (expensive!)' : '$0.20/$0.50 (latest!)';
256
242
  log?.info(`Using ${modelName} (${approach}) with ${enableLiveSearch ? 'live search' : 'knowledge base'} - Cost: ${costInfo}`);
257
243
  const result = await callGrokEnhanced(messages, {
258
- model: useHeavy ? GrokModel.GROK_4_HEAVY : GrokModel.GROK_4_1_FAST_REASONING, // Updated 2025-11-21: Use latest Grok 4.1
244
+ model: useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_REASONING,
259
245
  useHeavy,
260
246
  enableLiveSearch,
261
247
  searchSources: 50,
@@ -307,7 +293,7 @@ export const grokFunctionTool = {
307
293
  ];
308
294
  // Make request with tools
309
295
  const requestBody = {
310
- model: args.useHeavy ? GrokModel.GROK_4_HEAVY : GrokModel.GROK_4_1_FAST, // Updated 2025-11-22: Use tool-calling optimized Grok 4.1 Fast Non-Reasoning
296
+ model: args.useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_NON_REASONING,
311
297
  messages,
312
298
  tools,
313
299
  tool_choice: "auto", // Let Grok decide when to call functions
@@ -373,13 +359,13 @@ ${FORMAT_INSTRUCTION}`
373
359
  content: `Search for: ${query}`
374
360
  }
375
361
  ];
376
- log?.info(`Grok Search: ${max_search_results} sources, recency: ${recency} (using grok-4-1-fast-reasoning with enhanced reasoning)`);
362
+ log?.info(`Grok Search: ${max_search_results} sources, recency: ${recency} (using grok-4.20 reasoning)`);
377
363
  // Extract domains from sources if specified
378
364
  const domains = sources
379
365
  ?.filter((s) => s.allowed_websites)
380
366
  ?.flatMap((s) => s.allowed_websites) || [];
381
367
  const result = await callGrokEnhanced(messages, {
382
- model: GrokModel.GROK_4_1_FAST_REASONING, // Updated 2025-11-21: Use latest Grok 4.1 with search
368
+ model: GrokModel.GROK_4_20_REASONING, // Low hallucination is CRITICAL for search accuracy
383
369
  enableLiveSearch: true,
384
370
  searchSources: max_search_results,
385
371
  searchDomains: domains,
@@ -417,11 +403,11 @@ export function isGrokAvailable() {
417
403
  export function getGrokStatus() {
418
404
  return {
419
405
  available: isGrokAvailable(),
420
- model: GrokModel.GROK_4_1_FAST_REASONING,
406
+ model: "grok-4.20-0309-reasoning",
421
407
  features: [
422
- 'Grok 4.1 Fast Reasoning (Nov 2025): Enhanced reasoning, creativity & emotional intelligence ($0.20/$0.50, 2M context)',
423
- 'Grok 4.1 Fast Non-Reasoning: Tool-calling optimized, agentic workflows ($0.20/$0.50, 2M context)',
424
- 'Heavy mode available (grok-4-0709: $3/$15, use sparingly)',
408
+ 'Grok 4.20 Reasoning (grok-4.20-0309-reasoning): Flagship, low hallucination, 2M context ($2/$6)',
409
+ 'Grok 4.20 Non-Reasoning (grok-4.20-0309-non-reasoning): Tool-calling optimized, agentic workflows ($2/$6)',
410
+ 'Grok 4.20 Multi-Agent (grok-4.20-multi-agent-0309): 4-16 parallel agents ($2/$6)',
425
411
  'Live web search with citations',
426
412
  'Function calling',
427
413
  'Structured outputs',
@@ -21,10 +21,14 @@ config({ path: path.resolve(__dirname, '../../../.env') });
21
21
  // Grok API configuration
22
22
  const GROK_API_KEY = getGrokApiKey();
23
23
  const GROK_API_URL = "https://api.x.ai/v1/chat/completions";
24
- // Available Grok models - Updated 2025-11-22 with correct API model names
24
+ // Available Grok models - Updated 2026-04-10 with Grok 4.20 (Mar 2026)
25
25
  export var GrokModel;
26
26
  (function (GrokModel) {
27
- // Grok 4.1 models (Nov 2025) - LATEST & BEST (verified working)
27
+ // Grok 4.20 models (Mar 10, 2026) - FLAGSHIP
28
+ GrokModel["GROK_4_20_REASONING"] = "grok-4.20-0309-reasoning";
29
+ GrokModel["GROK_4_20_NON_REASONING"] = "grok-4.20-0309-non-reasoning";
30
+ GrokModel["GROK_4_20_MULTI_AGENT"] = "grok-4.20-multi-agent-0309";
31
+ // Grok 4.1 fast models (Nov 2025) - BEST VALUE (10x cheaper)
28
32
  GrokModel["GROK_4_1_FAST_REASONING"] = "grok-4-1-fast-reasoning";
29
33
  GrokModel["GROK_4_1_FAST"] = "grok-4-1-fast-non-reasoning";
30
34
  // Grok 4 fast models (2025) - Still good
@@ -42,9 +46,8 @@ export var GrokModel;
42
46
  * - 'code-analysis': Relaxed for code analysis tools
43
47
  * - 'llm-orchestration': Medium for LLM-to-LLM calls
44
48
  */
45
- export async function callGrok(messages, model = GrokModel.GROK_4_1_FAST_REASONING, // Updated 2025-11-22: Use latest Grok 4.1 by default
46
- temperature = 0.7, maxTokens = 16384, // Increased default for comprehensive responses
47
- forceVisibleOutput = true, validationContext = 'llm-orchestration') {
49
+ export async function callGrok(messages, model = GrokModel.GROK_4_20_REASONING, temperature = 0.7, maxTokens = 16384, // Increased default for comprehensive responses
50
+ forceVisibleOutput = true, validationContext = 'llm-orchestration', reasoningEffort) {
48
51
  // Try OpenRouter gateway first if enabled
49
52
  if (isGatewayEnabled()) {
50
53
  const gatewayResult = await tryOpenRouterGateway(model, messages, {
@@ -68,34 +71,43 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
68
71
  }
69
72
  return { ...msg, content: validation.sanitized };
70
73
  });
74
+ // Grok 4.x reasoning can take 60-90s; 4.20 and multi-agent can take longer
75
+ const isReasoning = model.includes('reasoning') || model.includes('multi-agent');
76
+ const is420 = model.includes('4.20');
77
+ const timeoutMs = is420 ? 180000 : (isReasoning ? 120000 : 60000);
71
78
  try {
72
- // For Grok 4 models, we need to handle reasoning tokens specially
73
- const isGrok4 = model === GrokModel.GROK_4_1_FAST_REASONING ||
74
- model === GrokModel.GROK_4_1_FAST ||
75
- model === GrokModel.GROK_4_FAST_REASONING ||
76
- model === GrokModel.GROK_4_FAST ||
77
- model === GrokModel.GROK_4_HEAVY;
78
- // Adjust prompt for Grok 4 to ensure visible output
79
- if (isGrok4 && forceVisibleOutput) {
79
+ // For Grok 4+ models, we need to handle reasoning tokens specially
80
+ const isGrok4Plus = model.includes('grok-4');
81
+ // Adjust prompt for Grok 4+ to ensure visible output
82
+ if (isGrok4Plus && forceVisibleOutput) {
80
83
  const lastMessage = validatedMessages[validatedMessages.length - 1];
81
84
  if (lastMessage.role === 'user') {
82
85
  lastMessage.content += '\n\nProvide a detailed response with your reasoning and conclusion.';
83
86
  }
84
87
  }
88
+ const controller = new AbortController();
89
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
90
+ const isMultiAgent = model.includes('multi-agent');
91
+ const requestBody = {
92
+ model,
93
+ messages: validatedMessages,
94
+ temperature,
95
+ max_tokens: maxTokens,
96
+ stream: false
97
+ };
98
+ if (isMultiAgent && reasoningEffort) {
99
+ requestBody.reasoning = { effort: reasoningEffort };
100
+ }
85
101
  const response = await fetch(GROK_API_URL, {
86
102
  method: "POST",
87
103
  headers: {
88
104
  "Authorization": `Bearer ${GROK_API_KEY}`,
89
105
  "Content-Type": "application/json"
90
106
  },
91
- body: JSON.stringify({
92
- model,
93
- messages: validatedMessages,
94
- temperature,
95
- max_tokens: maxTokens,
96
- stream: false
97
- })
107
+ body: JSON.stringify(requestBody),
108
+ signal: controller.signal
98
109
  });
110
+ clearTimeout(timeoutId);
99
111
  if (!response.ok) {
100
112
  const error = await response.text();
101
113
  throw new Error(`Grok API error: ${response.statusText} - ${error}`);
@@ -105,7 +117,7 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
105
117
  // Handle Grok 4's reasoning tokens
106
118
  if (!content && data.usage?.completion_tokens_details?.reasoning_tokens > 0) {
107
119
  // If Grok 4 returns no visible content, retry with Grok 3 for visible output
108
- if (isGrok4 && forceVisibleOutput) {
120
+ if (isGrok4Plus && forceVisibleOutput) {
109
121
  console.error(`Grok 4 used ${data.usage.completion_tokens_details.reasoning_tokens} reasoning tokens with no output. Retrying with Grok 3...`);
110
122
  // Messages already validated - use same context for retry
111
123
  return callGrok(validatedMessages, GrokModel.GROK_3, temperature, maxTokens, false, validationContext);
@@ -115,6 +127,9 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
115
127
  return content || "No response from Grok";
116
128
  }
117
129
  catch (error) {
130
+ if (error instanceof Error && error.name === 'AbortError') {
131
+ return `[Grok timeout: ${model} exceeded ${isReasoning ? '120' : '60'}s limit]`;
132
+ }
118
133
  return `[Grok error: ${error instanceof Error ? error.message : String(error)}]`;
119
134
  }
120
135
  }
@@ -158,10 +173,10 @@ ${FORMAT_INSTRUCTION}`
158
173
  content: problem + fileContext
159
174
  }
160
175
  ];
161
- // Use GROK_4_1_FAST_REASONING by default (latest with enhanced reasoning!), GROK_4_HEAVY only if explicitly requested
162
- const model = useHeavy ? GrokModel.GROK_4_HEAVY : GrokModel.GROK_4_1_FAST_REASONING;
163
- const maxTokens = useHeavy ? 100000 : 16384; // 100k for heavy, 16k for normal reasoning
164
- log?.info(`Using Grok model: ${model} for deep reasoning (max tokens: ${maxTokens}, cost: ${useHeavy ? 'expensive $3/$15' : 'cheap $0.20/$0.50'})`);
176
+ // Use 4.20 flagship by default, multi-agent for heavy tasks
177
+ const model = useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_REASONING;
178
+ const maxTokens = useHeavy ? 100000 : 16384;
179
+ log?.info(`Using Grok model: ${model} for deep reasoning (max tokens: ${maxTokens})`);
165
180
  // Use heartbeat to prevent MCP timeout during long reasoning operations
166
181
  const reportFn = reportProgress ?? (async () => { });
167
182
  const result = await withHeartbeat(() => callGrok(messages, model, 0.7, maxTokens, true, 'llm-orchestration'), reportFn);
@@ -212,7 +227,7 @@ ${FORMAT_INSTRUCTION}`
212
227
  log?.info(`Using Grok 4.1 Fast Non-Reasoning (2M context, tool-calling optimized, $0.20/$0.50)`);
213
228
  // Use heartbeat to prevent MCP timeout
214
229
  const reportFn = reportProgress ?? (async () => { });
215
- const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_1_FAST, 0.2, 4000, true, 'code-analysis'), reportFn);
230
+ const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_NON_REASONING, 0.2, 4000, true, 'code-analysis'), reportFn);
216
231
  return stripFormatting(result);
217
232
  }
218
233
  };
@@ -264,7 +279,7 @@ ${FORMAT_INSTRUCTION}`
264
279
  log?.info(`Using Grok 4.1 Fast Non-Reasoning for debugging (tool-calling optimized, $0.20/$0.50)`);
265
280
  // Use heartbeat to prevent MCP timeout
266
281
  const reportFn = reportProgress ?? (async () => { });
267
- const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_1_FAST, 0.3, 3000, true, 'code-analysis'), reportFn);
282
+ const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_NON_REASONING, 0.3, 3000, true, 'code-analysis'), reportFn);
268
283
  return stripFormatting(result);
269
284
  }
270
285
  };
@@ -302,10 +317,10 @@ ${FORMAT_INSTRUCTION}`
302
317
  content: requirements + fileContext
303
318
  }
304
319
  ];
305
- log?.info(`Using Grok 4.1 Fast Reasoning for architecture (latest model, $0.20/$0.50)`);
320
+ log?.info(`Using Grok 4.20 multi-agent for architecture (16-agent swarm)`);
306
321
  // Use heartbeat to prevent MCP timeout
307
322
  const reportFn = reportProgress ?? (async () => { });
308
- const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_1_FAST_REASONING, 0.6, 4000, true, 'llm-orchestration'), reportFn);
323
+ const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_MULTI_AGENT, 0.6, 4000, true, 'llm-orchestration', 'high'), reportFn);
309
324
  return stripFormatting(result);
310
325
  }
311
326
  };
@@ -356,8 +371,8 @@ ${FORMAT_INSTRUCTION}`
356
371
  content: topic + fileContext
357
372
  }
358
373
  ];
359
- const model = forceHeavy ? GrokModel.GROK_4_HEAVY : GrokModel.GROK_4_1_FAST_REASONING;
360
- log?.info(`Brainstorming with Grok model: ${model} (Heavy: ${forceHeavy}, cost: ${forceHeavy ? 'expensive $3/$15' : 'cheap $0.20/$0.50 - latest 4.1'})`);
374
+ const model = forceHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_NON_REASONING;
375
+ log?.info(`Brainstorming with Grok model: ${model} (Heavy: ${forceHeavy})`);
361
376
  const reportFn = reportProgress ?? (async () => { });
362
377
  const result = await withHeartbeat(() => callGrok(messages, model, 0.95, 4000, true, 'llm-orchestration'), reportFn);
363
378
  return stripFormatting(result);
@@ -184,14 +184,20 @@ reasoningEffort = "low", requireConfirmation = false, skipValidation = false) {
184
184
  };
185
185
  }
186
186
  console.error(`🔍 TRACE: Using ${isGPT5 ? '/v1/responses' : '/v1/chat/completions'} endpoint for ${currentModel}`);
187
+ // GPT-5.4 with high reasoning effort can take 2+ minutes
188
+ const timeoutMs = (reasoningEffort === 'high' || reasoningEffort === 'xhigh') ? 180000 : 90000;
189
+ const controller = new AbortController();
190
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
187
191
  const response = await fetch(endpoint, {
188
192
  method: "POST",
189
193
  headers: {
190
194
  "Authorization": `Bearer ${OPENAI_API_KEY}`,
191
195
  "Content-Type": "application/json"
192
196
  },
193
- body: JSON.stringify(requestBody)
197
+ body: JSON.stringify(requestBody),
198
+ signal: controller.signal
194
199
  });
200
+ clearTimeout(timeoutId);
195
201
  if (!response.ok) {
196
202
  const error = await response.text();
197
203
  lastError = `${currentModel}: ${response.statusText} - ${error}`;
@@ -254,8 +260,15 @@ reasoningEffort = "low", requireConfirmation = false, skipValidation = false) {
254
260
  return stripFormatting(result);
255
261
  }
256
262
  catch (error) {
257
- lastError = `${currentModel}: ${error instanceof Error ? error.message : String(error)}`;
258
- console.error(`🔍 TRACE: ${currentModel} EXCEPTION - ${lastError}`);
263
+ // Handle abort/timeout specifically
264
+ if (error instanceof Error && error.name === 'AbortError') {
265
+ lastError = `${currentModel}: Timeout (reasoning_effort=${reasoningEffort})`;
266
+ console.error(`🔍 TRACE: ${currentModel} TIMEOUT - reasoning_effort=${reasoningEffort}`);
267
+ }
268
+ else {
269
+ lastError = `${currentModel}: ${error instanceof Error ? error.message : String(error)}`;
270
+ console.error(`🔍 TRACE: ${currentModel} EXCEPTION - ${lastError}`);
271
+ }
259
272
  continue; // Try next model
260
273
  }
261
274
  }
@@ -1136,11 +1136,13 @@ function generateProgressBar(current, total) {
1136
1136
  */
1137
1137
  function parsePlanSteps(plan) {
1138
1138
  const steps = [];
1139
- // Try numbered steps first (### Step 1: or 1. or Step 1:)
1139
+ // Try structured step/task headers (### Step 1:, ### Task T1:, 1., Step 1:)
1140
1140
  const stepPatterns = [
1141
1141
  /###\s*Step\s*\d+[:\s]+([^\n]+)([\s\S]*?)(?=###\s*Step|\n##[^#]|$)/gi,
1142
+ /###\s*Task\s*[^\n:]+:\s*([^\n]+)([\s\S]*?)(?=###\s*Task|\n##[^#]|$)/gi,
1142
1143
  /^\s*(\d+)\.\s*([^\n]+)([\s\S]*?)(?=^\s*\d+\.|$)/gm,
1143
1144
  /^Step\s*\d+[:\s]+([^\n]+)([\s\S]*?)(?=^Step\s*\d+|$)/gim,
1145
+ /^Task\s*[^\n:]+:\s*([^\n]+)([\s\S]*?)(?=^Task\s*\S+|$)/gim,
1144
1146
  ];
1145
1147
  for (const pattern of stepPatterns) {
1146
1148
  const matches = [...plan.matchAll(pattern)];
@@ -1253,6 +1255,10 @@ Evidence params (unblind the checkpoints):
1253
1255
  // Parse plan into steps
1254
1256
  const steps = parsePlanSteps(plan);
1255
1257
  const totalSteps = steps.length;
1258
+ if (completed.length > totalSteps) {
1259
+ lines.push(`⚠️ Plan parse mismatch: ${completed.length} steps completed but only ${totalSteps} parsed. Plan format may have degraded.`);
1260
+ lines.push("");
1261
+ }
1256
1262
  if (mode === "start") {
1257
1263
  // ═══════════════════════════════════════════════════════════════
1258
1264
  // START: Show parsed plan and devlog hint
@@ -131,7 +131,7 @@ Focus on recent, accurate information. Provide sources.${FORMAT_INSTRUCTION}`
131
131
  { role: "user", content: query }
132
132
  ];
133
133
  const result = await callGrokEnhanced(messages, {
134
- model: GrokModel.GROK_4_1_FAST_REASONING,
134
+ model: GrokModel.GROK_4_20_REASONING,
135
135
  enableLiveSearch: true,
136
136
  searchSources: 20,
137
137
  temperature: 0.3,
@@ -176,7 +176,7 @@ async function solveHandler(query) {
176
176
  { role: "system", content: `Search for solutions to this coding problem. Find relevant Stack Overflow, docs, or GitHub issues.${FORMAT_INSTRUCTION}` },
177
177
  { role: "user", content: query }
178
178
  ], {
179
- model: GrokModel.GROK_4_1_FAST_REASONING,
179
+ model: GrokModel.GROK_4_20_REASONING,
180
180
  enableLiveSearch: true,
181
181
  searchSources: 10,
182
182
  temperature: 0.3,
@@ -269,7 +269,7 @@ async function architectHandler(query) {
269
269
  { role: "system", content: `Search for architecture patterns, best practices, and real-world examples for this design decision.${FORMAT_INSTRUCTION}` },
270
270
  { role: "user", content: query }
271
271
  ], {
272
- model: GrokModel.GROK_4_1_FAST_REASONING,
272
+ model: GrokModel.GROK_4_20_REASONING,
273
273
  enableLiveSearch: true,
274
274
  searchSources: 15,
275
275
  temperature: 0.3,
@@ -452,8 +452,8 @@ export function stripMarkdown(md, options) {
452
452
  text = text
453
453
  // Markdown headers — strip # prefix (or bold if boldHeaders)
454
454
  .replace(/^#{1,6}\s+(.+)$/gm, boldHeaders ? '\x1b[1m$1\x1b[0m' : '$1')
455
- // Emoji section headers — e.g. "🧠 TYPE SAFETY ───" → rotating pastel bg, dark bold text
456
- .replace(/^(.{1,2})\s+([A-Z][A-Z\s&]+?)\s*─+$/gm, (_match, emoji, header) => {
455
+ // Emoji section headers — e.g. "🧠 TYPE SAFETY ───" or "🧠 Key Activities" → rotating pastel bg
456
+ .replace(/^(.{1,2})\s+([A-Z][\w\s&,()/-]{2,50}?)\s*─*$/gm, (_match, emoji, header) => {
457
457
  if (!boldHeaders)
458
458
  return `${emoji} ${header}`;
459
459
  const pastels = [146, 182, 152, 187, 116, 180]; // lavender, mauve, powder blue, sand, mint, peach