npm - byterover-cli - Versions diffs - 1.3.0 → 1.5.0 - Mend

byterover-cli 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (171)

package/dist/infra/cipher/llm/openrouter-llm-service.js CHANGED

@@ -4,7 +4,10 @@ import { NoOpLogger } from '../../../core/interfaces/cipher/i-logger.js';
 import { getErrorMessage } from '../../../utils/error-helpers.js';
 import { ContextManager } from './context/context-manager.js';
 import { OpenRouterMessageFormatter } from './formatters/openrouter-formatter.js';
+import { OpenRouterContentGenerator } from './generators/openrouter-content-generator.js';
+import { createIdGenerator, StreamProcessor } from './stream-processor.js';
 import { OpenRouterTokenizer } from './tokenizers/openrouter-tokenizer.js';
+import { transformGenerateContentChunksToStreamEvents } from './transformers/openrouter-stream-transformer.js';
 /**
  * OpenRouter LLM Service.
  *
@@ -25,11 +28,14 @@ import { OpenRouterTokenizer } from './tokenizers/openrouter-tokenizer.js';
 export class OpenRouterLLMService {
     client;
     config;
+    contentGenerator;
     contextManager;
     formatter;
     logger;
     memoryManager;
     sessionEventBus;
+    sessionId;
+    streamProcessor;
     systemPromptManager;
     tokenizer;
     toolManager;
@@ -65,6 +71,8 @@ export class OpenRouterLLMService {
             timeout: config.timeout,
             verbose: config.verbose,
         };
+        // Store sessionId for streaming context
+        this.sessionId = sessionId;
         // Initialize OpenAI client with OpenRouter base URL
         this.client = new OpenAI({
             apiKey: this.config.apiKey,
@@ -78,6 +86,19 @@ export class OpenRouterLLMService {
         // Initialize formatter and tokenizer
         this.formatter = new OpenRouterMessageFormatter();
         this.tokenizer = new OpenRouterTokenizer();
+        // Initialize content generator for streaming support
+        this.contentGenerator = new OpenRouterContentGenerator({
+            apiKey: this.config.apiKey,
+            baseUrl: this.config.baseUrl,
+            httpReferer: this.config.httpReferer,
+            maxTokens: this.config.maxTokens,
+            model: this.config.model,
+            siteName: this.config.siteName,
+            temperature: this.config.temperature,
+            timeout: this.config.timeout,
+        });
+        // Initialize stream processor for handling streaming events
+        this.streamProcessor = new StreamProcessor();
         // Initialize context manager with optional history storage
         this.contextManager = new ContextManager({
             formatter: this.formatter,
@@ -101,18 +122,23 @@ export class OpenRouterLLMService {
      * @param options.signal - Optional abort signal for cancellation
      * @param options.imageData - Optional image data
      * @param options.fileData - Optional file data
-     * @param options.stream - Whether to stream response (not implemented yet)
+     * @param options.stream - Whether to stream response (emits llmservice:chunk events)
      * @param options.executionContext - Optional execution context (for JSON input mode, etc.)
      * @param options.taskId - Task ID for billing tracking
      * @returns Final assistant response
      */
     async completeTask(textInput, options) {
-        // Extract options with defaults
-        const { executionContext, fileData, imageData, signal } = options ?? {};
+        // Extract options with defaults - include taskId for concurrent task isolation
+        const { executionContext, fileData, imageData, signal, stream, taskId } = options ?? {};
         // Add user message to context
         await this.contextManager.addUserMessage(textInput, imageData, fileData);
         // Get filtered tools based on command type (e.g., only read-only tools for 'query')
         const toolSet = this.toolManager.getToolsForCommand(executionContext?.commandType);
+        // Route to streaming or non-streaming execution
+        if (stream) {
+            return this.completeTaskStreaming(toolSet, executionContext, signal, taskId);
+        }
+        // Non-streaming path: Build tools array for OpenAI format
         const tools = Object.entries(toolSet).map(([name, schema]) => ({
             function: {
                 description: schema.description ?? '',
@@ -130,14 +156,14 @@ export class OpenRouterLLMService {
             }
             try {
                 // eslint-disable-next-line no-await-in-loop -- Sequential iterations required for agentic loop
-                const result = await this.executeAgenticIteration(iterationCount, tools, executionContext);
+                const result = await this.executeAgenticIteration(iterationCount, tools, executionContext, taskId);
                 if (result !== null) {
                     return result;
                 }
                 iterationCount++;
             }
             catch (error) {
-                this.handleLLMError(error);
+                this.handleLLMError(error, taskId);
             }
         }
         // Max iterations exceeded
@@ -204,15 +230,65 @@ export class OpenRouterLLMService {
         }
         return lastMessage;
     }
+    /**
+     * Complete a task using streaming mode.
+     *
+     * Emits real-time llmservice:chunk events as tokens arrive.
+     * Follows the OpenCode pattern of delta-based streaming.
+     *
+     * @param toolSet - Available tools for the task
+     * @param executionContext - Optional execution context
+     * @param signal - Optional abort signal for cancellation
+     * @param taskId - Optional task ID for concurrent task isolation
+     * @returns Final accumulated response
+     */
+    async completeTaskStreaming(toolSet, executionContext, signal, taskId) {
+        let iterationCount = 0;
+        let finalResponse = '';
+        // Streaming agentic loop
+        while (iterationCount < this.config.maxIterations) {
+            // Check if aborted
+            if (signal?.aborted) {
+                throw new Error('Operation aborted');
+            }
+            try {
+                // eslint-disable-next-line no-await-in-loop -- Sequential iterations required for agentic loop
+                const result = await this.executeAgenticIterationStreaming(iterationCount, toolSet, executionContext, taskId);
+                // If no tool calls, we're done - emit final response
+                if (!result.hasToolCalls) {
+                    finalResponse = result.response;
+                    // Emit response event
+                    this.sessionEventBus.emit('llmservice:response', {
+                        content: finalResponse,
+                        model: this.config.model,
+                        provider: 'openrouter',
+                        taskId: taskId || undefined,
+                    });
+                    // Add assistant message to context
+                    // eslint-disable-next-line no-await-in-loop -- Must complete before returning
+                    await this.contextManager.addAssistantMessage(finalResponse);
+                    return finalResponse;
+                }
+                // Has tool calls - continue the loop
+                iterationCount++;
+            }
+            catch (error) {
+                this.handleLLMError(error, taskId);
+            }
+        }
+        // Max iterations exceeded
+        throw new LlmMaxIterationsError(this.config.maxIterations, 'openrouter', this.config.model);
+    }
     /**
      * Execute a single iteration of the agentic loop.
      *
      * @param iterationCount - Current iteration number
      * @param tools - Available tools for this iteration
      * @param executionContext - Optional execution context
+     * @param taskId - Optional task ID for concurrent task isolation
      * @returns Final response string if complete, null if more iterations needed
      */
-    async executeAgenticIteration(iterationCount, tools, executionContext) {
+    async executeAgenticIteration(iterationCount, tools, executionContext, taskId) {
         // Build system prompt using SystemPromptManager (before compression for correct token accounting)
         // Use filtered tool names based on command type (e.g., only read-only tools for 'query')
         const availableTools = this.toolManager.getToolNamesForCommand(executionContext?.commandType);
@@ -250,53 +326,162 @@ export class OpenRouterLLMService {
                 tokensUsed,
             });
         }
-        // Emit thinking event
-        this.sessionEventBus.emit('llmservice:thinking');
+        // Emit thinking event with taskId for concurrent task isolation
+        this.sessionEventBus.emit('llmservice:thinking', { taskId });
         // Call LLM and parse response
         const lastMessage = await this.callLLMAndParseResponse(tools, formattedMessages);
         // Check if there are tool calls
         if (!lastMessage.toolCalls || lastMessage.toolCalls.length === 0) {
-            return this.handleFinalResponse(lastMessage);
+            return this.handleFinalResponse(lastMessage, taskId);
         }
         // Has tool calls - handle them
-        await this.handleToolCalls(lastMessage);
+        await this.handleToolCalls(lastMessage, taskId);
         return null;
     }
+    /**
+     * Execute a single iteration of the agentic loop with streaming.
+     *
+     * This method uses the ContentGenerator's streaming API to provide
+     * real-time token-by-token output via the SessionEventBus.
+     *
+     * @param iterationCount - Current iteration number
+     * @param toolSet - Available tools for this iteration
+     * @param executionContext - Optional execution context
+     * @param taskId - Optional task ID for concurrent task isolation
+     * @returns Object with response text and whether tool calls were made
+     */
+    async executeAgenticIterationStreaming(iterationCount, toolSet, executionContext, taskId) {
+        // Build system prompt using SystemPromptManager
+        const availableTools = this.toolManager.getToolNamesForCommand(executionContext?.commandType);
+        const markersSet = this.toolManager.getAvailableMarkers();
+        const availableMarkers = {};
+        for (const marker of markersSet) {
+            availableMarkers[marker] = marker;
+        }
+        const systemPrompt = await this.systemPromptManager.build({
+            availableMarkers,
+            availableTools,
+            commandType: executionContext?.commandType,
+            conversationMetadata: executionContext?.conversationMetadata,
+            memoryManager: this.memoryManager,
+        });
+        // Get messages from context with compression
+        const { tokensUsed } = await this.contextManager.getFormattedMessagesWithCompression(systemPrompt);
+        if (this.config.verbose) {
+            this.logger.debug('Streaming iteration', {
+                iteration: `${iterationCount + 1}/${this.config.maxIterations}`,
+                maxInputTokens: this.config.maxInputTokens,
+                tokensUsed,
+            });
+        }
+        // Emit thinking event
+        this.sessionEventBus.emit('llmservice:thinking', { taskId });
+        // Get internal messages for content generator
+        const contents = this.contextManager.getMessages();
+        // Generate streaming response using ContentGenerator
+        const streamGenerator = this.contentGenerator.generateContentStream({
+            config: {
+                maxTokens: this.config.maxTokens,
+                temperature: this.config.temperature,
+            },
+            contents,
+            executionContext,
+            model: this.config.model,
+            systemPrompt,
+            taskId: taskId ?? `task-${Date.now()}`,
+            tools: toolSet,
+        });
+        // Transform chunks to StreamEvents and process
+        // Pass modelId for native reasoning extraction (OpenAI, Grok, Gemini)
+        const streamEvents = transformGenerateContentChunksToStreamEvents(streamGenerator, {
+            modelId: this.config.model,
+            stepIndex: iterationCount,
+        });
+        // Process stream and accumulate state
+        const generateId = createIdGenerator();
+        const processorState = await this.streamProcessor.process(streamEvents, {
+            eventBus: this.sessionEventBus,
+            generateId,
+            sessionId: this.sessionId,
+            taskId,
+        });
+        // Extract accumulated text and tool calls
+        const accumulatedText = processorState.textContent;
+        const toolParts = [...processorState.toolParts.values()];
+        const hasToolCalls = toolParts.length > 0;
+        // If there are tool calls, extract and execute them
+        if (hasToolCalls) {
+            // Convert tool parts to ToolCall format
+            const toolCalls = toolParts.map((part) => ({
+                function: {
+                    arguments: JSON.stringify(part.state.status === 'pending' ? part.state.input : {}),
+                    name: part.toolName,
+                },
+                id: part.callId,
+                type: 'function',
+            }));
+            // Add assistant message with tool calls to context
+            await this.contextManager.addAssistantMessage(accumulatedText, toolCalls);
+            // Execute tool calls in parallel (matching internal service behavior)
+            // This prevents long-running tools (e.g., subagent Tasks) from blocking others
+            await Promise.allSettled(toolCalls.map((toolCall) => this.executeToolCall(toolCall, taskId)));
+        }
+        return {
+            hasToolCalls,
+            response: accumulatedText,
+        };
+    }
     /**
      * Execute a single tool call.
      *
      * @param toolCall - Tool call to execute
+     * @param taskId - Optional task ID for concurrent task isolation
      */
-    async executeToolCall(toolCall) {
+    async executeToolCall(toolCall, taskId) {
         try {
             const toolName = toolCall.function.name;
             const toolArgs = JSON.parse(toolCall.function.arguments);
-            // Emit tool call event
+            // Emit tool call event with taskId for concurrent task isolation
             this.sessionEventBus.emit('llmservice:toolCall', {
                 args: toolArgs,
                 callId: toolCall.id,
+                taskId: taskId || undefined,
                 toolName,
             });
             // Execute tool via ToolManager (handles approval, routing, etc.)
-            const result = await this.toolManager.executeTool(toolName, toolArgs);
-            // Emit tool result event (success)
+            // Pass sessionId and taskId context for sub-agent event routing
+            const result = await this.toolManager.executeTool(toolName, toolArgs, this.sessionId, {
+                sessionId: this.sessionId,
+                taskId,
+            });
+            // Extract content from ToolExecutionResult - the LLM needs the content string,
+            // not the full result object (which would be JSON-stringified and confuse the model)
+            const resultContent = result.content;
+            const isSuccess = result.success;
+            // Emit tool result event with taskId
             this.sessionEventBus.emit('llmservice:toolResult', {
                 callId: toolCall.id,
-                result,
-                success: true,
+                ...(isSuccess ? { result: resultContent } : { error: result.errorMessage ?? String(resultContent) }),
+                errorType: result.errorType,
+                success: isSuccess,
+                taskId: taskId || undefined,
                 toolName,
             });
             // Add tool result to context
-            await this.contextManager.addToolResult(toolCall.id, toolName, result, { success: true });
+            await this.contextManager.addToolResult(toolCall.id, toolName, resultContent, {
+                errorType: result.errorType,
+                success: isSuccess,
+            });
         }
         catch (error) {
             // Add error result to context
             const errorMessage = error instanceof Error ? error.message : String(error);
-            // Emit tool result event (error)
+            // Emit tool result event (error) with taskId
             this.sessionEventBus.emit('llmservice:toolResult', {
                 callId: toolCall.id,
                 error: errorMessage,
                 success: false,
+                taskId: taskId || undefined,
                 toolName: toolCall.function.name,
             });
             await this.contextManager.addToolResult(toolCall.id, toolCall.function.name, `Error: ${errorMessage}`, {
@@ -326,15 +511,17 @@ export class OpenRouterLLMService {
      * Handle final response when there are no tool calls.
      *
      * @param lastMessage - Last message from LLM
+     * @param taskId - Optional task ID for concurrent task isolation
      * @returns Final response content
      */
-    async handleFinalResponse(lastMessage) {
+    async handleFinalResponse(lastMessage, taskId) {
         const content = this.extractTextContent(lastMessage);
-        // Emit response event
+        // Emit response event with taskId for concurrent task isolation
         this.sessionEventBus.emit('llmservice:response', {
             content,
             model: this.config.model,
             provider: 'openrouter',
+            taskId: taskId || undefined,
         });
         // Add assistant message to context
         await this.contextManager.addAssistantMessage(content);
@@ -344,12 +531,14 @@ export class OpenRouterLLMService {
      * Handle LLM errors and re-throw or wrap appropriately.
      *
      * @param error - Error to handle
+     * @param taskId - Optional task ID for concurrent task isolation
      */
-    handleLLMError(error) {
-        // Emit error event
+    handleLLMError(error, taskId) {
+        // Emit error event with taskId for concurrent task isolation
         const errorMessage = error instanceof Error ? error.message : String(error);
         this.sessionEventBus.emit('llmservice:error', {
             error: errorMessage,
+            taskId: taskId || undefined,
         });
         // Re-throw LLM errors as-is
         if (error instanceof LlmResponseParsingError ||
@@ -367,18 +556,17 @@ export class OpenRouterLLMService {
      * Handle tool calls from LLM response.
      *
      * @param lastMessage - Last message containing tool calls
+     * @param taskId - Optional task ID for concurrent task isolation
      */
-    async handleToolCalls(lastMessage) {
+    async handleToolCalls(lastMessage, taskId) {
         if (!lastMessage.toolCalls || lastMessage.toolCalls.length === 0) {
             return;
         }
         // Has tool calls - add assistant message with tool calls
         const assistantContent = this.extractTextContent(lastMessage);
         await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls);
-        // Execute tool calls via ToolManager
-        for (const toolCall of lastMessage.toolCalls) {
-            // eslint-disable-next-line no-await-in-loop -- Sequential tool execution required
-            await this.executeToolCall(toolCall);
-        }
+        // Execute tool calls in parallel (matching internal service behavior)
+        // This prevents long-running tools (e.g., subagent Tasks) from blocking others
+        await Promise.allSettled(lastMessage.toolCalls.map((toolCall) => this.executeToolCall(toolCall, taskId)));
     }
 }

package/dist/infra/cipher/llm/stream-processor.d.ts CHANGED

@@ -11,10 +11,11 @@
  * - Part creation and updates with unique IDs
  */
 import type { StepTokenUsage } from '../../../core/domain/cipher/agent-events/types.js';
-import type { CompactionPart, PatchPart, RetryPart, SnapshotPart, StepFinishPart, StepStartPart, TextPart, ToolPart } from '../../../core/interfaces/cipher/message-types.js';
+import type { CompactionPart, PatchPart, ReasoningPart, RetryPart, SnapshotPart, StepFinishPart, StepStartPart, TextPart, ToolPart } from '../../../core/interfaces/cipher/message-types.js';
 import type { SessionEventBus } from '../events/event-emitter.js';
 /**
  * Stream event types that the processor can handle.
+ * Following OpenCode's pattern with reasoning-start/delta/end lifecycle.
  */
 export type StreamEvent = {
     callId: string;
@@ -41,12 +42,25 @@ export type StreamEvent = {
     stepIndex: number;
     tokens: StepTokenUsage;
     type: 'step-finish';
+} | {
+    delta: string;
+    id: string;
+    providerMetadata?: Record<string, unknown>;
+    type: 'reasoning-delta-v2';
 } | {
     delta: string;
     type: 'reasoning-delta';
 } | {
     delta: string;
     type: 'text-delta';
+} | {
+    id: string;
+    providerMetadata?: Record<string, unknown>;
+    type: 'reasoning-end';
+} | {
+    id: string;
+    providerMetadata?: Record<string, unknown>;
+    type: 'reasoning-start';
 } | {
     stepIndex: number;
     type: 'step-start';
@@ -63,6 +77,8 @@ export interface ProcessorContext {
     generateId: () => string;
     /** Session ID for event context */
     sessionId: string;
+    /** Task ID for event routing (required for chunk events to reach TUI) */
+    taskId?: string;
 }
 /**
  * Accumulated state during stream processing.
@@ -71,7 +87,11 @@ export interface ProcessorState {
     /** Current step index */
     currentStepIndex: number;
     /** Parts created during processing */
-    parts: Array<CompactionPart | PatchPart | RetryPart | SnapshotPart | StepFinishPart | StepStartPart | TextPart | ToolPart>;
+    parts: Array<CompactionPart | PatchPart | ReasoningPart | RetryPart | SnapshotPart | StepFinishPart | StepStartPart | TextPart | ToolPart>;
+    /** Accumulated reasoning content (for legacy reasoning-delta events) */
+    reasoningContent: string;
+    /** Reasoning parts indexed by ID (for v2 reasoning events) */
+    reasoningParts: Map<string, ReasoningPart>;
     /** Accumulated text content */
     textContent: string;
     /** Tool parts indexed by call ID */

package/dist/infra/cipher/llm/stream-processor.js CHANGED

@@ -43,12 +43,24 @@ export class StreamProcessor {
         const state = {
             currentStepIndex: 0,
             parts: [],
+            reasoningContent: '',
+            reasoningParts: new Map(),
             textContent: '',
             toolParts: new Map(),
         };
+        let receivedFinish = false;
         for await (const event of stream) {
+            if (event.type === 'finish') {
+                receivedFinish = true;
+            }
             await this.handleEvent(event, state, context);
         }
+        // Safety net: if the stream ended without a 'finish' event (e.g., OpenRouter
+        // stream closed without setting finish_reason), finalize any pending text part
+        // so the TUI receives isComplete: true and stops showing a loading spinner.
+        if (!receivedFinish) {
+            this.finalizeTextPart(state, context);
+        }
         return state;
     }
     /**
@@ -65,6 +77,7 @@ export class StreamProcessor {
             context.eventBus.emit('llmservice:chunk', {
                 content: '',
                 isComplete: true,
+                taskId: context.taskId,
                 type: 'text',
             });
         }
@@ -80,13 +93,65 @@ export class StreamProcessor {
                 break;
             }
             case 'reasoning-delta': {
-                // Emit reasoning chunk for UI streaming
+                // Legacy: Emit reasoning chunk for UI streaming (simple delta without ID tracking)
+                state.reasoningContent += event.delta;
                 context.eventBus.emit('llmservice:chunk', {
                     content: event.delta,
+                    taskId: context.taskId,
                     type: 'reasoning',
                 });
                 break;
             }
+            case 'reasoning-delta-v2': {
+                // V2: Emit reasoning chunk with ID tracking (following OpenCode pattern)
+                const reasoningPart = state.reasoningParts.get(event.id);
+                if (reasoningPart) {
+                    reasoningPart.text += event.delta;
+                    if (event.providerMetadata) {
+                        reasoningPart.providerMetadata = event.providerMetadata;
+                    }
+                    context.eventBus.emit('llmservice:chunk', {
+                        content: event.delta,
+                        taskId: context.taskId,
+                        type: 'reasoning',
+                    });
+                }
+                break;
+            }
+            case 'reasoning-end': {
+                // Finalize reasoning part with end timestamp
+                const reasoningPart = state.reasoningParts.get(event.id);
+                if (reasoningPart) {
+                    reasoningPart.text = reasoningPart.text.trimEnd();
+                    reasoningPart.time.end = Date.now();
+                    if (event.providerMetadata) {
+                        reasoningPart.providerMetadata = event.providerMetadata;
+                    }
+                    // Emit completion signal
+                    context.eventBus.emit('llmservice:chunk', {
+                        content: '',
+                        isComplete: true,
+                        taskId: context.taskId,
+                        type: 'reasoning',
+                    });
+                }
+                break;
+            }
+            case 'reasoning-start': {
+                // Create new reasoning part and track it
+                const reasoningPart = {
+                    id: event.id,
+                    providerMetadata: event.providerMetadata,
+                    text: '',
+                    time: {
+                        start: Date.now(),
+                    },
+                    type: 'reasoning',
+                };
+                state.reasoningParts.set(event.id, reasoningPart);
+                state.parts.push(reasoningPart);
+                break;
+            }
             case 'step-finish': {
                 this.handleStepFinish({
                     cost: event.cost,
@@ -113,7 +178,7 @@ export class StreamProcessor {
                 break;
             }
             case 'tool-call-input': {
-                this.handleToolCallInput(event.callId, event.input, state);
+                this.handleToolCallInput(event.callId, event.input, state, context);
                 break;
             }
             case 'tool-call-running': {
@@ -173,6 +238,7 @@ export class StreamProcessor {
         // Emit chunk with delta for real-time UI update
         context.eventBus.emit('llmservice:chunk', {
             content: delta,
+            taskId: context.taskId,
             type: 'text',
         });
     }
@@ -215,13 +281,20 @@ export class StreamProcessor {
     /**
      * Handle tool call input received.
      */
-    handleToolCallInput(callId, input, state) {
+    handleToolCallInput(callId, input, state, context) {
         const toolPart = state.toolParts.get(callId);
         if (toolPart && toolPart.state.status === 'pending') {
             toolPart.state = {
                 input,
                 status: 'pending',
             };
+            // Emit updated tool call event with args so TUI can display them
+            context.eventBus.emit('llmservice:toolCall', {
+                args: input,
+                callId,
+                taskId: context.taskId,
+                toolName: toolPart.toolName,
+            });
         }
     }
     /**
@@ -255,10 +328,11 @@ export class StreamProcessor {
         };
         state.toolParts.set(callId, toolPart);
         state.parts.push(toolPart);
-        // Emit tool call event
+        // Emit tool call event with taskId for TUI routing
         context.eventBus.emit('llmservice:toolCall', {
             args: {},
             callId,
+            taskId: context.taskId,
             toolName,
         });
     }

package/dist/infra/cipher/llm/thought-parser.d.ts CHANGED

@@ -70,7 +70,7 @@ export declare function supportsMultimodalFunctionResponse(model: string): boole
 /**
  * Default thinking mode token budget
  */
-export declare const DEFAULT_THINKING_BUDGET = 512;
+export declare const DEFAULT_THINKING_BUDGET = 8192;
 /**
  * Synthetic thought signature used for Preview models
  */

package/dist/infra/cipher/llm/thought-parser.js CHANGED

@@ -44,7 +44,7 @@ export function supportsMultimodalFunctionResponse(model) {
 /**
  * Default thinking mode token budget
  */
-export const DEFAULT_THINKING_BUDGET = 512;
+export const DEFAULT_THINKING_BUDGET = 8192;
 /**
  * Synthetic thought signature used for Preview models
  */
@@ -159,20 +159,20 @@ export const ThinkingConfigManager = {
         // Gemini 3.x models
         if (lowerModel.startsWith('gemini-3') || lowerModel.includes('gemini-3')) {
             return {
-                includeThoughts: false,
-                thinkingLevel: ThinkingLevel.HIGH,
+                includeThoughts: true,
+                thinkingLevel: ThinkingLevel.LOW,
             };
         }
         // Gemini 2.x models
         if (lowerModel.startsWith('gemini-2') || lowerModel.includes('gemini-2')) {
             return {
-                includeThoughts: false,
+                includeThoughts: true,
                 thinkingBudget: DEFAULT_THINKING_BUDGET,
             };
         }
         // Other Gemini models - use budget as default
         return {
-            includeThoughts: false,
+            includeThoughts: true,
             thinkingBudget: DEFAULT_THINKING_BUDGET,
         };
     },