byterover-cli 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/README.md +193 -12
  2. package/dist/core/domain/cipher/process/types.d.ts +1 -1
  3. package/dist/core/domain/entities/provider-config.d.ts +92 -0
  4. package/dist/core/domain/entities/provider-config.js +181 -0
  5. package/dist/core/domain/entities/provider-registry.d.ts +55 -0
  6. package/dist/core/domain/entities/provider-registry.js +74 -0
  7. package/dist/core/domain/errors/headless-prompt-error.d.ts +11 -0
  8. package/dist/core/domain/errors/headless-prompt-error.js +18 -0
  9. package/dist/core/interfaces/cipher/i-content-generator.d.ts +30 -0
  10. package/dist/core/interfaces/cipher/i-content-generator.js +12 -1
  11. package/dist/core/interfaces/cipher/message-factory.d.ts +4 -1
  12. package/dist/core/interfaces/cipher/message-factory.js +5 -0
  13. package/dist/core/interfaces/cipher/message-types.d.ts +19 -1
  14. package/dist/core/interfaces/i-cogit-pull-service.d.ts +0 -1
  15. package/dist/core/interfaces/i-memory-retrieval-service.d.ts +0 -1
  16. package/dist/core/interfaces/i-memory-storage-service.d.ts +0 -2
  17. package/dist/core/interfaces/i-provider-config-store.d.ts +88 -0
  18. package/dist/core/interfaces/i-provider-config-store.js +1 -0
  19. package/dist/core/interfaces/i-provider-keychain-store.d.ts +33 -0
  20. package/dist/core/interfaces/i-provider-keychain-store.js +1 -0
  21. package/dist/core/interfaces/i-space-service.d.ts +1 -2
  22. package/dist/core/interfaces/i-team-service.d.ts +1 -2
  23. package/dist/core/interfaces/i-user-service.d.ts +1 -2
  24. package/dist/core/interfaces/usecase/i-curate-use-case.d.ts +2 -0
  25. package/dist/core/interfaces/usecase/i-init-use-case.d.ts +9 -3
  26. package/dist/core/interfaces/usecase/i-login-use-case.d.ts +4 -1
  27. package/dist/core/interfaces/usecase/i-pull-use-case.d.ts +5 -3
  28. package/dist/core/interfaces/usecase/i-push-use-case.d.ts +6 -4
  29. package/dist/core/interfaces/usecase/i-query-use-case.d.ts +2 -0
  30. package/dist/core/interfaces/usecase/i-status-use-case.d.ts +1 -0
  31. package/dist/infra/cipher/agent/service-initializer.d.ts +1 -1
  32. package/dist/infra/cipher/agent/service-initializer.js +0 -1
  33. package/dist/infra/cipher/file-system/file-system-service.js +5 -5
  34. package/dist/infra/cipher/http/internal-llm-http-service.d.ts +40 -1
  35. package/dist/infra/cipher/http/internal-llm-http-service.js +153 -4
  36. package/dist/infra/cipher/llm/formatters/gemini-formatter.js +8 -1
  37. package/dist/infra/cipher/llm/generators/byterover-content-generator.d.ts +2 -3
  38. package/dist/infra/cipher/llm/generators/byterover-content-generator.js +20 -11
  39. package/dist/infra/cipher/llm/generators/openrouter-content-generator.d.ts +1 -0
  40. package/dist/infra/cipher/llm/generators/openrouter-content-generator.js +26 -0
  41. package/dist/infra/cipher/llm/internal-llm-service.d.ts +13 -0
  42. package/dist/infra/cipher/llm/internal-llm-service.js +75 -4
  43. package/dist/infra/cipher/llm/model-capabilities.d.ts +74 -0
  44. package/dist/infra/cipher/llm/model-capabilities.js +157 -0
  45. package/dist/infra/cipher/llm/openrouter-llm-service.d.ts +35 -1
  46. package/dist/infra/cipher/llm/openrouter-llm-service.js +216 -28
  47. package/dist/infra/cipher/llm/stream-processor.d.ts +22 -2
  48. package/dist/infra/cipher/llm/stream-processor.js +78 -4
  49. package/dist/infra/cipher/llm/thought-parser.d.ts +1 -1
  50. package/dist/infra/cipher/llm/thought-parser.js +5 -5
  51. package/dist/infra/cipher/llm/transformers/openrouter-stream-transformer.d.ts +49 -0
  52. package/dist/infra/cipher/llm/transformers/openrouter-stream-transformer.js +272 -0
  53. package/dist/infra/cipher/llm/transformers/reasoning-extractor.d.ts +71 -0
  54. package/dist/infra/cipher/llm/transformers/reasoning-extractor.js +253 -0
  55. package/dist/infra/cipher/process/process-service.js +1 -1
  56. package/dist/infra/cipher/session/chat-session.d.ts +2 -0
  57. package/dist/infra/cipher/session/chat-session.js +13 -2
  58. package/dist/infra/cipher/storage/message-storage-service.js +4 -0
  59. package/dist/infra/cipher/tools/implementations/bash-exec-tool.js +3 -3
  60. package/dist/infra/cipher/tools/implementations/task-tool.js +1 -1
  61. package/dist/infra/cogit/http-cogit-pull-service.js +1 -1
  62. package/dist/infra/cogit/http-cogit-push-service.js +0 -1
  63. package/dist/infra/http/authenticated-http-client.d.ts +1 -3
  64. package/dist/infra/http/authenticated-http-client.js +1 -5
  65. package/dist/infra/http/openrouter-api-client.d.ts +148 -0
  66. package/dist/infra/http/openrouter-api-client.js +161 -0
  67. package/dist/infra/mcp/tools/task-result-waiter.js +9 -1
  68. package/dist/infra/memory/http-memory-retrieval-service.js +1 -1
  69. package/dist/infra/memory/http-memory-storage-service.js +2 -2
  70. package/dist/infra/process/agent-worker.js +178 -70
  71. package/dist/infra/process/inline-agent-executor.d.ts +32 -0
  72. package/dist/infra/process/inline-agent-executor.js +259 -0
  73. package/dist/infra/process/transport-handlers.d.ts +25 -4
  74. package/dist/infra/process/transport-handlers.js +57 -10
  75. package/dist/infra/repl/commands/connectors-command.js +2 -2
  76. package/dist/infra/repl/commands/index.js +5 -0
  77. package/dist/infra/repl/commands/model-command.d.ts +13 -0
  78. package/dist/infra/repl/commands/model-command.js +212 -0
  79. package/dist/infra/repl/commands/provider-command.d.ts +13 -0
  80. package/dist/infra/repl/commands/provider-command.js +181 -0
  81. package/dist/infra/repl/transport-client-helper.js +6 -2
  82. package/dist/infra/space/http-space-service.d.ts +1 -1
  83. package/dist/infra/space/http-space-service.js +2 -2
  84. package/dist/infra/storage/file-provider-config-store.d.ts +83 -0
  85. package/dist/infra/storage/file-provider-config-store.js +157 -0
  86. package/dist/infra/storage/provider-keychain-store.d.ts +37 -0
  87. package/dist/infra/storage/provider-keychain-store.js +75 -0
  88. package/dist/infra/storage/token-store.d.ts +4 -3
  89. package/dist/infra/storage/token-store.js +6 -5
  90. package/dist/infra/team/http-team-service.d.ts +1 -1
  91. package/dist/infra/team/http-team-service.js +2 -2
  92. package/dist/infra/terminal/headless-terminal.d.ts +91 -0
  93. package/dist/infra/terminal/headless-terminal.js +211 -0
  94. package/dist/infra/transport/socket-io-transport-client.d.ts +20 -0
  95. package/dist/infra/transport/socket-io-transport-client.js +88 -1
  96. package/dist/infra/usecase/curate-use-case.d.ts +40 -1
  97. package/dist/infra/usecase/curate-use-case.js +176 -15
  98. package/dist/infra/usecase/init-use-case.d.ts +27 -5
  99. package/dist/infra/usecase/init-use-case.js +200 -34
  100. package/dist/infra/usecase/login-use-case.d.ts +10 -8
  101. package/dist/infra/usecase/login-use-case.js +35 -2
  102. package/dist/infra/usecase/pull-use-case.d.ts +19 -5
  103. package/dist/infra/usecase/pull-use-case.js +71 -13
  104. package/dist/infra/usecase/push-use-case.d.ts +18 -5
  105. package/dist/infra/usecase/push-use-case.js +81 -14
  106. package/dist/infra/usecase/query-use-case.d.ts +21 -0
  107. package/dist/infra/usecase/query-use-case.js +114 -29
  108. package/dist/infra/usecase/space-list-use-case.js +1 -1
  109. package/dist/infra/usecase/space-switch-use-case.js +2 -2
  110. package/dist/infra/usecase/status-use-case.d.ts +36 -0
  111. package/dist/infra/usecase/status-use-case.js +185 -48
  112. package/dist/infra/user/http-user-service.d.ts +1 -1
  113. package/dist/infra/user/http-user-service.js +2 -2
  114. package/dist/oclif/commands/curate.d.ts +6 -1
  115. package/dist/oclif/commands/curate.js +24 -3
  116. package/dist/oclif/commands/init.d.ts +18 -0
  117. package/dist/oclif/commands/init.js +129 -0
  118. package/dist/oclif/commands/login.d.ts +9 -0
  119. package/dist/oclif/commands/login.js +45 -0
  120. package/dist/oclif/commands/pull.d.ts +16 -0
  121. package/dist/oclif/commands/pull.js +78 -0
  122. package/dist/oclif/commands/push.d.ts +17 -0
  123. package/dist/oclif/commands/push.js +87 -0
  124. package/dist/oclif/commands/query.d.ts +6 -1
  125. package/dist/oclif/commands/query.js +29 -4
  126. package/dist/oclif/commands/status.d.ts +5 -1
  127. package/dist/oclif/commands/status.js +17 -5
  128. package/dist/resources/tools/bash_exec.txt +1 -1
  129. package/dist/tui/components/api-key-dialog.d.ts +39 -0
  130. package/dist/tui/components/api-key-dialog.js +94 -0
  131. package/dist/tui/components/execution/execution-changes.d.ts +3 -1
  132. package/dist/tui/components/execution/execution-changes.js +4 -4
  133. package/dist/tui/components/execution/execution-content.d.ts +1 -1
  134. package/dist/tui/components/execution/execution-content.js +4 -12
  135. package/dist/tui/components/execution/execution-input.js +1 -1
  136. package/dist/tui/components/execution/execution-progress.d.ts +10 -13
  137. package/dist/tui/components/execution/execution-progress.js +70 -17
  138. package/dist/tui/components/execution/execution-reasoning.d.ts +16 -0
  139. package/dist/tui/components/execution/execution-reasoning.js +34 -0
  140. package/dist/tui/components/execution/execution-tool.d.ts +23 -0
  141. package/dist/tui/components/execution/execution-tool.js +125 -0
  142. package/dist/tui/components/execution/expanded-log-view.js +3 -3
  143. package/dist/tui/components/execution/log-item.d.ts +2 -0
  144. package/dist/tui/components/execution/log-item.js +6 -4
  145. package/dist/tui/components/index.d.ts +2 -0
  146. package/dist/tui/components/index.js +2 -0
  147. package/dist/tui/components/inline-prompts/inline-select.js +3 -2
  148. package/dist/tui/components/model-dialog.d.ts +63 -0
  149. package/dist/tui/components/model-dialog.js +89 -0
  150. package/dist/tui/components/onboarding/onboarding-flow.js +8 -2
  151. package/dist/tui/components/provider-dialog.d.ts +27 -0
  152. package/dist/tui/components/provider-dialog.js +31 -0
  153. package/dist/tui/components/reasoning-text.d.ts +26 -0
  154. package/dist/tui/components/reasoning-text.js +49 -0
  155. package/dist/tui/components/selectable-list.d.ts +54 -0
  156. package/dist/tui/components/selectable-list.js +180 -0
  157. package/dist/tui/components/streaming-text.d.ts +30 -0
  158. package/dist/tui/components/streaming-text.js +52 -0
  159. package/dist/tui/contexts/tasks-context.d.ts +15 -0
  160. package/dist/tui/contexts/tasks-context.js +224 -40
  161. package/dist/tui/contexts/theme-context.d.ts +1 -0
  162. package/dist/tui/contexts/theme-context.js +3 -2
  163. package/dist/tui/hooks/use-activity-logs.js +7 -1
  164. package/dist/tui/hooks/use-auth-polling.js +1 -1
  165. package/dist/tui/types/messages.d.ts +32 -5
  166. package/dist/tui/utils/index.d.ts +1 -1
  167. package/dist/tui/utils/index.js +1 -1
  168. package/dist/tui/utils/log.d.ts +0 -9
  169. package/dist/tui/utils/log.js +2 -53
  170. package/dist/tui/views/command-view.js +4 -1
  171. package/dist/utils/environment-detector.d.ts +15 -0
  172. package/dist/utils/environment-detector.js +62 -1
  173. package/oclif.manifest.json +287 -5
  174. package/package.json +1 -1
@@ -1,4 +1,6 @@
1
+ import { StreamChunkType } from '../../../core/interfaces/cipher/i-content-generator.js';
1
2
  import { AuthenticatedHttpClient } from '../../http/authenticated-http-client.js';
3
+ import { ThoughtParser } from '../llm/thought-parser.js';
2
4
  /**
3
5
  * ByteRover HTTP LLM API client.
4
6
  *
@@ -29,7 +31,6 @@ export class ByteRoverLlmHttpService {
29
31
  */
30
32
  constructor(config) {
31
33
  this.config = {
32
- accessToken: config.accessToken,
33
34
  apiBaseUrl: config.apiBaseUrl,
34
35
  projectId: config.projectId ?? 'byterover',
35
36
  region: config.region ?? 'us-east1',
@@ -72,6 +73,30 @@ export class ByteRoverLlmHttpService {
72
73
  };
73
74
  return this.callHttpGenerate(request);
74
75
  }
76
+ /**
77
+ * Call ByteRover REST LLM service to generate content with streaming.
78
+ *
79
+ * Currently falls back to non-streaming endpoint since /api/llm/generate/stream
80
+ * doesn't exist on the backend yet. Extracts thinking/reasoning from the complete
81
+ * response and yields them as separate chunks.
82
+ *
83
+ * When backend streaming is available, this will use SSE for true streaming.
84
+ *
85
+ * @param contents - For Gemini: Content[]. For Claude: MessageCreateParamsNonStreaming (complete body)
86
+ * @param config - For Gemini: GenerateContentConfig. For Claude: RequestOptions (optional HTTP options)
87
+ * @param model - Model to use (detects provider from model name)
88
+ * @param executionMetadata - Optional execution metadata (mode, executionContext)
89
+ * @yields GenerateContentChunk objects as they are generated
90
+ */
91
+ async *generateContentStream(contents, config, model, executionMetadata) {
92
+ // Fall back to non-streaming endpoint and simulate streaming
93
+ // by extracting thinking from the complete response
94
+ const response = await this.generateContent(contents, config, model, executionMetadata);
95
+ // Extract and yield thinking/reasoning chunks first
96
+ yield* this.extractThinkingFromResponse(response);
97
+ // Then yield the final content
98
+ yield* this.extractContentFromResponse(response);
99
+ }
75
100
  /**
76
101
  * Call the ByteRover REST Generate endpoint.
77
102
  *
@@ -83,11 +108,11 @@ export class ByteRoverLlmHttpService {
83
108
  */
84
109
  async callHttpGenerate(request) {
85
110
  const url = `${this.config.apiBaseUrl}/api/llm/generate`;
86
- const httpClient = new AuthenticatedHttpClient(this.config.accessToken, this.config.sessionKey);
87
- const response = await httpClient.post(url, request, {
111
+ const httpClient = new AuthenticatedHttpClient(this.config.sessionKey);
112
+ const httpResponse = await httpClient.post(url, request, {
88
113
  timeout: this.config.timeout,
89
114
  });
90
- return response.data;
115
+ return httpResponse.data;
91
116
  }
92
117
  /**
93
118
  * Detect LLM provider from model identifier.
@@ -113,4 +138,128 @@ export class ByteRoverLlmHttpService {
113
138
  detectRegionFromModel(model) {
114
139
  return model.toLowerCase().startsWith('claude') ? 'us-east5' : 'global';
115
140
  }
141
+ /**
142
+ * Extract content chunks from a complete response.
143
+ *
144
+ * Looks for text parts (excluding thinking) and function calls,
145
+ * yields them as final chunks.
146
+ *
147
+ * @param response - Complete GenerateContentResponse
148
+ * @yields GenerateContentChunk for content and tool calls
149
+ */
150
+ *extractContentFromResponse(response) {
151
+ const { candidates } = response;
152
+ if (!candidates || candidates.length === 0) {
153
+ yield {
154
+ content: '',
155
+ finishReason: 'stop',
156
+ isComplete: true,
157
+ };
158
+ return;
159
+ }
160
+ const candidate = candidates[0];
161
+ const parts = candidate?.content?.parts;
162
+ const finishReason = this.mapFinishReason(candidate?.finishReason ?? 'STOP');
163
+ if (!parts || parts.length === 0) {
164
+ yield {
165
+ content: '',
166
+ finishReason,
167
+ isComplete: true,
168
+ };
169
+ return;
170
+ }
171
+ // Collect text content (excluding thinking parts)
172
+ const textParts = [];
173
+ const functionCalls = [];
174
+ for (const part of parts) {
175
+ const partRecord = part;
176
+ // Skip thinking parts
177
+ if (partRecord.thought === true)
178
+ continue;
179
+ // Collect text
180
+ if (partRecord.text && typeof partRecord.text === 'string') {
181
+ textParts.push(partRecord.text);
182
+ }
183
+ // Collect function calls
184
+ if (partRecord.functionCall) {
185
+ functionCalls.push(partRecord.functionCall);
186
+ }
187
+ }
188
+ // Yield final content chunk
189
+ yield {
190
+ content: textParts.join('').trimEnd(),
191
+ finishReason,
192
+ isComplete: true,
193
+ toolCalls: functionCalls.length > 0
194
+ ? functionCalls.map((fc, index) => ({
195
+ function: {
196
+ arguments: JSON.stringify(fc.args ?? {}),
197
+ name: fc.name ?? '',
198
+ },
199
+ id: `call_${Date.now()}_${index}`,
200
+ type: 'function',
201
+ }))
202
+ : undefined,
203
+ };
204
+ }
205
+ /**
206
+ * Extract thinking/reasoning chunks from a complete response.
207
+ *
208
+ * Looks for parts with `thought: true` and yields them as THINKING chunks.
209
+ *
210
+ * @param response - Complete GenerateContentResponse
211
+ * @yields GenerateContentChunk for each thinking part
212
+ */
213
+ *extractThinkingFromResponse(response) {
214
+ const { candidates } = response;
215
+ if (!candidates || candidates.length === 0)
216
+ return;
217
+ const parts = candidates[0]?.content?.parts;
218
+ if (!parts)
219
+ return;
220
+ let thinkingSubject;
221
+ for (const part of parts) {
222
+ const partRecord = part;
223
+ // Check for thinking part (thought: true)
224
+ if (partRecord.thought === true && partRecord.text && typeof partRecord.text === 'string') {
225
+ const delta = partRecord.text;
226
+ // Extract subject from **Subject** markdown if not already found
227
+ if (!thinkingSubject && delta) {
228
+ const parsed = ThoughtParser.parse(delta);
229
+ if (parsed.subject) {
230
+ thinkingSubject = parsed.subject;
231
+ }
232
+ }
233
+ yield {
234
+ isComplete: false,
235
+ providerMetadata: {
236
+ subject: thinkingSubject,
237
+ },
238
+ reasoning: delta.trimEnd(),
239
+ type: StreamChunkType.THINKING,
240
+ };
241
+ }
242
+ }
243
+ }
244
+ /**
245
+ * Map provider finish reason to standard format.
246
+ */
247
+ mapFinishReason(reason) {
248
+ switch (reason.toUpperCase()) {
249
+ case 'FUNCTION_CALL':
250
+ case 'TOOL_CALLS': {
251
+ return 'tool_calls';
252
+ }
253
+ case 'LENGTH':
254
+ case 'MAX_TOKENS': {
255
+ return 'max_tokens';
256
+ }
257
+ case 'STOP': {
258
+ return 'stop';
259
+ }
260
+ default: {
261
+ return 'stop';
262
+ }
263
+ }
264
+ }
116
265
  }
@@ -72,8 +72,14 @@ export class GeminiMessageFormatter {
72
72
  }
73
73
  const textParts = [];
74
74
  const functionCallsWithSignatures = [];
75
- // Extract text and function calls from response parts
75
+ let thoughtText;
76
+ // Extract text, thoughts, and function calls from response parts
76
77
  for (const part of candidate.content.parts) {
78
+ // Check for thought parts first (Gemini 2.5+ with includeThoughts: true)
79
+ if ('thought' in part && part.thought === true && 'text' in part && part.text) {
80
+ thoughtText = part.text;
81
+ continue; // Don't add thought to textParts
82
+ }
77
83
  if ('text' in part && part.text) {
78
84
  textParts.push(part.text);
79
85
  }
@@ -102,6 +108,7 @@ export class GeminiMessageFormatter {
102
108
  {
103
109
  content: textParts.join('') || null,
104
110
  role: 'assistant',
111
+ thought: thoughtText,
105
112
  toolCalls,
106
113
  },
107
114
  ];
@@ -60,9 +60,8 @@ export declare class ByteRoverContentGenerator implements IContentGenerator {
60
60
  /**
61
61
  * Generate content with streaming.
62
62
  *
63
- * Note: The current gRPC service collects all chunks before returning.
64
- * This implementation yields the complete response as a single chunk.
65
- * True streaming can be implemented when the gRPC service exposes the stream.
63
+ * Uses the HTTP service's streaming endpoint to yield chunks as they arrive.
64
+ * Handles both regular content and thinking/reasoning parts from Gemini models.
66
65
  *
67
66
  * @param request - Generation request
68
67
  * @yields Content chunks as they are generated
@@ -117,24 +117,33 @@ export class ByteRoverContentGenerator {
117
117
  /**
118
118
  * Generate content with streaming.
119
119
  *
120
- * Note: The current gRPC service collects all chunks before returning.
121
- * This implementation yields the complete response as a single chunk.
122
- * True streaming can be implemented when the gRPC service exposes the stream.
120
+ * Uses the HTTP service's streaming endpoint to yield chunks as they arrive.
121
+ * Handles both regular content and thinking/reasoning parts from Gemini models.
123
122
  *
124
123
  * @param request - Generation request
125
124
  * @yields Content chunks as they are generated
126
125
  * @returns Async generator yielding content chunks
127
126
  */
128
127
  async *generateContentStream(request) {
129
- // For now, use non-streaming and yield complete response
130
- // True streaming can be added when gRPC service exposes the stream
131
- const response = await this.generateContent(request);
132
- yield {
133
- content: response.content,
134
- finishReason: response.finishReason,
135
- isComplete: true,
136
- toolCalls: response.toolCalls,
128
+ // Format messages for provider
129
+ let formattedMessages = this.formatter.format(request.contents);
130
+ // For Gemini 3+ models, ensure function calls in the active loop have thought signatures
131
+ if (this.providerType === 'gemini') {
132
+ formattedMessages = ensureActiveLoopHasThoughtSignatures(formattedMessages, this.config.model);
133
+ }
134
+ // Build generation config
135
+ const genConfig = this.buildGenerationConfig(request.tools ?? {}, request.systemPrompt ?? '', formattedMessages);
136
+ // Build execution metadata from request
137
+ const executionMetadata = {
138
+ sessionId: request.taskId,
139
+ taskId: request.taskId,
140
+ ...(request.executionContext && { executionContext: request.executionContext }),
137
141
  };
142
+ // Determine contents and config based on provider
143
+ const contents = this.providerType === 'claude' ? genConfig : formattedMessages;
144
+ const config = this.providerType === 'claude' ? {} : genConfig;
145
+ // Stream from HTTP service
146
+ yield* this.httpService.generateContentStream(contents, config, this.config.model, executionMetadata);
138
147
  }
139
148
  /**
140
149
  * Build Claude-specific generation configuration.
@@ -64,6 +64,7 @@ export declare class OpenRouterContentGenerator implements IContentGenerator {
64
64
  * Generate content with streaming.
65
65
  *
66
66
  * Uses OpenAI SDK's native streaming support for real-time content generation.
67
+ * Includes rawChunk for native reasoning extraction by the stream transformer.
67
68
  *
68
69
  * @param request - Generation request
69
70
  * @yields Content chunks as they are generated
@@ -121,6 +121,7 @@ export class OpenRouterContentGenerator {
121
121
  * Generate content with streaming.
122
122
  *
123
123
  * Uses OpenAI SDK's native streaming support for real-time content generation.
124
+ * Includes rawChunk for native reasoning extraction by the stream transformer.
124
125
  *
125
126
  * @param request - Generation request
126
127
  * @yields Content chunks as they are generated
@@ -161,10 +162,35 @@ export class OpenRouterContentGenerator {
161
162
  const isComplete = choice.finish_reason !== null;
162
163
  const finishReason = this.determineFinishReason(choice.finish_reason, isComplete);
163
164
  const toolCalls = this.buildToolCallsArray(accumulatedToolCalls, isComplete);
165
+ // Extract native reasoning fields if present (for OpenAI o1/o3, Grok, Gemini)
166
+ // Different providers return reasoning differently:
167
+ // - OpenAI: delta.reasoning
168
+ // - Grok: delta.reasoning_content or delta.reasoning_details
169
+ // - Gemini via OpenRouter: delta.reasoning_details array with {type: 'reasoning.text', text: '...'}
170
+ // The rawChunk allows the stream transformer to extract reasoning using model-specific logic
171
+ const deltaAny = delta;
172
+ // Check for standard reasoning fields first
173
+ let reasoning = (deltaAny.reasoning ?? deltaAny.reasoning_content ?? deltaAny.thoughts);
174
+ // Check for OpenRouter's reasoning_details array format (used for Gemini and some other models)
175
+ if (!reasoning && deltaAny.reasoning_details) {
176
+ const details = deltaAny.reasoning_details;
177
+ if (Array.isArray(details)) {
178
+ const reasoningText = details
179
+ .filter((d) => d.type === 'reasoning.text' && d.text)
180
+ .map((d) => d.text)
181
+ .join('');
182
+ if (reasoningText) {
183
+ reasoning = reasoningText;
184
+ }
185
+ }
186
+ }
164
187
  yield {
165
188
  content: delta.content ?? undefined,
166
189
  finishReason,
167
190
  isComplete,
191
+ rawChunk: chunk,
192
+ reasoning,
193
+ reasoningId: reasoning ? chunk.id : undefined,
168
194
  toolCalls,
169
195
  };
170
196
  }
@@ -218,6 +218,18 @@ export declare class ByteRoverLLMService implements ILLMService {
218
218
  * @returns Parsed internal message from response
219
219
  */
220
220
  private callLLMAndParseResponse;
221
+ /**
222
+ * Streaming variant of callLLMAndParseResponse that:
223
+ * - Uses generateContentStream for real-time chunk delivery
224
+ * - Accumulates content and tool calls from chunks
225
+ * - Emits llmservice:chunk events for thinking/reasoning chunks
226
+ * - Returns complete InternalMessage when stream ends
227
+ *
228
+ * @param request - Generation request
229
+ * @param taskId - Task ID for event emission
230
+ * @returns Parsed internal message from accumulated stream
231
+ */
232
+ private callLLMAndParseResponseStreaming;
221
233
  /**
222
234
  * Check for context overflow and trigger compaction if needed.
223
235
  * Called after each assistant response and after tool execution batches.
@@ -262,6 +274,7 @@ export declare class ByteRoverLLMService implements ILLMService {
262
274
  * @param options.fileData - Optional file data (only used on first iteration)
263
275
  * @param options.imageData - Optional image data (only used on first iteration)
264
276
  * @param options.iterationCount - Current iteration number
277
+ * @param options.stream - Whether to stream response and emit thinking chunks
265
278
  * @param options.taskId - Task ID from usecase for billing tracking
266
279
  * @param options.textInput - User input text (only used on first iteration)
267
280
  * @param options.tools - Available tools for this iteration
@@ -2,6 +2,7 @@ import { AgentStateMachine } from '../../../core/domain/cipher/agent/agent-state
2
2
  import { AgentState, TerminationReason } from '../../../core/domain/cipher/agent/agent-state.js';
3
3
  import { LlmGenerationError, LlmMaxIterationsError, LlmResponseParsingError, } from '../../../core/domain/cipher/errors/llm-error.js';
4
4
  import { getEffectiveMaxInputTokens, getMaxInputTokensForModel, getProviderFromModel, isValidProviderModel, safeParseLLMConfig, } from '../../../core/domain/cipher/llm/index.js';
5
+ import { StreamChunkType } from '../../../core/interfaces/cipher/i-content-generator.js';
5
6
  import { NoOpLogger } from '../../../core/interfaces/cipher/i-logger.js';
6
7
  import { getErrorMessage } from '../../../utils/error-helpers.js';
7
8
  import { EnvironmentContextBuilder } from '../system-prompt/environment-context-builder.js';
@@ -148,7 +149,7 @@ export class ByteRoverLLMService {
148
149
  */
149
150
  async completeTask(textInput, options) {
150
151
  // Extract options with defaults
151
- const { executionContext, fileData, imageData, signal, taskId } = options ?? {};
152
+ const { executionContext, fileData, imageData, signal, stream, taskId } = options ?? {};
152
153
  // Get filtered tools based on command type (e.g., only read-only tools for 'query')
153
154
  const toolSet = this.toolManager.getToolsForCommand(options?.executionContext?.commandType);
154
155
  // Create state machine with configured limits
@@ -174,6 +175,7 @@ export class ByteRoverLLMService {
174
175
  fileData,
175
176
  imageData,
176
177
  iterationCount: stateMachine.getContext().turnCount,
178
+ stream,
177
179
  taskId,
178
180
  textInput,
179
181
  tools: toolSet,
@@ -340,6 +342,69 @@ export class ByteRoverLLMService {
340
342
  throw new LlmGenerationError(error instanceof Error ? error.message : String(error), 'byterover', this.config.model);
341
343
  }
342
344
  }
345
+ /**
346
+ * Streaming variant of callLLMAndParseResponse that:
347
+ * - Uses generateContentStream for real-time chunk delivery
348
+ * - Accumulates content and tool calls from chunks
349
+ * - Emits llmservice:chunk events for thinking/reasoning chunks
350
+ * - Returns complete InternalMessage when stream ends
351
+ *
352
+ * @param request - Generation request
353
+ * @param taskId - Task ID for event emission
354
+ * @returns Parsed internal message from accumulated stream
355
+ */
356
+ async callLLMAndParseResponseStreaming(request, taskId) {
357
+ try {
358
+ let accumulatedContent = '';
359
+ let accumulatedToolCalls = [];
360
+ // Stream chunks and accumulate content
361
+ for await (const chunk of this.generator.generateContentStream(request)) {
362
+ // Emit thinking/reasoning chunks as events for TUI display
363
+ if (chunk.type === StreamChunkType.THINKING && chunk.reasoning) {
364
+ this.sessionEventBus.emit('llmservice:chunk', {
365
+ content: chunk.reasoning,
366
+ isComplete: chunk.isComplete,
367
+ taskId,
368
+ type: 'reasoning', // Convert THINKING to 'reasoning' for TUI compatibility
369
+ });
370
+ }
371
+ // Accumulate text content (skip thinking chunks from accumulated content)
372
+ if (chunk.content && chunk.type !== StreamChunkType.THINKING) {
373
+ accumulatedContent += chunk.content;
374
+ // Emit text chunks for TUI display
375
+ this.sessionEventBus.emit('llmservice:chunk', {
376
+ content: chunk.content,
377
+ isComplete: chunk.isComplete,
378
+ taskId,
379
+ type: 'text',
380
+ });
381
+ }
382
+ // Accumulate tool calls
383
+ if (chunk.toolCalls) {
384
+ accumulatedToolCalls = chunk.toolCalls;
385
+ }
386
+ }
387
+ // Convert accumulated response to InternalMessage format
388
+ const message = {
389
+ content: accumulatedContent || null,
390
+ role: 'assistant',
391
+ toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
392
+ };
393
+ // Validate the message has content or tool calls
394
+ if (!message.content && (!message.toolCalls || message.toolCalls.length === 0)) {
395
+ throw new LlmResponseParsingError('Response has neither content nor tool calls', 'byterover', this.config.model);
396
+ }
397
+ return message;
398
+ }
399
+ catch (error) {
400
+ // Re-throw LLM errors as-is
401
+ if (error instanceof LlmResponseParsingError || error instanceof LlmGenerationError) {
402
+ throw error;
403
+ }
404
+ // Wrap other errors
405
+ throw new LlmGenerationError(error instanceof Error ? error.message : String(error), 'byterover', this.config.model);
406
+ }
407
+ }
343
408
  /**
344
409
  * Check for context overflow and trigger compaction if needed.
345
410
  * Called after each assistant response and after tool execution batches.
@@ -484,13 +549,14 @@ export class ByteRoverLLMService {
484
549
  * @param options.fileData - Optional file data (only used on first iteration)
485
550
  * @param options.imageData - Optional image data (only used on first iteration)
486
551
  * @param options.iterationCount - Current iteration number
552
+ * @param options.stream - Whether to stream response and emit thinking chunks
487
553
  * @param options.taskId - Task ID from usecase for billing tracking
488
554
  * @param options.textInput - User input text (only used on first iteration)
489
555
  * @param options.tools - Available tools for this iteration
490
556
  * @returns Final response string if complete, null if more iterations needed
491
557
  */
492
558
  async executeAgenticIteration(options) {
493
- const { executionContext, fileData, imageData, iterationCount, taskId, textInput, tools } = options;
559
+ const { executionContext, fileData, imageData, iterationCount, stream, taskId, textInput, tools } = options;
494
560
  // Build system prompt using SystemPromptManager (before compression for correct token accounting)
495
561
  // Use filtered tool names based on command type (e.g., only read-only tools for 'query')
496
562
  const availableTools = this.toolManager.getToolNamesForCommand(executionContext?.commandType);
@@ -549,7 +615,9 @@ export class ByteRoverLLMService {
549
615
  // Add user message and compress context within mutex lock
550
616
  return this.mutex.withLock(async () => {
551
617
  // Add user message to context only on the first iteration
552
- await this.contextManager.addUserMessage(textInput, imageData, fileData);
618
+ if (iterationCount === 0) {
619
+ await this.contextManager.addUserMessage(textInput, imageData, fileData);
620
+ }
553
621
  const messages = this.contextManager.getMessages();
554
622
  const messageTokenCounts = messages.map((msg) => this.generator.estimateTokensSync(typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)));
555
623
  const maxMessageTokens = this.config.maxInputTokens - systemPromptTokens;
@@ -578,7 +646,10 @@ export class ByteRoverLLMService {
578
646
  tools: toolsForThisIteration,
579
647
  });
580
648
  // Call LLM via generator (retry + logging handled by decorators)
581
- const lastMessage = await this.callLLMAndParseResponse(request);
649
+ // Use streaming variant if enabled to emit thinking/reasoning chunks
650
+ const lastMessage = stream
651
+ ? await this.callLLMAndParseResponseStreaming(request, taskId)
652
+ : await this.callLLMAndParseResponse(request);
582
653
  // Check if there are tool calls
583
654
  if (!lastMessage.toolCalls || lastMessage.toolCalls.length === 0) {
584
655
  const response = await this.handleFinalResponse(lastMessage, taskId);
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Model Capabilities Detection
3
+ *
4
+ * Detects reasoning/thinking capabilities and format for each model.
5
+ * Following OpenCode's pattern of model-specific capability detection.
6
+ *
7
+ * Different models use different formats for reasoning:
8
+ * - OpenAI (o1, o3, gpt-5): Native `reasoning` field in API response
9
+ * - Grok: `reasoning_content` or `reasoning_details` fields
10
+ * - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
11
+ * - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
12
+ */
13
+ /**
14
+ * Reasoning format types
15
+ */
16
+ export type ReasoningFormat =
17
+ /** Model uses <think>...</think> XML tags in content */
18
+ 'interleaved'
19
+ /** Model uses a native field in the API response */
20
+ | 'native-field'
21
+ /** Model interleaves reasoning in content parts */
22
+ | 'none'
23
+ /** Model does not support reasoning */
24
+ | 'think-tags';
25
/**
 * Model capabilities for reasoning/thinking.
 *
 * Describes whether a model emits reasoning output and, if so, where to
 * find it in the API response, so callers can choose the right parsing
 * strategy (see {@link ReasoningFormat}).
 */
export interface ModelCapabilities {
  /** Additional fields to check for reasoning content */
  alternativeFields?: string[];
  /** Whether the model supports reasoning/thinking output */
  reasoning: boolean;
  /** The field name for native reasoning (e.g., 'reasoning_content', 'reasoning', 'thoughts') */
  reasoningField?: string;
  /** How the model outputs reasoning content */
  reasoningFormat: ReasoningFormat;
}
38
/**
 * Get model capabilities for a given model ID.
 *
 * Ambient declaration — the lookup logic lives in the corresponding .js file.
 *
 * @param modelId - The model identifier (can be full path like "openai/gpt-5" or short like "gpt-5")
 * @returns Model capabilities including reasoning support and format
 *
 * @example
 * ```typescript
 * const caps = getModelCapabilities('openai/o3-mini')
 * // { reasoning: true, reasoningFormat: 'native-field', reasoningField: 'reasoning' }
 *
 * const caps2 = getModelCapabilities('anthropic/claude-3-opus')
 * // { reasoning: true, reasoningFormat: 'think-tags' }
 * ```
 */
export declare function getModelCapabilities(modelId: string): ModelCapabilities;
54
/**
 * Check if a model supports reasoning.
 *
 * Convenience predicate over {@link getModelCapabilities}.
 *
 * @param modelId - The model identifier
 * @returns True if the model supports reasoning output
 */
export declare function supportsReasoning(modelId: string): boolean;
61
/**
 * Check if a model uses think tags for reasoning.
 *
 * True for models whose reasoning format is 'think-tags'
 * (e.g., Claude/DeepSeek/MiniMax per the file header).
 *
 * @param modelId - The model identifier
 * @returns True if the model uses <think>...</think> tags
 */
export declare function usesThinkTags(modelId: string): boolean;
68
/**
 * Check if a model uses native reasoning fields.
 *
 * True for models whose reasoning format is 'native-field'
 * (e.g., OpenAI o1/o3/gpt-5 per the file header).
 *
 * @param modelId - The model identifier
 * @returns True if the model uses native API fields for reasoning
 */
export declare function usesNativeReasoning(modelId: string): boolean;