npm - byterover-cli - Versions diffs - 3.10.2 → 3.11.0 - Mend

byterover-cli 3.10.2 → 3.11.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (90)

package/README.md CHANGED Viewed

@@ -34,7 +34,7 @@ Or download our self-hosted PDF version of the paper [here](https://byterover.de
 - 🖥️ Interactive TUI with REPL interface (React/Ink)
 - 🧠 Context tree and knowledge storage management
 - 🔀 Git-like version control for the context tree (branch, commit, merge, push/pull)
-- 🤖 18 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, and more)
+- 🤖 20 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, DeepSeek, and more)
 - 🛠️ 24 built-in agent tools (code exec, file ops, knowledge search, memory management)
 - 🔄 Cloud sync with push/pull
 - 👀 Review workflow for curate operations (approve/reject pending changes)
@@ -220,7 +220,7 @@ Run `brv --help` for the full command reference.
 <details>
 <summary><h2>Supported LLM Providers</h2></summary>
-ByteRover CLI supports 18 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
+ByteRover CLI supports 20 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
 | Provider | Description |
 |----------|-------------|
@@ -233,6 +233,7 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
 | Cerebras | Fast inference |
 | Cohere | Command models |
 | DeepInfra | Open-source model hosting |
+| DeepSeek | DeepSeek V3 and R1 reasoning models |
 | OpenRouter | Multi-provider gateway |
 | Perplexity | Search-augmented models |
 | TogetherAI | Open-source model hosting |
@@ -240,6 +241,7 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
 | Minimax | Minimax models |
 | Moonshot | Kimi models |
 | GLM | GLM models |
+| GLM Coding Plan | GLM models on Z.AI Coding Plan subscription |
 | OpenAI-Compatible | Any OpenAI-compatible API |
 | ByteRover | ByteRover's hosted models |

package/dist/agent/core/domain/llm/registry.d.ts CHANGED Viewed

@@ -119,3 +119,15 @@ export declare function resolveRegistryProvider(model: string, explicitProvider?
  * @returns true if provider accepts arbitrary models
  */
 export declare function acceptsAnyModel(provider: LLMProvider): boolean;
+/**
+ * Check whether a model accepts the sampling parameters `temperature`, `top_p`,
+ * and `top_k`.
+ *
+ * Some models (e.g. Claude Opus 4.7) reject any non-default value with a 400
+ * error — callers must omit these parameters entirely when this returns false.
+ *
+ * Handles both bare model ids (`claude-opus-4-7`) and OpenRouter-style prefixed
+ * ids (`anthropic/claude-opus-4-7`). Unknown models default to true so we don't
+ * regress arbitrary OpenRouter-routed models.
+ */
+export declare function modelAcceptsSamplingParameters(modelId: string): boolean;

package/dist/agent/core/domain/llm/registry.js CHANGED Viewed

@@ -25,6 +25,21 @@ export const LLM_REGISTRY = {
         defaultModel: '',
         models: [
             // Claude 4.x series
+            {
+                capabilities: {
+                    acceptsSamplingParameters: false,
+                    supportsAudio: false,
+                    supportsImages: true,
+                    supportsPdf: true,
+                    supportsStreaming: true,
+                },
+                charsPerToken: 3.5,
+                displayName: 'Claude Opus 4.7',
+                maxInputTokens: 200_000,
+                maxOutputTokens: 128_000,
+                name: 'claude-opus-4-7',
+                supportedFileTypes: ['image', 'pdf'],
+            },
             {
                 capabilities: { supportsAudio: false, supportsImages: true, supportsPdf: true, supportsStreaming: true },
                 charsPerToken: 3.5,
@@ -684,3 +699,37 @@ export function acceptsAnyModel(provider) {
     // OpenAI provider type accepts arbitrary models (OpenRouter, direct OpenAI, xAI, etc.)
     return provider === 'openai';
 }
+/**
+ * Strip an OpenRouter-style `provider/` prefix from a model id.
+ * Returns the input unchanged if no slash is present.
+ */
+function stripRouterPrefix(modelId) {
+    const slash = modelId.lastIndexOf('/');
+    return slash === -1 ? modelId : modelId.slice(slash + 1);
+}
+/**
+ * Check whether a model accepts the sampling parameters `temperature`, `top_p`,
+ * and `top_k`.
+ *
+ * Some models (e.g. Claude Opus 4.7) reject any non-default value with a 400
+ * error — callers must omit these parameters entirely when this returns false.
+ *
+ * Handles both bare model ids (`claude-opus-4-7`) and OpenRouter-style prefixed
+ * ids (`anthropic/claude-opus-4-7`). Unknown models default to true so we don't
+ * regress arbitrary OpenRouter-routed models.
+ */
+export function modelAcceptsSamplingParameters(modelId) {
+    const bare = stripRouterPrefix(modelId);
+    for (const provider of PROVIDER_TYPES) {
+        const info = getModelInfo(provider, bare);
+        if (info) {
+            return info.capabilities.acceptsSamplingParameters !== false;
+        }
+    }
+    // Family-level fallback: catches date-suffixed snapshots not yet in the registry
+    // (e.g. `claude-opus-4-7-20260101`). Extend this list as new families deprecate
+    // sampling params.
+    if (bare.startsWith('claude-opus-4-7'))
+        return false;
+    return true;
+}

package/dist/agent/core/domain/llm/types.d.ts CHANGED Viewed

@@ -34,6 +34,12 @@ export type SupportedFileType = (typeof SUPPORTED_FILE_TYPES)[number];
  * Defines what features a specific model supports.
  */
 export interface ModelCapabilities {
+    /**
+     * Whether the model accepts the sampling parameters `temperature`, `top_p`, and `top_k`.
+     * When false, callers must omit these from the request — Claude Opus 4.7 returns 400
+     * on any non-default value. Defaults to true when omitted.
+     */
+    acceptsSamplingParameters?: boolean;
     /** Whether the model supports audio input */
     supportsAudio: boolean;
     /** Whether the model supports image input */

package/dist/agent/core/interfaces/i-content-generator.d.ts CHANGED Viewed

@@ -65,6 +65,14 @@ export interface GenerateContentResponse {
     finishReason: 'error' | 'max_tokens' | 'stop' | 'tool_calls';
     /** Raw response from provider (for debugging) */
     rawResponse?: unknown;
+    /**
+     * Reasoning / thinking text emitted by the model (e.g. DeepSeek-R1's
+     * `reasoning_content`, OpenAI o1's reasoning summary). Required to be
+     * passed back to the API on the next turn for some providers — DeepSeek-R1
+     * rejects the next call with "The reasoning_content in the thinking mode
+     * must be passed back to the API" if absent.
+     */
+    reasoning?: string;
     /** Tool calls requested by the model */
     toolCalls?: ToolCall[];
     /** Token usage statistics */

package/dist/agent/infra/llm/agent-llm-service.js CHANGED Viewed

@@ -388,9 +388,13 @@ export class AgentLLMService {
     async callLLMAndParseResponse(request) {
         try {
             const response = await this.generator.generateContent(request);
-            // Convert response to InternalMessage format
+            // Convert response to InternalMessage format. The reasoning field must
+            // round-trip on the next turn for some providers (e.g. DeepSeek-R1
+            // rejects with "reasoning_content must be passed back to the API"
+            // otherwise).
             const message = {
                 content: response.content,
+                ...(response.reasoning && { reasoning: response.reasoning }),
                 role: 'assistant',
                 toolCalls: response.toolCalls,
             };
@@ -423,11 +427,15 @@ export class AgentLLMService {
     async callLLMAndParseResponseStreaming(request, taskId) {
         try {
             let accumulatedContent = '';
+            let accumulatedReasoning = '';
             let accumulatedToolCalls = [];
             // Stream chunks and accumulate content
             for await (const chunk of this.generator.generateContentStream(request)) {
-                // Emit thinking/reasoning chunks as events for TUI display
+                // Emit thinking/reasoning chunks as events for TUI display + accumulate
+                // for the InternalMessage so it round-trips on the next turn (DeepSeek-R1
+                // requires reasoning_content to be passed back).
                 if (chunk.type === StreamChunkType.THINKING && chunk.reasoning) {
+                    accumulatedReasoning += chunk.reasoning;
                     this.sessionEventBus.emit('llmservice:chunk', {
                         content: chunk.reasoning,
                         isComplete: chunk.isComplete,
@@ -454,6 +462,7 @@ export class AgentLLMService {
             // Convert accumulated response to InternalMessage format
             const message = {
                 content: accumulatedContent || null,
+                ...(accumulatedReasoning && { reasoning: accumulatedReasoning }),
                 role: 'assistant',
                 toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
             };
@@ -960,8 +969,10 @@ export class AgentLLMService {
             provider: this.providerId,
             taskId: taskId || undefined,
         });
-        // Add assistant message to context
-        await this.contextManager.addAssistantMessage(content);
+        // Add assistant message to context. Pass reasoning so it round-trips to
+        // providers that demand it (DeepSeek-R1 rejects with "reasoning_content
+        // must be passed back to the API" otherwise).
+        await this.contextManager.addAssistantMessage(content, undefined, lastMessage.reasoning);
         return content;
     }
     /**
@@ -1084,9 +1095,10 @@ export class AgentLLMService {
         }
         // Emit thought events if present
         this.handleThoughts(lastMessage, taskId);
-        // Has tool calls - add assistant message with tool calls
+        // Has tool calls - add assistant message with tool calls. Pass reasoning
+        // so it round-trips to providers that demand it.
         const assistantContent = this.extractTextContent(lastMessage);
-        await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls);
+        await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls, lastMessage.reasoning);
         // Step 1: Create pending tool parts for all tool calls
         for (const toolCall of lastMessage.toolCalls) {
             const toolArgs = JSON.parse(toolCall.function.arguments);

package/dist/agent/infra/llm/context/context-manager.d.ts CHANGED Viewed

@@ -140,8 +140,11 @@ export declare class ContextManager<T> {
      *
      * @param content - Message content (text or null if only tool calls)
      * @param toolCalls - Optional tool calls made by the assistant
+     * @param reasoning - Optional reasoning/thinking trace from the model.
+     *   Required to round-trip for providers like DeepSeek-R1 that reject
+     *   the next turn unless reasoning_content is replayed.
      */
-    addAssistantMessage(content: null | string, toolCalls?: InternalMessage['toolCalls']): Promise<void>;
+    addAssistantMessage(content: null | string, toolCalls?: InternalMessage['toolCalls'], reasoning?: string): Promise<void>;
     /**
      * Add a system message to the conversation.
      *

package/dist/agent/infra/llm/context/context-manager.js CHANGED Viewed

@@ -83,10 +83,14 @@ export class ContextManager {
      *
      * @param content - Message content (text or null if only tool calls)
      * @param toolCalls - Optional tool calls made by the assistant
+     * @param reasoning - Optional reasoning/thinking trace from the model.
+     *   Required to round-trip for providers like DeepSeek-R1 that reject
+     *   the next turn unless reasoning_content is replayed.
      */
-    async addAssistantMessage(content, toolCalls) {
+    async addAssistantMessage(content, toolCalls, reasoning) {
         const message = {
             content,
+            ...(reasoning && { reasoning }),
             role: 'assistant',
             toolCalls,
         };

package/dist/agent/infra/llm/generators/ai-sdk-content-generator.d.ts CHANGED Viewed

@@ -21,6 +21,12 @@ export declare function prependCachedSystemMessage(systemPrompt: string | undefi
 export interface AiSdkContentGeneratorConfig {
     /** Characters per token ratio for token estimation */
     charsPerToken?: number;
+    /**
+     * Drop the sampling request parameters (`temperature`, `top_p`, `top_k`)
+     * before calling the model. Set when targeting models that reject these
+     * (e.g. Claude Opus 4.7 returns 400 on any non-default value).
+     */
+    excludeSamplingParameters?: boolean;
     /** AI SDK LanguageModel instance */
     model: LanguageModel;
 }
@@ -34,11 +40,18 @@ export interface AiSdkContentGeneratorConfig {
  */
 export declare class AiSdkContentGenerator implements IContentGenerator {
     private readonly charsPerToken;
+    private readonly excludeSamplingParameters;
     private readonly model;
     constructor(config: AiSdkContentGeneratorConfig);
     estimateTokensSync(content: string): number;
     generateContent(request: GenerateContentRequest): Promise<GenerateContentResponse>;
     generateContentStream(request: GenerateContentRequest): AsyncGenerator<GenerateContentChunk>;
+    /**
+     * Build the sampling-parameter slice for an AI SDK request. Returns an
+     * empty object when the model-level exclusion is set, so callers can spread
+     * the result into the request payload without conditionally emitting fields.
+     */
+    private buildSamplingParams;
 }
 /**
  * Extract a human-readable message from an AI SDK stream error.

package/dist/agent/infra/llm/generators/ai-sdk-content-generator.js CHANGED Viewed

@@ -37,10 +37,12 @@ export function prependCachedSystemMessage(systemPrompt, messages) {
  */
 export class AiSdkContentGenerator {
     charsPerToken;
+    excludeSamplingParameters;
     model;
     constructor(config) {
         this.model = config.model;
         this.charsPerToken = config.charsPerToken ?? DEFAULT_CHARS_PER_TOKEN;
+        this.excludeSamplingParameters = config.excludeSamplingParameters ?? false;
     }
     estimateTokensSync(content) {
         return Math.ceil(content.length / this.charsPerToken);
@@ -53,10 +55,8 @@ export class AiSdkContentGenerator {
             maxRetries: 0, // RetryableContentGenerator handles retries
             messages,
             model: this.model,
-            temperature: request.config.temperature,
+            ...this.buildSamplingParams(request.config),
             ...(tools && { tools }),
-            ...(request.config.topK !== undefined && { topK: request.config.topK }),
-            ...(request.config.topP !== undefined && { topP: request.config.topP }),
         });
         // Map AI SDK tool calls to our ToolCall format
         // Preserve thoughtSignature from providerMetadata (required by Gemini 3+ models)
@@ -77,6 +77,7 @@ export class AiSdkContentGenerator {
             content: result.text,
             finishReason: mapFinishReason(result.finishReason, toolCalls.length > 0),
             rawResponse: result.response,
+            ...(result.reasoningText && { reasoning: result.reasoningText }),
             toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
             usage: {
                 completionTokens: result.usage.outputTokens ?? 0,
@@ -93,10 +94,8 @@ export class AiSdkContentGenerator {
             maxRetries: 0,
             messages,
             model: this.model,
-            temperature: request.config.temperature,
+            ...this.buildSamplingParams(request.config),
             ...(tools && { tools }),
-            ...(request.config.topK !== undefined && { topK: request.config.topK }),
-            ...(request.config.topP !== undefined && { topP: request.config.topP }),
         });
         // Accumulate tool calls during streaming
         const pendingToolCalls = [];
@@ -155,6 +154,20 @@ export class AiSdkContentGenerator {
             }
         }
     }
+    /**
+     * Build the sampling-parameter slice for an AI SDK request. Returns an
+     * empty object when the model-level exclusion is set, so callers can spread
+     * the result into the request payload without conditionally emitting fields.
+     */
+    buildSamplingParams(config) {
+        if (this.excludeSamplingParameters)
+            return {};
+        return {
+            ...(config.temperature !== undefined && { temperature: config.temperature }),
+            ...(config.topK !== undefined && { topK: config.topK }),
+            ...(config.topP !== undefined && { topP: config.topP }),
+        };
+    }
 }
 /**
  * Extract a human-readable message from an AI SDK stream error.

package/dist/agent/infra/llm/generators/ai-sdk-message-converter.js CHANGED Viewed

@@ -129,19 +129,31 @@ function convertUserMessage(msg) {
 }
 /**
  * Convert an internal assistant message to AI SDK format.
- * Handles text content and tool calls.
+ * Handles reasoning, text content, and tool calls.
+ *
+ * The reasoning part is required when the message is replayed to providers
+ * that demand the previous turn's thinking trace round-trip back — DeepSeek-R1
+ * rejects requests with "The reasoning_content in the thinking mode must be
+ * passed back to the API" if the assistant message in history lacks the
+ * reasoning that was emitted on the prior turn.
  */
 function convertAssistantMessage(msg) {
     const textContent = extractTextContent(msg);
     const hasToolCalls = msg.toolCalls && msg.toolCalls.length > 0;
-    if (!textContent && !hasToolCalls) {
+    const hasReasoning = Boolean(msg.reasoning);
+    if (!textContent && !hasToolCalls && !hasReasoning) {
         return undefined;
     }
-    // Simple text-only case
-    if (textContent && !hasToolCalls) {
+    // Simple text-only case (no reasoning, no tools)
+    if (textContent && !hasToolCalls && !hasReasoning) {
         return { content: textContent, role: 'assistant' };
     }
     const parts = [];
+    // Reasoning must come first — providers that consume it expect it at the
+    // start of the assistant turn, before any text/tool-call output.
+    if (msg.reasoning) {
+        parts.push({ text: msg.reasoning, type: 'reasoning' });
+    }
     if (textContent) {
         parts.push({ text: textContent, type: 'text' });
     }

package/dist/agent/infra/llm/generators/byterover-content-generator.d.ts CHANGED Viewed

@@ -31,6 +31,7 @@ export interface ByteRoverContentGeneratorConfig {
  * - Response parsing to unified format
  */
 export declare class ByteRoverContentGenerator implements IContentGenerator {
+    private readonly acceptsSamplingParameters;
     private readonly config;
     private readonly formatter;
     private readonly httpService;

package/dist/agent/infra/llm/generators/byterover-content-generator.js CHANGED Viewed

@@ -5,6 +5,7 @@
  * Supports both Claude and Gemini models through the unified HTTP interface.
  */
 import { FunctionCallingConfigMode } from '@google/genai';
+import { modelAcceptsSamplingParameters } from '../../../core/domain/llm/registry.js';
 import { ClaudeMessageFormatter } from '../formatters/claude-formatter.js';
 import { ensureActiveLoopHasThoughtSignatures, GeminiMessageFormatter } from '../formatters/gemini-formatter.js';
 import { ThinkingConfigManager } from '../thought-parser.js';
@@ -21,6 +22,7 @@ import { GeminiTokenizer } from '../tokenizers/gemini-tokenizer.js';
  * - Response parsing to unified format
  */
 export class ByteRoverContentGenerator {
+    acceptsSamplingParameters;
     config;
     formatter;
     httpService;
@@ -40,6 +42,7 @@ export class ByteRoverContentGenerator {
             temperature: config.temperature ?? 0.7,
             thinkingConfig: config.thinkingConfig,
         };
+        this.acceptsSamplingParameters = modelAcceptsSamplingParameters(this.config.model);
         // Detect provider type from model name
         this.providerType = this.detectProviderType(this.config.model);
         // Initialize formatter and tokenizer based on provider type
@@ -160,7 +163,7 @@ export class ByteRoverContentGenerator {
             messages,
             model: this.config.model,
             system: systemPrompt,
-            temperature: this.config.temperature,
+            ...(this.acceptsSamplingParameters && { temperature: this.config.temperature }),
             ...(claudeTools.length > 0 && { tools: claudeTools }),
         };
         /* eslint-enable camelcase */

package/dist/agent/infra/llm/model-capabilities.d.ts CHANGED Viewed

@@ -9,7 +9,8 @@
  * - Grok: `reasoning_content` or `reasoning_details` fields
  * - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
  * - GLM (Zhipu AI): `reasoning_content` field in API response
- * - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
+ * - DeepSeek (R1/Reasoner): `reasoning_content` field in API response (OpenAI-compatible)
+ * - Claude/MiniMax: `<think>...</think>` XML tags in content
  */
 /**
  * Reasoning format types

package/dist/agent/infra/llm/model-capabilities.js CHANGED Viewed

@@ -9,7 +9,8 @@
  * - Grok: `reasoning_content` or `reasoning_details` fields
  * - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
  * - GLM (Zhipu AI): `reasoning_content` field in API response
- * - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
+ * - DeepSeek (R1/Reasoner): `reasoning_content` field in API response (OpenAI-compatible)
+ * - Claude/MiniMax: `<think>...</think>` XML tags in content
  */
 /**
  * Get model capabilities for a given model ID.
@@ -95,13 +96,14 @@ export function getModelCapabilities(modelId) {
             reasoningFormat: 'think-tags',
         };
     }
-    // DeepSeek models use think tags
+    // DeepSeek models — reasoning models stream `reasoning_content` natively
+    // (OpenAI-compatible field), not <think> tags.
     if (id.includes('deepseek')) {
-        // DeepSeek-R1 and reasoning models
         if (id.includes('r1') || id.includes('reasoner')) {
             return {
                 reasoning: true,
-                reasoningFormat: 'think-tags',
+                reasoningField: 'reasoning_content',
+                reasoningFormat: 'native-field',
             };
         }
         return {

package/dist/agent/infra/llm/providers/anthropic.js CHANGED Viewed

@@ -4,6 +4,7 @@
  * Direct access to Claude models via @ai-sdk/anthropic.
  */
 import { createAnthropic } from '@ai-sdk/anthropic';
+import { modelAcceptsSamplingParameters } from '../../../core/domain/llm/registry.js';
 import { AiSdkContentGenerator } from '../generators/ai-sdk-content-generator.js';
 export const anthropicProvider = {
     apiKeyUrl: 'https://console.anthropic.com/settings/keys',
@@ -14,6 +15,7 @@ export const anthropicProvider = {
         const provider = createAnthropic({ apiKey: config.apiKey });
         return new AiSdkContentGenerator({
             charsPerToken: 3.5,
+            excludeSamplingParameters: !modelAcceptsSamplingParameters(config.model),
             model: provider(config.model),
         });
     },

package/dist/agent/infra/llm/providers/deepseek.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * DeepSeek Provider Module
+ *
+ * Access to DeepSeek V3 (deepseek-chat) and R1 (deepseek-reasoner) via their
+ * OpenAI-compatible API. The reasoner model streams thinking through the
+ * native `reasoning_content` field rather than `<think>` tags — see
+ * model-capabilities.ts for the parser routing.
+ */
+import type { ProviderModule } from './types.js';
+export declare const deepseekProvider: ProviderModule;

package/dist/agent/infra/llm/providers/deepseek.js ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * DeepSeek Provider Module
+ *
+ * Access to DeepSeek V3 (deepseek-chat) and R1 (deepseek-reasoner) via their
+ * OpenAI-compatible API. The reasoner model streams thinking through the
+ * native `reasoning_content` field rather than `<think>` tags — see
+ * model-capabilities.ts for the parser routing.
+ */
+import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
+import { AiSdkContentGenerator } from '../generators/ai-sdk-content-generator.js';
+export const deepseekProvider = {
+    apiKeyUrl: 'https://platform.deepseek.com/api_keys',
+    authType: 'api-key',
+    baseUrl: 'https://api.deepseek.com/v1',
+    category: 'other',
+    createGenerator(config) {
+        const provider = createOpenAICompatible({
+            apiKey: config.apiKey,
+            baseURL: 'https://api.deepseek.com/v1',
+            name: 'deepseek',
+        });
+        return new AiSdkContentGenerator({
+            model: provider.chatModel(config.model),
+        });
+    },
+    defaultModel: 'deepseek-chat',
+    description: 'DeepSeek V3 and R1 reasoning models',
+    envVars: ['DEEPSEEK_API_KEY'],
+    id: 'deepseek',
+    name: 'DeepSeek',
+    priority: 19,
+    providerType: 'openai',
+};

package/dist/agent/infra/llm/providers/glm-coding-plan.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * GLM Coding Plan (Z.AI) Provider Module
+ *
+ * Same Z.AI account as the standard `glm` provider but routes through the
+ * coding-plan endpoint so subscription quota is consumed instead of
+ * pay-per-token billing.
+ */
+import type { ProviderModule } from './types.js';
+export declare const glmCodingPlanProvider: ProviderModule;

package/dist/agent/infra/llm/providers/glm-coding-plan.js ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * GLM Coding Plan (Z.AI) Provider Module
+ *
+ * Same Z.AI account as the standard `glm` provider but routes through the
+ * coding-plan endpoint so subscription quota is consumed instead of
+ * pay-per-token billing.
+ */
+import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
+import { AiSdkContentGenerator } from '../generators/ai-sdk-content-generator.js';
+export const glmCodingPlanProvider = {
+    apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
+    authType: 'api-key',
+    baseUrl: 'https://api.z.ai/api/coding/paas/v4',
+    category: 'other',
+    createGenerator(config) {
+        const provider = createOpenAICompatible({
+            apiKey: config.apiKey,
+            baseURL: 'https://api.z.ai/api/coding/paas/v4',
+            name: 'glm-coding-plan',
+        });
+        return new AiSdkContentGenerator({
+            model: provider.chatModel(config.model),
+        });
+    },
+    defaultModel: 'glm-4.7',
+    description: 'GLM models on the Z.AI Coding Plan subscription',
+    envVars: ['ZHIPU_API_KEY'],
+    id: 'glm-coding-plan',
+    name: 'GLM Coding Plan (Z.AI)',
+    priority: 17.5,
+    providerType: 'openai',
+};

package/dist/agent/infra/llm/providers/index.js CHANGED Viewed

@@ -10,6 +10,8 @@ import { byteroverProvider } from './byterover.js';
 import { cerebrasProvider } from './cerebras.js';
 import { cohereProvider } from './cohere.js';
 import { deepinfraProvider } from './deepinfra.js';
+import { deepseekProvider } from './deepseek.js';
+import { glmCodingPlanProvider } from './glm-coding-plan.js';
 import { glmProvider } from './glm.js';
 import { googleProvider } from './google.js';
 import { groqProvider } from './groq.js';
@@ -33,7 +35,9 @@ const PROVIDER_MODULES = {
     cerebras: cerebrasProvider,
     cohere: cohereProvider,
     deepinfra: deepinfraProvider,
+    deepseek: deepseekProvider,
     glm: glmProvider,
+    'glm-coding-plan': glmCodingPlanProvider,
     google: googleProvider,
     groq: groqProvider,
     minimax: minimaxProvider,

package/dist/agent/infra/llm/providers/openrouter.js CHANGED Viewed

@@ -4,6 +4,7 @@
  * Access 200+ models via the OpenRouter aggregator using @openrouter/ai-sdk-provider.
  */
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
+import { modelAcceptsSamplingParameters } from '../../../core/domain/llm/registry.js';
 import { AiSdkContentGenerator } from '../generators/ai-sdk-content-generator.js';
 export const openrouterProvider = {
     apiKeyUrl: 'https://openrouter.ai/keys',
@@ -13,6 +14,7 @@ export const openrouterProvider = {
     createGenerator(config) {
         const provider = createOpenRouter({ apiKey: config.apiKey });
         return new AiSdkContentGenerator({
+            excludeSamplingParameters: !modelAcceptsSamplingParameters(config.model),
             model: provider.chat(config.model),
         });
     },

package/dist/oclif/commands/query.js CHANGED Viewed

@@ -110,7 +110,7 @@ Bad:
             client,
             command: 'query',
             format,
-            onCompleted: ({ result, taskId: tid }) => {
+            onCompleted: ({ durationMs, matchedDocs, result, taskId: tid, tier, topScore }) => {
                 const previousResult = finalResult;
                 // Always prefer the completed payload — it carries the attribution footer
                 // that may not be present in the earlier llmservice:response event.
@@ -133,11 +133,17 @@ Bad:
                 if (format === 'json') {
                     writeJsonResponse({
                         command: 'query',
+                        // Recall metadata is only present on query tasks; older daemons omit it. Spread
+                        // conditionally so JSON consumers do not see undefined keys.
                         data: {
+                            ...(durationMs === undefined ? {} : { durationMs }),
                             event: 'completed',
+                            ...(matchedDocs === undefined ? {} : { matchedDocs }),
                             result: finalResult,
                             status: 'completed',
                             taskId: tid,
+                            ...(tier === undefined ? {} : { tier }),
+                            ...(topScore === undefined ? {} : { topScore }),
                         },
                         success: true,
                     });