npm - @corbat-tech/coco - Versions diffs - 2.30.0 → 2.31.0 - Mend

@corbat-tech/coco 2.30.0 → 2.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -503,16 +503,16 @@ declare const CocoConfigSchema: z.ZodObject<{
             "kimi-code": "kimi-code";
             lmstudio: "lmstudio";
             codex: "codex";
-            qwen: "qwen";
-            deepseek: "deepseek";
-            mistral: "mistral";
             copilot: "copilot";
-            vertex: "vertex";
-            ollama: "ollama";
             groq: "groq";
             openrouter: "openrouter";
+            mistral: "mistral";
+            deepseek: "deepseek";
             together: "together";
             huggingface: "huggingface";
+            qwen: "qwen";
+            vertex: "vertex";
+            ollama: "ollama";
         }>>;
         apiKey: z.ZodOptional<z.ZodString>;
         model: z.ZodDefault<z.ZodString>;
@@ -523,6 +523,15 @@ declare const CocoConfigSchema: z.ZodObject<{
         location: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
     providerModels: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+    providerThinking: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodEnum<{
+        high: "high";
+        medium: "medium";
+        low: "low";
+        off: "off";
+        auto: "auto";
+    }>, z.ZodObject<{
+        budget: z.ZodNumber;
+    }, z.core.$strip>]>>>;
     quality: z.ZodDefault<z.ZodObject<{
         minScore: z.ZodDefault<z.ZodNumber>;
         minCoverage: z.ZodDefault<z.ZodNumber>;
@@ -926,9 +935,33 @@ interface InputAnalysis {
  */
 type ProjectType = "cli" | "api" | "web_app" | "library" | "service" | "full_stack" | "automation" | "unknown";
+/**
+ * Unified thinking/reasoning mode support for all LLM providers.
+ *
+ * Normalizes five distinct API surfaces:
+ *   - Anthropic: thinking.budget_tokens
+ *   - OpenAI Chat Completions: reasoning_effort
+ *   - OpenAI Responses API: reasoning.effort
+ *   - Gemini: thinkingConfig.thinkingBudget
+ *   - Kimi: thinking.type enabled/disabled
+ */
+/**
+ * Provider-agnostic thinking mode.
+ * "off"    — disable thinking entirely (or keep disabled for models like Kimi)
+ * "auto"   — provider default / dynamic budget
+ * "low"    — minimal reasoning
+ * "medium" — balanced reasoning
+ * "high"   — maximum reasoning
+ * { budget: N } — explicit token budget (clamped to the provider range for Anthropic / Gemini; mapped to the nearest effort level for OpenAI reasoning models)
+ */
+type ThinkingMode = "off" | "auto" | "low" | "medium" | "high" | { // named mode, or an explicit budget object
+    budget: number; // explicit thinking-token budget
+};
 /**
  * LLM Provider types for Corbat-Coco
  */
 /**
  * Message role
  */
@@ -1016,6 +1049,8 @@ interface ChatOptions {
     timeout?: number;
     /** Abort signal to cancel in-flight requests */
     signal?: AbortSignal;
+    /** Thinking/reasoning mode to pass to the model (if supported) */
+    thinking?: ThinkingMode;
 }
 /**
  * Chat response

package/dist/index.js CHANGED Viewed

@@ -1032,6 +1032,7 @@ __export(schema_exports, {
   ShipConfigSchema: () => ShipConfigSchema,
   SkillsConfigSchema: () => SkillsConfigSchema,
   StackConfigSchema: () => StackConfigSchema,
+  ThinkingModeSchema: () => ThinkingModeSchema,
   ToolsConfigSchema: () => ToolsConfigSchema,
   createDefaultConfigObject: () => createDefaultConfigObject,
   validateConfig: () => validateConfig
@@ -1076,9 +1077,13 @@ function createDefaultConfigObject(projectName, language = "typescript") {
     }
   };
 }
-var ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema2, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
+var ThinkingModeSchema, ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema2, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
 var init_schema = __esm({
   "src/config/schema.ts"() {
+    ThinkingModeSchema = z.union([ // validator for thinking-mode values: a named mode or an explicit budget object
+      z.enum(["off", "auto", "low", "medium", "high"]), // named modes
+      z.object({ budget: z.number().int().min(0).max(2e5) }) // explicit budget: integer in [0, 200000]
+    ]);
     ProviderConfigSchema = z.object({
       type: z.enum([
         "anthropic",
@@ -1240,6 +1245,7 @@ var init_schema = __esm({
         timeout: 12e4
       }),
       providerModels: z.record(z.string(), z.string()).optional(),
+      providerThinking: z.record(z.string(), ThinkingModeSchema).optional(),
       quality: QualityConfigSchema.default({
         minScore: 85,
         minCoverage: 80,
@@ -13563,6 +13569,94 @@ async function withRetry(fn, config = {}) {
 // src/providers/anthropic.ts
 init_logger();
+// src/providers/thinking.ts
+var ANTHROPIC_BUDGET = { // token budgets that mapToAnthropic uses for the named effort levels
+  low: 2048,
+  medium: 8e3, // also referenced as ANTHROPIC_CAPABILITY.budgetRange.default
+  high: 16e3
+};
+var GEMINI_BUDGET = { // token budgets that mapToGeminiBudget uses for the named effort levels
+  low: 2048,
+  medium: 8e3,
+  high: 16e3
+};
+function isAnthropicThinkingModel(model) { // true when the model id contains one of the Claude family markers below
+  const m = model.toLowerCase(); // match case-insensitively
+  if (m === "kimi-for-coding") return false; // explicit opt-out; presumably the Kimi Code id served via the Anthropic-compatible endpoint — confirm
+  return m.includes("claude-3-7") || m.includes("claude-opus-4") || m.includes("claude-sonnet-4") || m.includes("claude-haiku-4-5") || m.includes("claude-4"); // substring match, so ids with extra prefixes/suffixes also qualify
+}
+function isOpenAIReasoningModel(model) { // true for ids starting with o1/o3/o4/gpt-5, or containing "codex"
+  const m = model.toLowerCase(); // match case-insensitively
+  return m.startsWith("o1") || m.startsWith("o3") || m.startsWith("o4") || m.startsWith("gpt-5") || m.includes("codex"); // prefix checks are anchored at the start; "codex" may appear anywhere in the id
+}
+function isGeminiThinkingModel(model) { // true for Gemini ids for which mapToGeminiBudget will produce a thinking budget
+  const m = model.toLowerCase(); // match case-insensitively
+  return m.includes("gemini-2.5-pro") || m.includes("gemini-2.5-flash") || m.includes("gemini-3") && !m.includes("flash-lite") || m.includes("gemini-2.0-flash-thinking"); // && binds tighter than ||: the "flash-lite" exclusion applies only to the "gemini-3" match, so "gemini-2.5-flash-lite" ids still match via "gemini-2.5-flash"
+}
+function isKimiThinkingModel(model) { // true for ids containing "kimi-k2", or exactly "kimi-latest"
+  const m = model.toLowerCase(); // match case-insensitively
+  return m.includes("kimi-k2") || m === "kimi-latest"; // "kimi-latest" must match exactly; "kimi-k2" may be a substring
+}
+var ANTHROPIC_CAPABILITY = { // budget limits read by mapToAnthropic
+  budgetRange: { min: 1024, max: 64e3, default: ANTHROPIC_BUDGET.medium }}; // explicit budgets are clamped to [min, max]; default is the budget used for "auto"
+var GEMINI_CAPABILITY = { // budget limits read by mapToGeminiBudget
+  budgetRange: { min: 0, max: 32e3}}; // explicit budgets are clamped to [min, max]
+function mapToAnthropic(mode, model) { // ThinkingMode -> { type: "enabled", budget_tokens } for the request, or undefined to send no thinking parameter
+  if (!mode || mode === "off") return void 0; // unset or "off": no thinking parameter
+  if (!isAnthropicThinkingModel(model)) return void 0; // model not recognized as thinking-capable: the mode is ignored
+  const cap = ANTHROPIC_CAPABILITY;
+  const { min, max } = cap.budgetRange;
+  if (typeof mode === "object") { // explicit { budget } form
+    return { type: "enabled", budget_tokens: Math.min(Math.max(mode.budget, min), max) }; // clamp the requested budget into [min, max]
+  }
+  const budgetMap = { // named mode -> fixed token budget
+    auto: cap.budgetRange.default,
+    low: ANTHROPIC_BUDGET.low,
+    medium: ANTHROPIC_BUDGET.medium,
+    high: ANTHROPIC_BUDGET.high
+  };
+  const budget = budgetMap[mode];
+  if (budget === void 0) return void 0; // unrecognized string mode: no thinking parameter
+  return { type: "enabled", budget_tokens: budget };
+}
+function mapToOpenAIEffort(mode, model) { // ThinkingMode -> "low" | "medium" | "high" effort string, or undefined to send no effort setting
+  if (!mode || mode === "off") return void 0; // unset or "off": no effort setting
+  if (!isOpenAIReasoningModel(model)) return void 0; // not a recognized reasoning model: the mode is ignored
+  if (typeof mode === "object") { // explicit { budget }: bucket the token budget into an effort level
+    const { budget } = mode;
+    if (budget <= 2048) return "low";
+    if (budget <= 8e3) return "medium";
+    return "high";
+  }
+  if (mode === "auto") return "medium"; // "auto" is sent as an explicit "medium"
+  if (mode === "low" || mode === "medium" || mode === "high") return mode; // named levels pass through unchanged
+  return void 0; // any other value: no effort setting
+}
+function mapToGeminiBudget(mode, model) { // ThinkingMode -> numeric thinking budget, or undefined when no budget should be sent
+  if (!isGeminiThinkingModel(model)) return void 0; // model not recognized as thinking-capable: the mode is ignored
+  if (!mode) return void 0; // no mode requested: nothing to send
+  if (mode === "off") return 0; // budget 0 — presumably disables thinking; confirm against the Gemini API docs
+  if (mode === "auto") return -1; // -1 — presumably the provider's dynamic-budget sentinel; confirm against the Gemini API docs
+  const { min, max } = GEMINI_CAPABILITY.budgetRange;
+  if (typeof mode === "object") { // explicit { budget } form
+    return Math.min(Math.max(mode.budget, min), max); // clamp the requested budget into [min, max]
+  }
+  const budgetMap = { // named mode -> fixed token budget
+    low: GEMINI_BUDGET.low,
+    medium: GEMINI_BUDGET.medium,
+    high: GEMINI_BUDGET.high
+  };
+  return budgetMap[mode]; // undefined for an unrecognized string mode
+}
+function mapToKimiExtraBody(mode, model) { // ThinkingMode -> { thinking: { type } } body fragment, or undefined for models isKimiThinkingModel rejects
+  if (!isKimiThinkingModel(model)) return void 0; // not a Kimi thinking model: nothing to add
+  const effectiveMode = mode ?? "off"; // a null/undefined mode counts as "off"
+  const enabled = effectiveMode !== "off"; // every other mode, including { budget } objects, enables thinking
+  return { thinking: { type: enabled ? "enabled" : "disabled" } };
+}
+// src/providers/anthropic.ts
 var DEFAULT_MODEL = "claude-opus-4-6";
 var CONTEXT_WINDOWS = {
   // Kimi Code model (Anthropic-compatible endpoint)
@@ -13623,13 +13717,19 @@ var AnthropicProvider = class {
     this.ensureInitialized();
     return withRetry(async () => {
       try {
+        const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+        const thinkingParam = mapToAnthropic(options?.thinking, model);
+        const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
         const response = await this.client.messages.create({
-          model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-          max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-          temperature: options?.temperature ?? this.config.temperature ?? 0,
+          model,
+          // Anthropic requires max_tokens > budget_tokens
+          max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+          // Anthropic requires temperature=1 when thinking is enabled
+          temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
           system: this.extractSystem(messages, options?.system),
           messages: this.convertMessages(messages),
-          stop_sequences: options?.stopSequences
+          stop_sequences: options?.stopSequences,
+          ...thinkingParam && { thinking: thinkingParam }
         });
         return {
           id: response.id,
@@ -13653,14 +13753,18 @@ var AnthropicProvider = class {
     this.ensureInitialized();
     return withRetry(async () => {
       try {
+        const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+        const thinkingParam = mapToAnthropic(options?.thinking, model);
+        const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
         const response = await this.client.messages.create({
-          model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-          max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-          temperature: options?.temperature ?? this.config.temperature ?? 0,
+          model,
+          max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+          temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
           system: this.extractSystem(messages, options?.system),
           messages: this.convertMessages(messages),
           tools: this.convertTools(options.tools),
-          tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
+          tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
+          ...thinkingParam && { thinking: thinkingParam }
         });
         const toolCalls = this.extractToolCalls(response.content);
         return {
@@ -13686,13 +13790,17 @@ var AnthropicProvider = class {
     this.ensureInitialized();
     let timeoutTriggered = false;
     try {
+      const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+      const thinkingParam = mapToAnthropic(options?.thinking, model);
+      const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
       const stream = await this.client.messages.stream(
         {
-          model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-          max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-          temperature: options?.temperature ?? this.config.temperature ?? 0,
+          model,
+          max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+          temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
           system: this.extractSystem(messages, options?.system),
-          messages: this.convertMessages(messages)
+          messages: this.convertMessages(messages),
+          ...thinkingParam && { thinking: thinkingParam }
         },
         { signal: options?.signal }
       );
@@ -13748,15 +13856,19 @@ var AnthropicProvider = class {
     this.ensureInitialized();
     let timeoutTriggered = false;
     try {
+      const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+      const thinkingParam = mapToAnthropic(options?.thinking, model);
+      const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
       const stream = await this.client.messages.stream(
         {
-          model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-          max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-          temperature: options?.temperature ?? this.config.temperature ?? 0,
+          model,
+          max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+          temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
           system: this.extractSystem(messages, options?.system),
           messages: this.convertMessages(messages),
           tools: this.convertTools(options.tools),
-          tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
+          tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
+          ...thinkingParam && { thinking: thinkingParam }
         },
         { signal: options?.signal }
       );
@@ -14421,26 +14533,15 @@ var OpenAIProvider = class {
     return !MODELS_WITHOUT_TEMPERATURE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
   }
   /**
-   * Check if a model needs thinking mode disabled for tool use
-   * Kimi models have thinking mode enabled by default which requires
-   * reasoning_content in multi-turn conversations with tools
+   * Get extra body parameters for API calls.
+   * Kimi thinking models honor the caller's ThinkingMode and default to disabled
+   * (preserving existing behavior) when no mode is specified; any other model in
+   * MODELS_WITH_THINKING_MODE always has thinking disabled.
    */
-  needsThinkingDisabled(model) {
-    return MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
-  }
-  /**
-   * Get extra body parameters for API calls
-   * Used to disable thinking mode for Kimi models
-   * See: https://huggingface.co/moonshotai/Kimi-K2.5
-   *
-   * For Official Moonshot API: {'thinking': {'type': 'disabled'}}
-   * For vLLM/SGLang: {'chat_template_kwargs': {"thinking": False}}
-   */
-  getExtraBody(model) {
-    if (this.needsThinkingDisabled(model)) {
-      return {
-        thinking: { type: "disabled" }
-      };
+  getExtraBody(model, thinking) { // returns a { thinking: { type } } body fragment, or undefined when none applies
+    const kimiBody = mapToKimiExtraBody(thinking, model);
+    if (kimiBody) return kimiBody; // Kimi thinking model: enabled/disabled according to `thinking`
+    if (MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()))) { // other listed models, matched case-insensitively by substring
+      return { thinking: { type: "disabled" } }; // always disabled on this path; `thinking` is not consulted
     }
     return void 0;
   }
@@ -14457,6 +14558,7 @@ var OpenAIProvider = class {
       try {
         const supportsTemp = this.supportsTemperature(model);
         const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
+        const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
         const response = await this.client.chat.completions.create({
           model,
           ...buildMaxTokensParam(model, maxTokens),
@@ -14464,7 +14566,8 @@ var OpenAIProvider = class {
           stop: options?.stopSequences,
           ...supportsTemp && {
             temperature: options?.temperature ?? this.config.temperature ?? 0
-          }
+          },
+          ...reasoningEffort && { reasoning_effort: reasoningEffort }
         });
         const choice = response.choices[0];
         return {
@@ -14494,7 +14597,8 @@ var OpenAIProvider = class {
     return withRetry(async () => {
       try {
         const supportsTemp = this.supportsTemperature(model);
-        const extraBody = this.getExtraBody(model);
+        const extraBody = this.getExtraBody(model, options?.thinking);
+        const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
         const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
         const requestParams = {
           model,
@@ -14506,6 +14610,9 @@ var OpenAIProvider = class {
         if (supportsTemp) {
           requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
         }
+        if (reasoningEffort) {
+          requestParams.reasoning_effort = reasoningEffort;
+        }
         if (extraBody) {
           Object.assign(requestParams, extraBody);
         }
@@ -14543,12 +14650,14 @@ var OpenAIProvider = class {
     try {
       const supportsTemp = this.supportsTemperature(model);
       const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
+      const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
       const stream = await this.client.chat.completions.create({
         model,
         ...buildMaxTokensParam(model, maxTokens),
         messages: this.convertMessages(messages, options?.system),
         stream: true,
-        ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 }
+        ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+        ...reasoningEffort && { reasoning_effort: reasoningEffort }
       });
       let streamStopReason;
       for await (const chunk of stream) {
@@ -14579,7 +14688,8 @@ var OpenAIProvider = class {
     let timeoutTriggered = false;
     try {
       const supportsTemp = this.supportsTemperature(model);
-      const extraBody = this.getExtraBody(model);
+      const extraBody = this.getExtraBody(model, options?.thinking);
+      const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
       const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
       const requestParams = {
         model,
@@ -14592,6 +14702,9 @@ var OpenAIProvider = class {
       if (supportsTemp) {
         requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
       }
+      if (reasoningEffort) {
+        requestParams.reasoning_effort = reasoningEffort;
+      }
       if (extraBody) {
         Object.assign(requestParams, extraBody);
       }
@@ -15059,6 +15172,7 @@ var OpenAIProvider = class {
         const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
         const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
         const supportsTemp = this.supportsTemperature(model);
+        const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
         const response = await this.client.responses.create({
           model,
           input,
@@ -15067,6 +15181,8 @@ var OpenAIProvider = class {
           ...supportsTemp && {
             temperature: options?.temperature ?? this.config.temperature ?? 0
           },
+          // Responses API uses nested reasoning.effort (not top-level reasoning_effort)
+          ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
           store: false
         });
         return {
@@ -15095,6 +15211,7 @@ var OpenAIProvider = class {
         const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
         const tools = this.convertToolsForResponses(options.tools);
         const supportsTemp = this.supportsTemperature(model);
+        const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
         const response = await this.client.responses.create({
           model,
           input,
@@ -15104,6 +15221,7 @@ var OpenAIProvider = class {
           ...supportsTemp && {
             temperature: options?.temperature ?? this.config.temperature ?? 0
           },
+          ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
           store: false
         });
         let content = "";
@@ -15149,12 +15267,14 @@ var OpenAIProvider = class {
       const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
       const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
       const supportsTemp = this.supportsTemperature(model);
+      const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
       const stream = await this.client.responses.create({
         model,
         input,
         instructions: instructions ?? void 0,
         max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
         ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+        ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
         store: false,
         stream: true
       });
@@ -15212,12 +15332,14 @@ var OpenAIProvider = class {
       const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
       const tools = options.tools.length > 0 ? this.convertToolsForResponses(options.tools) : void 0;
       const supportsTemp = this.supportsTemperature(model);
+      const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
       const requestParams = {
         model,
         input,
         instructions: instructions ?? void 0,
         max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
         ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+        ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
         store: false,
         stream: true
       };
@@ -16396,12 +16518,17 @@ var GeminiProvider = class {
     return model ?? this.config.model ?? DEFAULT_MODEL5;
   }
   buildConfig(messages, options, tools, toolChoice) {
+    const model = this.getModel(options?.model);
+    const thinkingBudget = mapToGeminiBudget(options?.thinking, model);
     const config = {
       maxOutputTokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
       temperature: options?.temperature ?? this.config.temperature ?? 0,
       stopSequences: options?.stopSequences,
       systemInstruction: this.extractSystem(messages, options?.system)
     };
+    if (thinkingBudget !== void 0) {
+      config.thinkingConfig = { thinkingBudget };
+    }
     if (tools && tools.length > 0) {
       config.tools = [{ functionDeclarations: this.convertTools(tools) }];
       config.toolConfig = {