@corbat-tech/coco 2.29.0 → 2.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -503,16 +503,16 @@ declare const CocoConfigSchema: z.ZodObject<{
503
503
  "kimi-code": "kimi-code";
504
504
  lmstudio: "lmstudio";
505
505
  codex: "codex";
506
- qwen: "qwen";
507
- deepseek: "deepseek";
508
- mistral: "mistral";
509
506
  copilot: "copilot";
510
- vertex: "vertex";
511
- ollama: "ollama";
512
507
  groq: "groq";
513
508
  openrouter: "openrouter";
509
+ mistral: "mistral";
510
+ deepseek: "deepseek";
514
511
  together: "together";
515
512
  huggingface: "huggingface";
513
+ qwen: "qwen";
514
+ vertex: "vertex";
515
+ ollama: "ollama";
516
516
  }>>;
517
517
  apiKey: z.ZodOptional<z.ZodString>;
518
518
  model: z.ZodDefault<z.ZodString>;
@@ -523,6 +523,15 @@ declare const CocoConfigSchema: z.ZodObject<{
523
523
  location: z.ZodOptional<z.ZodString>;
524
524
  }, z.core.$strip>>;
525
525
  providerModels: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
526
+ providerThinking: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodEnum<{
527
+ high: "high";
528
+ medium: "medium";
529
+ low: "low";
530
+ off: "off";
531
+ auto: "auto";
532
+ }>, z.ZodObject<{
533
+ budget: z.ZodNumber;
534
+ }, z.core.$strip>]>>>;
526
535
  quality: z.ZodDefault<z.ZodObject<{
527
536
  minScore: z.ZodDefault<z.ZodNumber>;
528
537
  minCoverage: z.ZodDefault<z.ZodNumber>;
@@ -926,9 +935,33 @@ interface InputAnalysis {
926
935
  */
927
936
  type ProjectType = "cli" | "api" | "web_app" | "library" | "service" | "full_stack" | "automation" | "unknown";
928
937
 
938
+ /**
939
+ * Unified thinking/reasoning mode support for all LLM providers.
940
+ *
941
+ * Normalizes five distinct API surfaces:
942
+ * - Anthropic: thinking.budget_tokens
943
+ * - OpenAI Chat Completions: reasoning_effort
944
+ * - OpenAI Responses API: reasoning.effort
945
+ * - Gemini: thinkingConfig.thinkingBudget
946
+ * - Kimi: thinking.type enabled/disabled
947
+ */
948
+ /**
949
+ * Provider-agnostic thinking mode.
950
+ * "off" — disable thinking entirely (or keep disabled for models like Kimi)
951
+ * "auto" — provider default / dynamic budget
952
+ * "low" — minimal reasoning
953
+ * "medium" — balanced reasoning
954
+ * "high" — maximum reasoning
955
+ * { budget: N } — explicit token budget (sent as-is, clamped, to Anthropic / Gemini; OpenAI-style effort providers bucket it into low/medium/high, and Kimi treats any budget as "enabled")
956
+ */
957
/** Named reasoning level, or an explicit token budget wrapped in an object. */
type ThinkingMode =
  | "off"
  | "auto"
  | "low"
  | "medium"
  | "high"
  | { budget: number };
960
+
929
961
  /**
930
962
  * LLM Provider types for Corbat-Coco
931
963
  */
964
+
932
965
  /**
933
966
  * Message role
934
967
  */
@@ -1016,6 +1049,8 @@ interface ChatOptions {
1016
1049
  timeout?: number;
1017
1050
  /** Abort signal to cancel in-flight requests */
1018
1051
  signal?: AbortSignal;
1052
+ /** Thinking/reasoning mode to pass to the model (if supported) */
1053
+ thinking?: ThinkingMode;
1019
1054
  }
1020
1055
  /**
1021
1056
  * Chat response
@@ -3252,12 +3287,17 @@ interface LoggerConfig {
3252
3287
  */
3253
3288
  declare function createLogger(config?: Partial<LoggerConfig>): Logger<ILogObj>;
3254
3289
 
3290
/**
 * Proxy settings produced by a system-proxy resolver and consumed by
 * `installProxyDispatcher` when no proxy environment variable is set.
 */
interface SystemProxyConfig {
  /** Full proxy URL, including scheme (for example `http://host:port`). */
  proxyUrl: string;
  /** Optional comma-separated bypass list, used to seed `NO_PROXY`. */
  noProxy?: string;
}
3255
3294
  /**
3256
- * Install an undici EnvHttpProxyAgent as the global dispatcher when any
3257
- * proxy env var is set. Safe to call multiple times (idempotent).
3295
+ * Install an undici EnvHttpProxyAgent as the global dispatcher. Resolves
3296
+ * the proxy from env vars first, then from OS-level config as a fallback.
3297
+ * Safe to call multiple times (idempotent).
3258
3298
  *
3259
3299
  * Returns a masked proxy URL when a proxy is installed, or null otherwise.
3260
3300
  */
3261
- declare function installProxyDispatcher(): string | null;
3301
+ declare function installProxyDispatcher(resolveSystem?: () => SystemProxyConfig | null): string | null;
3262
3302
 
3263
3303
  export { ADRGenerator, AnthropicProvider, ArchitectureGenerator, type Backlog, BacklogGenerator, CICDGenerator, type ChatOptions, type ChatResponse, type CocoConfig, CocoError, CodeGenerator, CodeReviewer, CompleteExecutor, ConfigError, ConvergeExecutor, DiscoveryEngine, DockerGenerator, DocsGenerator, type Epic, type LLMProvider, type Message, OrchestrateExecutor, type Orchestrator, type OrchestratorConfig, OutputExecutor, type Phase, type PhaseContext, PhaseError, type PhaseExecutor, type PhaseResult, type Progress, type ProjectState, type QualityDimensions, type QualityScores, type QualityThresholds, SessionManager, SpecificationGenerator, type Sprint, type Story, type Task, TaskError, type TaskHistory, TaskIterator, type TaskVersion, ToolRegistry, VERSION, configExists, createADRGenerator, createAnthropicProvider, createArchitectureGenerator, createBacklogGenerator, createCICDGenerator, createCodeGenerator, createCodeReviewer, createCompleteExecutor, createConvergeExecutor, createDefaultConfig, createDiscoveryEngine, createDockerGenerator, createDocsGenerator, createFullToolRegistry, createLogger, createOrchestrateExecutor, createOrchestrator, createOutputExecutor, createProvider, createSessionManager, createSpecificationGenerator, createTaskIterator, createToolRegistry, installProxyDispatcher, loadConfig, registerAllTools, saveConfig };
package/dist/index.js CHANGED
@@ -8,8 +8,8 @@ import fs16__default, { access, readFile, readdir, writeFile, mkdir } from 'fs/p
8
8
  import { randomUUID, randomBytes, createHash } from 'crypto';
9
9
  import * as http from 'http';
10
10
  import { fileURLToPath, URL as URL$1 } from 'url';
11
+ import { exec, execFile, execSync, execFileSync, spawn } from 'child_process';
11
12
  import { setGlobalDispatcher, EnvHttpProxyAgent } from 'undici';
12
- import { exec, execFile, execSync, spawn } from 'child_process';
13
13
  import { promisify } from 'util';
14
14
  import { z } from 'zod';
15
15
  import * as p4 from '@clack/prompts';
@@ -670,23 +670,121 @@ function maskProxyUrl(url) {
670
670
  return "[invalid proxy URL]";
671
671
  }
672
672
  }
673
- function installProxyDispatcher() {
674
- if (installed) return getProxyFromEnv() ? maskProxyUrl(getProxyFromEnv()) : null;
675
- const proxy = getProxyFromEnv();
676
- if (!proxy) return null;
673
/**
 * Run an external command synchronously and return what it wrote to stdout.
 *
 * stdin and stderr are ignored and the call is limited to 2000 ms. Any failure
 * reported by `execFileSync` (it throws) is swallowed and reported as `null`,
 * so callers can treat "no output" and "could not run" the same way.
 *
 * @param cmd  Executable to run.
 * @param args Argument vector passed to the executable (no shell is involved).
 * @returns The captured stdout as a UTF-8 string, or `null` on failure.
 */
function defaultRunner(cmd, args) {
  let captured = null;
  try {
    captured = execFileSync(cmd, args, {
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2000,
      encoding: "utf-8"
    });
  } catch {
    // Best-effort probe: a failing command simply yields null.
  }
  return captured;
}
684
/**
 * Extract proxy settings from `Key : value` text (the caller feeds it the
 * output of `scutil --proxy`).
 *
 * - Returns `null` when `ProxyAutoConfigEnable` is `1`.
 * - Prefers the HTTPS proxy entry over the HTTP one; an entry is used only when
 *   its `<prefix>Enable` field is `1` and a `<prefix>Proxy` host is present.
 * - The resulting URL always uses the `http://` scheme, with the port appended
 *   when a `<prefix>Port` field exists.
 * - Entries of the `ExceptionsList` array are joined with commas into `noProxy`.
 *
 * @param output Raw text to parse.
 * @returns `{ proxyUrl, noProxy }` (with `noProxy` undefined when there are no
 *          exceptions), or `null` when no usable proxy is configured.
 */
function parseMacOsProxy(output) {
  // Look up a single "Name : value" line anywhere in the text.
  const readField = (name) => {
    const pattern = new RegExp(`^\\s*${name}\\s*:\\s*(.+?)\\s*$`, "m");
    const found = output.match(pattern);
    return found?.[1];
  };

  if (readField("ProxyAutoConfigEnable") === "1") return null;

  // Build a proxy URL for the given key prefix ("HTTPS" or "HTTP").
  const buildUrl = (prefix) => {
    const enabled = readField(`${prefix}Enable`) === "1";
    if (!enabled) return null;
    const host = readField(`${prefix}Proxy`);
    const port = readField(`${prefix}Port`);
    if (!host) return null;
    const suffix = port ? `:${port}` : "";
    return `http://${host}${suffix}`;
  };

  const proxyUrl = buildUrl("HTTPS") ?? buildUrl("HTTP");
  if (!proxyUrl) return null;

  // The exceptions array is rendered as numbered "index : host" rows in braces.
  const listBody = /ExceptionsList\s*:\s*<array>\s*\{([\s\S]*?)\}/.exec(output)?.[1] ?? "";
  const bypassHosts = listBody
    .split("\n")
    .map((row) => /^\s*\d+\s*:\s*(.+?)\s*$/.exec(row)?.[1])
    .filter((host) => Boolean(host));

  return {
    proxyUrl,
    noProxy: bypassHosts.length > 0 ? bypassHosts.join(",") : void 0
  };
}
715
/**
 * Extract proxy settings from text containing `Proxy Server(s) :` and
 * `Bypass List :` lines (the caller feeds it `netsh winhttp show proxy`).
 *
 * The proxy value may be a single `host:port` / URL, or a `;`-separated list
 * of `scheme=value` entries. In the keyed form the `https=` entry wins over
 * `http=`; if neither is present (or the chosen value is empty) the result is
 * `null`. A value without an explicit `http://` / `https://` scheme is
 * prefixed with `http://`.
 *
 * @param output Raw text to parse.
 * @returns `{ proxyUrl, noProxy }` where `noProxy` is the bypass list with `;`
 *          replaced by `,` (undefined when absent or `(none)`), or `null` when
 *          the text reports direct access or no usable proxy.
 */
function parseWindowsProxy(output) {
  if (/Direct access/i.test(output)) return null;
  const raw = output.match(/Proxy\s+Server\(s\)\s*:\s*(\S.*?)\s*$/m)?.[1]?.trim();
  if (!raw) return null;

  const entries = raw
    .split(";")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
  // Only a leading "<scheme>=" marks the keyed form. A bare URL whose
  // credentials happen to contain "=" must not be mistaken for it.
  const isKeyed = entries.some((entry) => /^[a-z][a-z0-9]*=/i.test(entry));

  let hostPort = raw;
  if (isKeyed) {
    const valueFor = (scheme) => {
      const prefix = `${scheme}=`;
      const entry = entries.find((candidate) => candidate.toLowerCase().startsWith(prefix));
      // Slice after the first "=" so any later "=" in the value is kept.
      // String.prototype.split(sep, limit) would silently drop the remainder.
      return entry ? entry.slice(prefix.length).trim() : void 0;
    };
    hostPort = valueFor("https") ?? valueFor("http") ?? "";
    if (!hostPort) return null;
  }

  const proxyUrl = /^https?:\/\//i.test(hostPort) ? hostPort : `http://${hostPort}`;

  let noProxy;
  const bypass = output.match(/Bypass\s+List\s*:\s*(\S.*?)\s*$/m)?.[1]?.trim();
  if (bypass && !/\(none\)/i.test(bypass)) {
    noProxy = bypass.replace(/;/g, ",");
  }
  return { proxyUrl, noProxy };
}
737
/**
 * Query the operating system for a configured proxy.
 *
 * Runs `scutil --proxy` on `darwin` and `netsh winhttp show proxy` on `win32`
 * and hands the output to the matching parser. Every other platform, and any
 * command that yields no output, results in `null`.
 *
 * @param platform Platform identifier; defaults to `process.platform`.
 * @param run      Command runner `(cmd, args) => string | null`; injectable.
 * @returns The parsed proxy configuration, or `null` when none was found.
 */
function getProxyFromSystem(platform = process.platform, run = defaultRunner) {
  switch (platform) {
    case "darwin": {
      const scutilOutput = run("scutil", ["--proxy"]);
      if (!scutilOutput) return null;
      return parseMacOsProxy(scutilOutput);
    }
    case "win32": {
      const netshOutput = run("netsh", ["winhttp", "show", "proxy"]);
      if (!netshOutput) return null;
      return parseWindowsProxy(netshOutput);
    }
    default:
      return null;
  }
}
748
/**
 * Install the proxy-aware global dispatcher, resolving the proxy from the
 * environment first and from the operating system second.
 *
 * - Already installed: nothing is re-installed; the masked proxy currently
 *   visible in the environment (or `null`) is returned.
 * - Proxy found in the environment: the dispatcher is applied for it.
 * - Otherwise `resolveSystem()` is consulted; when it yields a config,
 *   `HTTPS_PROXY` / `HTTP_PROXY` (and `NO_PROXY`, unless either casing is
 *   already set) are seeded into `process.env` before the dispatcher is applied.
 *
 * @param resolveSystem Callback returning `{ proxyUrl, noProxy? }` or `null`;
 *                      defaults to probing the OS via `getProxyFromSystem`.
 * @returns The value produced by `applyDispatcher` / `maskProxyUrl`, or `null`
 *          when no proxy could be resolved.
 */
function installProxyDispatcher(resolveSystem = () => getProxyFromSystem()) {
  const fromEnv = getProxyFromEnv();

  if (installed) {
    return fromEnv ? maskProxyUrl(fromEnv) : null;
  }
  if (fromEnv) {
    return applyDispatcher(fromEnv);
  }

  const systemProxy = resolveSystem();
  if (!systemProxy) return null;

  seedEnv("HTTPS_PROXY", systemProxy.proxyUrl);
  seedEnv("HTTP_PROXY", systemProxy.proxyUrl);

  // Never override a bypass list the user already supplied in either casing.
  const userHasNoProxy = Boolean(process.env.NO_PROXY || process.env.no_proxy);
  if (systemProxy.noProxy && !userHasNoProxy) {
    seedEnv("NO_PROXY", systemProxy.noProxy);
  }
  return applyDispatcher(systemProxy.proxyUrl);
}
768
/**
 * Set an environment variable only when it is currently undefined, and record
 * the key in `seededEnvKeys` so seeded variables can be told apart from
 * user-provided ones.
 *
 * @param key   Environment variable name.
 * @param value Value to assign when the variable is not yet defined.
 */
function seedEnv(key, value) {
  if (process.env[key] === void 0) {
    process.env[key] = value;
    seededEnvKeys.push(key);
  }
}
773
+ function applyDispatcher(proxyUrl) {
677
774
  try {
678
775
  setGlobalDispatcher(new EnvHttpProxyAgent());
679
776
  installed = true;
680
- return maskProxyUrl(proxy);
777
+ return maskProxyUrl(proxyUrl);
681
778
  } catch {
682
779
  return null;
683
780
  }
684
781
  }
685
- var PROXY_ENV_VARS, installed;
782
+ var PROXY_ENV_VARS, installed, seededEnvKeys;
686
783
  var init_proxy = __esm({
687
784
  "src/utils/proxy.ts"() {
688
785
  PROXY_ENV_VARS = ["HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"];
689
786
  installed = false;
787
+ seededEnvKeys = [];
690
788
  }
691
789
  });
692
790
  async function exchangeForCopilotToken(githubToken) {
@@ -934,6 +1032,7 @@ __export(schema_exports, {
934
1032
  ShipConfigSchema: () => ShipConfigSchema,
935
1033
  SkillsConfigSchema: () => SkillsConfigSchema,
936
1034
  StackConfigSchema: () => StackConfigSchema,
1035
+ ThinkingModeSchema: () => ThinkingModeSchema,
937
1036
  ToolsConfigSchema: () => ToolsConfigSchema,
938
1037
  createDefaultConfigObject: () => createDefaultConfigObject,
939
1038
  validateConfig: () => validateConfig
@@ -978,9 +1077,13 @@ function createDefaultConfigObject(projectName, language = "typescript") {
978
1077
  }
979
1078
  };
980
1079
  }
981
- var ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema2, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
1080
+ var ThinkingModeSchema, ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema2, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
982
1081
  var init_schema = __esm({
983
1082
  "src/config/schema.ts"() {
1083
+ ThinkingModeSchema = z.union([
1084
+ z.enum(["off", "auto", "low", "medium", "high"]),
1085
+ z.object({ budget: z.number().int().min(0).max(2e5) })
1086
+ ]);
984
1087
  ProviderConfigSchema = z.object({
985
1088
  type: z.enum([
986
1089
  "anthropic",
@@ -1142,6 +1245,7 @@ var init_schema = __esm({
1142
1245
  timeout: 12e4
1143
1246
  }),
1144
1247
  providerModels: z.record(z.string(), z.string()).optional(),
1248
+ providerThinking: z.record(z.string(), ThinkingModeSchema).optional(),
1145
1249
  quality: QualityConfigSchema.default({
1146
1250
  minScore: 85,
1147
1251
  minCoverage: 80,
@@ -13465,6 +13569,94 @@ async function withRetry(fn, config = {}) {
13465
13569
 
13466
13570
  // src/providers/anthropic.ts
13467
13571
  init_logger();
13572
+
13573
+ // src/providers/thinking.ts
13574
// Token budgets used when a named thinking level ("low" | "medium" | "high")
// is translated into Anthropic's budget_tokens.
var ANTHROPIC_BUDGET = { low: 2048, medium: 8000, high: 16000 };

// Token budgets used when a named thinking level is translated into Gemini's
// thinkingBudget.
var GEMINI_BUDGET = { low: 2048, medium: 8000, high: 16000 };
13584
/**
 * Decide whether a model id names a Claude model for which the thinking
 * parameter should be sent.
 *
 * Matching is case-insensitive and substring based. `kimi-for-coding` is
 * rejected explicitly.
 *
 * @param model Model identifier.
 * @returns `true` when the id contains one of the known thinking-capable markers.
 */
function isAnthropicThinkingModel(model) {
  const id = model.toLowerCase();
  if (id === "kimi-for-coding") return false;
  const markers = [
    "claude-3-7",
    "claude-opus-4",
    "claude-sonnet-4",
    "claude-haiku-4-5",
    "claude-4"
  ];
  return markers.some((marker) => id.includes(marker));
}
13589
/**
 * Decide whether a model id names an OpenAI reasoning model.
 *
 * Case-insensitive: the id must start with `o1`, `o3`, `o4` or `gpt-5`, or
 * contain `codex` anywhere.
 *
 * @param model Model identifier.
 * @returns `true` when the id matches one of the rules above.
 */
function isOpenAIReasoningModel(model) {
  const id = model.toLowerCase();
  const reasoningPrefixes = ["o1", "o3", "o4", "gpt-5"];
  if (reasoningPrefixes.some((prefix) => id.startsWith(prefix))) return true;
  return id.includes("codex");
}
13593
/**
 * Decide whether a model id names a Gemini model that gets a thinking budget.
 *
 * Case-insensitive substring rules:
 * - `gemini-2.5-pro` or `gemini-2.5-flash` (which also matches flash-lite);
 * - `gemini-3`, unless the id also contains `flash-lite`;
 * - `gemini-2.0-flash-thinking`.
 *
 * @param model Model identifier.
 * @returns `true` when any rule matches.
 */
function isGeminiThinkingModel(model) {
  const id = model.toLowerCase();
  if (id.includes("gemini-2.5-pro") || id.includes("gemini-2.5-flash")) return true;
  if (id.includes("gemini-3") && !id.includes("flash-lite")) return true;
  return id.includes("gemini-2.0-flash-thinking");
}
13597
/**
 * Decide whether a model id names a Kimi model with a switchable thinking mode.
 *
 * Case-insensitive: the id contains `kimi-k2` or is exactly `kimi-latest`.
 *
 * @param model Model identifier.
 * @returns `true` when the id matches.
 */
function isKimiThinkingModel(model) {
  const id = model.toLowerCase();
  if (id === "kimi-latest") return true;
  return id.includes("kimi-k2");
}
13601
// Bounds applied to explicit Anthropic budgets; `default` is the budget used
// for the "auto" mode.
var ANTHROPIC_CAPABILITY = {
  budgetRange: {
    min: 1024,
    max: 64000,
    default: ANTHROPIC_BUDGET.medium
  }
};

// Bounds applied to explicit Gemini budgets.
var GEMINI_CAPABILITY = {
  budgetRange: {
    min: 0,
    max: 32000
  }
};
13605
/**
 * Translate a provider-agnostic thinking mode into the `thinking` request
 * parameter for Anthropic's Messages API.
 *
 * @param mode  `"off" | "auto" | "low" | "medium" | "high"`, an object
 *              `{ budget }`, or undefined.
 * @param model Model identifier; models rejected by
 *              `isAnthropicThinkingModel` never receive the parameter.
 * @returns `{ type: "enabled", budget_tokens }`, or `undefined` when thinking
 *          should not be requested (mode missing/"off", unsupported model,
 *          unknown mode name, or an explicit budget that is not a positive
 *          number).
 */
function mapToAnthropic(mode, model) {
  if (!mode || mode === "off") return void 0;
  if (!isAnthropicThinkingModel(model)) return void 0;

  const { min, max, default: autoBudget } = ANTHROPIC_CAPABILITY.budgetRange;

  if (typeof mode === "object") {
    // budget_tokens must be an integer, so drop any fractional part.
    const requested = Math.trunc(mode.budget);
    // NaN (including a missing budget) would pass through Math.min/Math.max
    // unchanged and reach the API. A zero or negative budget means "no
    // thinking", as in the Gemini mapping, so it is not raised to the minimum.
    if (Number.isNaN(requested) || requested <= 0) return void 0;
    return { type: "enabled", budget_tokens: Math.min(Math.max(requested, min), max) };
  }

  switch (mode) {
    case "auto":
      return { type: "enabled", budget_tokens: autoBudget };
    case "low":
      return { type: "enabled", budget_tokens: ANTHROPIC_BUDGET.low };
    case "medium":
      return { type: "enabled", budget_tokens: ANTHROPIC_BUDGET.medium };
    case "high":
      return { type: "enabled", budget_tokens: ANTHROPIC_BUDGET.high };
    default:
      // Unrecognized mode name: leave thinking untouched.
      return void 0;
  }
}
13623
/**
 * Translate a provider-agnostic thinking mode into an OpenAI reasoning effort.
 *
 * - Missing mode, `"off"`, or a model rejected by `isOpenAIReasoningModel`
 *   yields `undefined` (no effort parameter is sent).
 * - `{ budget }` is bucketed: up to 2048 is `"low"`, up to 8000 is `"medium"`,
 *   anything else is `"high"`.
 * - `"auto"` maps to `"medium"`; `"low"`, `"medium"` and `"high"` pass through.
 *
 * @param mode  Thinking mode or undefined.
 * @param model Model identifier.
 * @returns `"low" | "medium" | "high"`, or `undefined`.
 */
function mapToOpenAIEffort(mode, model) {
  if (!mode || mode === "off" || !isOpenAIReasoningModel(model)) return void 0;

  if (typeof mode === "object") {
    const tokens = mode.budget;
    if (tokens <= 2048) return "low";
    return tokens <= 8000 ? "medium" : "high";
  }

  switch (mode) {
    case "auto":
      return "medium";
    case "low":
    case "medium":
    case "high":
      return mode;
    default:
      return void 0;
  }
}
13636
/**
 * Translate a provider-agnostic thinking mode into a Gemini thinking budget.
 *
 * - A model rejected by `isGeminiThinkingModel`, or a missing mode, yields
 *   `undefined` (no thinking config is sent).
 * - `"off"` maps to `0` and `"auto"` to `-1`.
 * - `{ budget }` is clamped to the Gemini budget range.
 * - `"low"`, `"medium"` and `"high"` use the preset Gemini budgets.
 *
 * @param mode  Thinking mode or undefined.
 * @param model Model identifier.
 * @returns A numeric budget, or `undefined`.
 */
function mapToGeminiBudget(mode, model) {
  if (!isGeminiThinkingModel(model) || !mode) return void 0;

  if (mode === "off") return 0;
  if (mode === "auto") return -1;

  if (typeof mode === "object") {
    const { min: floor, max: ceiling } = GEMINI_CAPABILITY.budgetRange;
    const atLeastFloor = Math.max(mode.budget, floor);
    return Math.min(atLeastFloor, ceiling);
  }

  const presets = {
    low: GEMINI_BUDGET.low,
    medium: GEMINI_BUDGET.medium,
    high: GEMINI_BUDGET.high
  };
  return presets[mode];
}
13652
/**
 * Build the extra request-body fragment that switches Kimi thinking on or off.
 *
 * Only models accepted by `isKimiThinkingModel` get a fragment. A missing mode
 * is treated as `"off"`; every other mode (including an explicit budget)
 * enables thinking.
 *
 * @param mode  Thinking mode or undefined.
 * @param model Model identifier.
 * @returns `{ thinking: { type: "enabled" | "disabled" } }`, or `undefined`
 *          for non-Kimi models.
 */
function mapToKimiExtraBody(mode, model) {
  if (!isKimiThinkingModel(model)) return void 0;
  const type = (mode ?? "off") === "off" ? "disabled" : "enabled";
  return { thinking: { type } };
}
13658
+
13659
+ // src/providers/anthropic.ts
13468
13660
  var DEFAULT_MODEL = "claude-opus-4-6";
13469
13661
  var CONTEXT_WINDOWS = {
13470
13662
  // Kimi Code model (Anthropic-compatible endpoint)
@@ -13525,13 +13717,19 @@ var AnthropicProvider = class {
13525
13717
  this.ensureInitialized();
13526
13718
  return withRetry(async () => {
13527
13719
  try {
13720
+ const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
13721
+ const thinkingParam = mapToAnthropic(options?.thinking, model);
13722
+ const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
13528
13723
  const response = await this.client.messages.create({
13529
- model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
13530
- max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
13531
- temperature: options?.temperature ?? this.config.temperature ?? 0,
13724
+ model,
13725
+ // Anthropic requires max_tokens > budget_tokens
13726
+ max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
13727
+ // Anthropic requires temperature=1 when thinking is enabled
13728
+ temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
13532
13729
  system: this.extractSystem(messages, options?.system),
13533
13730
  messages: this.convertMessages(messages),
13534
- stop_sequences: options?.stopSequences
13731
+ stop_sequences: options?.stopSequences,
13732
+ ...thinkingParam && { thinking: thinkingParam }
13535
13733
  });
13536
13734
  return {
13537
13735
  id: response.id,
@@ -13555,14 +13753,18 @@ var AnthropicProvider = class {
13555
13753
  this.ensureInitialized();
13556
13754
  return withRetry(async () => {
13557
13755
  try {
13756
+ const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
13757
+ const thinkingParam = mapToAnthropic(options?.thinking, model);
13758
+ const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
13558
13759
  const response = await this.client.messages.create({
13559
- model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
13560
- max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
13561
- temperature: options?.temperature ?? this.config.temperature ?? 0,
13760
+ model,
13761
+ max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
13762
+ temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
13562
13763
  system: this.extractSystem(messages, options?.system),
13563
13764
  messages: this.convertMessages(messages),
13564
13765
  tools: this.convertTools(options.tools),
13565
- tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
13766
+ tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
13767
+ ...thinkingParam && { thinking: thinkingParam }
13566
13768
  });
13567
13769
  const toolCalls = this.extractToolCalls(response.content);
13568
13770
  return {
@@ -13588,13 +13790,17 @@ var AnthropicProvider = class {
13588
13790
  this.ensureInitialized();
13589
13791
  let timeoutTriggered = false;
13590
13792
  try {
13793
+ const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
13794
+ const thinkingParam = mapToAnthropic(options?.thinking, model);
13795
+ const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
13591
13796
  const stream = await this.client.messages.stream(
13592
13797
  {
13593
- model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
13594
- max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
13595
- temperature: options?.temperature ?? this.config.temperature ?? 0,
13798
+ model,
13799
+ max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
13800
+ temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
13596
13801
  system: this.extractSystem(messages, options?.system),
13597
- messages: this.convertMessages(messages)
13802
+ messages: this.convertMessages(messages),
13803
+ ...thinkingParam && { thinking: thinkingParam }
13598
13804
  },
13599
13805
  { signal: options?.signal }
13600
13806
  );
@@ -13650,15 +13856,19 @@ var AnthropicProvider = class {
13650
13856
  this.ensureInitialized();
13651
13857
  let timeoutTriggered = false;
13652
13858
  try {
13859
+ const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
13860
+ const thinkingParam = mapToAnthropic(options?.thinking, model);
13861
+ const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
13653
13862
  const stream = await this.client.messages.stream(
13654
13863
  {
13655
- model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
13656
- max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
13657
- temperature: options?.temperature ?? this.config.temperature ?? 0,
13864
+ model,
13865
+ max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
13866
+ temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
13658
13867
  system: this.extractSystem(messages, options?.system),
13659
13868
  messages: this.convertMessages(messages),
13660
13869
  tools: this.convertTools(options.tools),
13661
- tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
13870
+ tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
13871
+ ...thinkingParam && { thinking: thinkingParam }
13662
13872
  },
13663
13873
  { signal: options?.signal }
13664
13874
  );
@@ -14323,26 +14533,15 @@ var OpenAIProvider = class {
14323
14533
  return !MODELS_WITHOUT_TEMPERATURE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
14324
14534
  }
14325
14535
  /**
14326
- * Check if a model needs thinking mode disabled for tool use
14327
- * Kimi models have thinking mode enabled by default which requires
14328
- * reasoning_content in multi-turn conversations with tools
14536
+ * Get extra body parameters for API calls.
14537
+ * Honors the user's ThinkingMode for Kimi models; defaults to disabled
14538
+ * (preserving existing behavior) when no mode is specified.
14329
14539
  */
14330
- needsThinkingDisabled(model) {
14331
- return MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
14332
- }
14333
- /**
14334
- * Get extra body parameters for API calls
14335
- * Used to disable thinking mode for Kimi models
14336
- * See: https://huggingface.co/moonshotai/Kimi-K2.5
14337
- *
14338
- * For Official Moonshot API: {'thinking': {'type': 'disabled'}}
14339
- * For vLLM/SGLang: {'chat_template_kwargs': {"thinking": False}}
14340
- */
14341
- getExtraBody(model) {
14342
- if (this.needsThinkingDisabled(model)) {
14343
- return {
14344
- thinking: { type: "disabled" }
14345
- };
14540
+ getExtraBody(model, thinking) {
14541
+ const kimiBody = mapToKimiExtraBody(thinking, model);
14542
+ if (kimiBody) return kimiBody;
14543
+ if (MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()))) {
14544
+ return { thinking: { type: "disabled" } };
14346
14545
  }
14347
14546
  return void 0;
14348
14547
  }
@@ -14359,6 +14558,7 @@ var OpenAIProvider = class {
14359
14558
  try {
14360
14559
  const supportsTemp = this.supportsTemperature(model);
14361
14560
  const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
14561
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
14362
14562
  const response = await this.client.chat.completions.create({
14363
14563
  model,
14364
14564
  ...buildMaxTokensParam(model, maxTokens),
@@ -14366,7 +14566,8 @@ var OpenAIProvider = class {
14366
14566
  stop: options?.stopSequences,
14367
14567
  ...supportsTemp && {
14368
14568
  temperature: options?.temperature ?? this.config.temperature ?? 0
14369
- }
14569
+ },
14570
+ ...reasoningEffort && { reasoning_effort: reasoningEffort }
14370
14571
  });
14371
14572
  const choice = response.choices[0];
14372
14573
  return {
@@ -14396,7 +14597,8 @@ var OpenAIProvider = class {
14396
14597
  return withRetry(async () => {
14397
14598
  try {
14398
14599
  const supportsTemp = this.supportsTemperature(model);
14399
- const extraBody = this.getExtraBody(model);
14600
+ const extraBody = this.getExtraBody(model, options?.thinking);
14601
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
14400
14602
  const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
14401
14603
  const requestParams = {
14402
14604
  model,
@@ -14408,6 +14610,9 @@ var OpenAIProvider = class {
14408
14610
  if (supportsTemp) {
14409
14611
  requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
14410
14612
  }
14613
+ if (reasoningEffort) {
14614
+ requestParams.reasoning_effort = reasoningEffort;
14615
+ }
14411
14616
  if (extraBody) {
14412
14617
  Object.assign(requestParams, extraBody);
14413
14618
  }
@@ -14445,12 +14650,14 @@ var OpenAIProvider = class {
14445
14650
  try {
14446
14651
  const supportsTemp = this.supportsTemperature(model);
14447
14652
  const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
14653
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
14448
14654
  const stream = await this.client.chat.completions.create({
14449
14655
  model,
14450
14656
  ...buildMaxTokensParam(model, maxTokens),
14451
14657
  messages: this.convertMessages(messages, options?.system),
14452
14658
  stream: true,
14453
- ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 }
14659
+ ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
14660
+ ...reasoningEffort && { reasoning_effort: reasoningEffort }
14454
14661
  });
14455
14662
  let streamStopReason;
14456
14663
  for await (const chunk of stream) {
@@ -14481,7 +14688,8 @@ var OpenAIProvider = class {
14481
14688
  let timeoutTriggered = false;
14482
14689
  try {
14483
14690
  const supportsTemp = this.supportsTemperature(model);
14484
- const extraBody = this.getExtraBody(model);
14691
+ const extraBody = this.getExtraBody(model, options?.thinking);
14692
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
14485
14693
  const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
14486
14694
  const requestParams = {
14487
14695
  model,
@@ -14494,6 +14702,9 @@ var OpenAIProvider = class {
14494
14702
  if (supportsTemp) {
14495
14703
  requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
14496
14704
  }
14705
+ if (reasoningEffort) {
14706
+ requestParams.reasoning_effort = reasoningEffort;
14707
+ }
14497
14708
  if (extraBody) {
14498
14709
  Object.assign(requestParams, extraBody);
14499
14710
  }
@@ -14961,6 +15172,7 @@ var OpenAIProvider = class {
14961
15172
  const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
14962
15173
  const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
14963
15174
  const supportsTemp = this.supportsTemperature(model);
15175
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
14964
15176
  const response = await this.client.responses.create({
14965
15177
  model,
14966
15178
  input,
@@ -14969,6 +15181,8 @@ var OpenAIProvider = class {
14969
15181
  ...supportsTemp && {
14970
15182
  temperature: options?.temperature ?? this.config.temperature ?? 0
14971
15183
  },
15184
+ // Responses API uses nested reasoning.effort (not top-level reasoning_effort)
15185
+ ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
14972
15186
  store: false
14973
15187
  });
14974
15188
  return {
@@ -14997,6 +15211,7 @@ var OpenAIProvider = class {
14997
15211
  const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
14998
15212
  const tools = this.convertToolsForResponses(options.tools);
14999
15213
  const supportsTemp = this.supportsTemperature(model);
15214
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
15000
15215
  const response = await this.client.responses.create({
15001
15216
  model,
15002
15217
  input,
@@ -15006,6 +15221,7 @@ var OpenAIProvider = class {
15006
15221
  ...supportsTemp && {
15007
15222
  temperature: options?.temperature ?? this.config.temperature ?? 0
15008
15223
  },
15224
+ ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
15009
15225
  store: false
15010
15226
  });
15011
15227
  let content = "";
@@ -15051,12 +15267,14 @@ var OpenAIProvider = class {
15051
15267
  const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
15052
15268
  const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
15053
15269
  const supportsTemp = this.supportsTemperature(model);
15270
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
15054
15271
  const stream = await this.client.responses.create({
15055
15272
  model,
15056
15273
  input,
15057
15274
  instructions: instructions ?? void 0,
15058
15275
  max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
15059
15276
  ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
15277
+ ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
15060
15278
  store: false,
15061
15279
  stream: true
15062
15280
  });
@@ -15114,12 +15332,14 @@ var OpenAIProvider = class {
15114
15332
  const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
15115
15333
  const tools = options.tools.length > 0 ? this.convertToolsForResponses(options.tools) : void 0;
15116
15334
  const supportsTemp = this.supportsTemperature(model);
15335
+ const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
15117
15336
  const requestParams = {
15118
15337
  model,
15119
15338
  input,
15120
15339
  instructions: instructions ?? void 0,
15121
15340
  max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
15122
15341
  ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
15342
+ ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
15123
15343
  store: false,
15124
15344
  stream: true
15125
15345
  };
@@ -16298,12 +16518,17 @@ var GeminiProvider = class {
16298
16518
  return model ?? this.config.model ?? DEFAULT_MODEL5;
16299
16519
  }
16300
16520
  buildConfig(messages, options, tools, toolChoice) {
16521
+ const model = this.getModel(options?.model);
16522
+ const thinkingBudget = mapToGeminiBudget(options?.thinking, model);
16301
16523
  const config = {
16302
16524
  maxOutputTokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
16303
16525
  temperature: options?.temperature ?? this.config.temperature ?? 0,
16304
16526
  stopSequences: options?.stopSequences,
16305
16527
  systemInstruction: this.extractSystem(messages, options?.system)
16306
16528
  };
16529
+ if (thinkingBudget !== void 0) {
16530
+ config.thinkingConfig = { thinkingBudget };
16531
+ }
16307
16532
  if (tools && tools.length > 0) {
16308
16533
  config.tools = [{ functionDeclarations: this.convertTools(tools) }];
16309
16534
  config.toolConfig = {