@qwen-code/qwen-code 0.12.5 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/cli.js +73 -49
  2. package/package.json +2 -2
package/cli.js CHANGED
@@ -133792,6 +133792,10 @@ function normalize2(model) {
133792
133792
  s5 = s5.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, "");
133793
133793
  return s5;
133794
133794
  }
133795
+ function hasExplicitOutputLimit(model) {
133796
+ const norm = normalize2(model);
133797
+ return OUTPUT_PATTERNS.some(([regex3]) => regex3.test(norm));
133798
+ }
133795
133799
  function tokenLimit(model, type = "input") {
133796
133800
  const norm = normalize2(model);
133797
133801
  const patterns = type === "output" ? OUTPUT_PATTERNS : PATTERNS;
@@ -133808,7 +133812,7 @@ var init_tokenLimits = __esm({
133808
133812
  "use strict";
133809
133813
  init_esbuild_shims();
133810
133814
  DEFAULT_TOKEN_LIMIT = 131072;
133811
- DEFAULT_OUTPUT_TOKEN_LIMIT = 16384;
133815
+ DEFAULT_OUTPUT_TOKEN_LIMIT = 32e3;
133812
133816
  LIMITS = {
133813
133817
  "32k": 32768,
133814
133818
  "64k": 65536,
@@ -133932,6 +133936,7 @@ var init_tokenLimits = __esm({
133932
133936
  // Kimi
133933
133937
  [/^kimi-k2\.5/, LIMITS["32k"]]
133934
133938
  ];
133939
+ __name(hasExplicitOutputLimit, "hasExplicitOutputLimit");
133935
133940
  __name(tokenLimit, "tokenLimit");
133936
133941
  }
133937
133942
  });
@@ -134801,18 +134806,6 @@ var init_modelsConfig = __esm({
134801
134806
  detail: "auto-detected from model"
134802
134807
  };
134803
134808
  }
134804
- if (!this._generationConfig.samplingParams?.max_tokens) {
134805
- const outputLimit = tokenLimit(model.id, "output");
134806
- if (!this._generationConfig.samplingParams) {
134807
- this._generationConfig.samplingParams = {};
134808
- }
134809
- this._generationConfig.samplingParams.max_tokens = outputLimit;
134810
- const existingSource = this.generationConfigSources["samplingParams"];
134811
- this.generationConfigSources["samplingParams"] = {
134812
- kind: "computed",
134813
- detail: existingSource ? `max_tokens auto-detected from model (other params from ${existingSource.kind})` : "max_tokens auto-detected from model"
134814
- };
134815
- }
134816
134809
  if (gc.modalities === void 0) {
134817
134810
  this._generationConfig.modalities = defaultModalities(model.id);
134818
134811
  this.generationConfigSources["modalities"] = {
@@ -142713,6 +142706,7 @@ var init_default = __esm({
142713
142706
  init_openai();
142714
142707
  init_constants2();
142715
142708
  init_runtimeFetchOptions();
142709
+ init_tokenLimits();
142716
142710
  DefaultOpenAICompatibleProvider = class {
142717
142711
  static {
142718
142712
  __name(this, "DefaultOpenAICompatibleProvider");
@@ -142747,15 +142741,66 @@ var init_default = __esm({
142747
142741
  }
142748
142742
  buildRequest(request4, _userPromptId) {
142749
142743
  const extraBody = this.contentGeneratorConfig.extra_body;
142744
+ const requestWithTokenLimits = this.applyOutputTokenLimit(request4);
142750
142745
  return {
142751
- ...request4,
142752
- // Preserve all original parameters including sampling params
142746
+ ...requestWithTokenLimits,
142753
142747
  ...extraBody ? extraBody : {}
142754
142748
  };
142755
142749
  }
142756
142750
  getDefaultGenerationConfig() {
142757
142751
  return {};
142758
142752
  }
142753
+ /**
142754
+ * Apply output token limit to a request's max_tokens parameter.
142755
+ *
142756
+ * Purpose:
142757
+ * Some APIs (e.g., OpenAI-compatible) default to a very small max_tokens value,
142758
+ * which can cause responses to be truncated mid-output. This function ensures
142759
+ * a reasonable default is set while respecting user configuration.
142760
+ *
142761
+ * Logic:
142762
+ * 1. If user explicitly configured max_tokens:
142763
+ * - For known models (in OUTPUT_PATTERNS): use the user's value, but cap at
142764
+ * model's max output limit to avoid API errors
142765
+ * (input + max_output > contextWindowSize would cause 400 errors on some APIs)
142766
+ * - For unknown models (deployment aliases, self-hosted): respect user's
142767
+ * configured value entirely (backend may support larger limits)
142768
+ * 2. If user didn't configure max_tokens:
142769
+ * - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT)
142770
+ * - This provides a conservative default (32K) that avoids truncating output
142771
+ * while preserving input quota (not occupying too much context window)
142772
+ * 3. If model has no specific limit (tokenLimit returns default):
142773
+ * - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard
142774
+ *
142775
+ * Examples:
142776
+ * - User sets 4K, known model limit 64K → uses 4K (respects user preference)
142777
+ * - User sets 100K, known model limit 64K → uses 64K (capped to avoid API error)
142778
+ * - User sets 100K, unknown model → uses 100K (respects user, backend may support it)
142779
+ * - User not set, model limit 64K → uses 32K (conservative default)
142780
+ * - User not set, model limit 8K → uses 8K (model limit is lower)
142781
+ *
142782
+ * @param request - The chat completion request parameters
142783
+ * @returns The request with max_tokens adjusted according to the logic
142784
+ */
142785
+ applyOutputTokenLimit(request4) {
142786
+ const userMaxTokens = request4.max_tokens;
142787
+ const modelLimit = tokenLimit(request4.model, "output");
142788
+ const isKnownModel = hasExplicitOutputLimit(request4.model);
142789
+ let effectiveMaxTokens;
142790
+ if (userMaxTokens !== void 0 && userMaxTokens !== null) {
142791
+ if (isKnownModel) {
142792
+ effectiveMaxTokens = Math.min(userMaxTokens, modelLimit);
142793
+ } else {
142794
+ effectiveMaxTokens = userMaxTokens;
142795
+ }
142796
+ } else {
142797
+ effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
142798
+ }
142799
+ return {
142800
+ ...request4,
142801
+ max_tokens: effectiveMaxTokens
142802
+ };
142803
+ }
142759
142804
  };
142760
142805
  }
142761
142806
  });
@@ -142802,16 +142847,13 @@ var init_dashscope = __esm({
142802
142847
  init_contentGenerator();
142803
142848
  init_constants2();
142804
142849
  init_runtimeFetchOptions();
142805
- init_tokenLimits();
142806
- DashScopeOpenAICompatibleProvider = class _DashScopeOpenAICompatibleProvider {
142850
+ init_default();
142851
+ DashScopeOpenAICompatibleProvider = class _DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatibleProvider {
142807
142852
  static {
142808
142853
  __name(this, "DashScopeOpenAICompatibleProvider");
142809
142854
  }
142810
- contentGeneratorConfig;
142811
- cliConfig;
142812
142855
  constructor(contentGeneratorConfig, cliConfig) {
142813
- this.cliConfig = cliConfig;
142814
- this.contentGeneratorConfig = contentGeneratorConfig;
142856
+ super(contentGeneratorConfig, cliConfig);
142815
142857
  }
142816
142858
  static isDashScopeProvider(contentGeneratorConfig) {
142817
142859
  const { authType, baseUrl } = contentGeneratorConfig;
@@ -143003,29 +143045,6 @@ var init_dashscope = __esm({
143003
143045
  }
143004
143046
  return false;
143005
143047
  }
143006
- /**
143007
- * Apply output token limit to a request's max_tokens parameter.
143008
- *
143009
- * Ensures that existing max_tokens parameters don't exceed the model's maximum output
143010
- * token limit. Only modifies max_tokens when already present in the request.
143011
- *
143012
- * @param request - The chat completion request parameters
143013
- * @returns The request with max_tokens adjusted to respect the model's limits (if present)
143014
- */
143015
- applyOutputTokenLimit(request4) {
143016
- const currentMaxTokens = request4.max_tokens;
143017
- if (currentMaxTokens === void 0 || currentMaxTokens === null) {
143018
- return request4;
143019
- }
143020
- const modelLimit = tokenLimit(request4.model, "output");
143021
- if (currentMaxTokens > modelLimit) {
143022
- return {
143023
- ...request4,
143024
- max_tokens: modelLimit
143025
- };
143026
- }
143027
- return request4;
143028
- }
143029
143048
  /**
143030
143049
  * Check if cache control should be disabled based on configuration.
143031
143050
  *
@@ -157264,6 +157283,7 @@ var init_anthropicContentGenerator = __esm({
157264
157283
  init_runtimeFetchOptions();
157265
157284
  init_constants2();
157266
157285
  init_debugLogger();
157286
+ init_tokenLimits();
157267
157287
  debugLogger14 = createDebugLogger("ANTHROPIC");
157268
157288
  AnthropicContentGenerator = class {
157269
157289
  static {
@@ -157373,7 +157393,11 @@ var init_anthropicContentGenerator = __esm({
157373
157393
  const requestValue = requestKey ? requestConfig[requestKey] : void 0;
157374
157394
  return configValue !== void 0 ? configValue : requestValue;
157375
157395
  }, "getParam");
157376
- const maxTokens = getParam("max_tokens", "maxOutputTokens") ?? 1e4;
157396
+ const userMaxTokens = getParam("max_tokens", "maxOutputTokens");
157397
+ const modelId = this.contentGeneratorConfig.model;
157398
+ const modelLimit = tokenLimit(modelId, "output");
157399
+ const isKnownModel = hasExplicitOutputLimit(modelId);
157400
+ const maxTokens = userMaxTokens !== void 0 && userMaxTokens !== null ? isKnownModel ? Math.min(userMaxTokens, modelLimit) : userMaxTokens : Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
157377
157401
  return {
157378
157402
  max_tokens: maxTokens,
157379
157403
  temperature: getParam("temperature", "temperature") ?? 1,
@@ -157778,7 +157802,7 @@ __export(geminiContentGenerator_exports, {
157778
157802
  createGeminiContentGenerator: () => createGeminiContentGenerator
157779
157803
  });
157780
157804
  function createGeminiContentGenerator(config2, gcConfig) {
157781
- const version2 = "0.12.5";
157805
+ const version2 = "0.12.6";
157782
157806
  const userAgent2 = config2.userAgent || `QwenCode/${version2} (${process.platform}; ${process.arch})`;
157783
157807
  const baseHeaders = {
157784
157808
  "User-Agent": userAgent2
@@ -390052,7 +390076,7 @@ __name(getPackageJson, "getPackageJson");
390052
390076
  // packages/cli/src/utils/version.ts
390053
390077
  async function getCliVersion() {
390054
390078
  const pkgJson = await getPackageJson();
390055
- return "0.12.5";
390079
+ return "0.12.6";
390056
390080
  }
390057
390081
  __name(getCliVersion, "getCliVersion");
390058
390082
 
@@ -397613,7 +397637,7 @@ var formatDuration = /* @__PURE__ */ __name((milliseconds) => {
397613
397637
 
397614
397638
  // packages/cli/src/generated/git-commit.ts
397615
397639
  init_esbuild_shims();
397616
- var GIT_COMMIT_INFO = "dbfa5b3e8";
397640
+ var GIT_COMMIT_INFO = "ac30c98a2";
397617
397641
 
397618
397642
  // packages/cli/src/utils/systemInfo.ts
397619
397643
  async function getNpmVersion() {
@@ -456290,7 +456314,7 @@ var QwenAgent = class {
456290
456314
  async initialize(args) {
456291
456315
  this.clientCapabilities = args.clientCapabilities;
456292
456316
  const authMethods = buildAuthMethods();
456293
- const version2 = "0.12.5";
456317
+ const version2 = "0.12.6";
456294
456318
  return {
456295
456319
  protocolVersion: PROTOCOL_VERSION,
456296
456320
  agentInfo: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@qwen-code/qwen-code",
3
- "version": "0.12.5",
3
+ "version": "0.12.6",
4
4
  "description": "Qwen Code - AI-powered coding assistant",
5
5
  "repository": {
6
6
  "type": "git",
@@ -20,7 +20,7 @@
20
20
  "locales"
21
21
  ],
22
22
  "config": {
23
- "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.5"
23
+ "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.6"
24
24
  },
25
25
  "dependencies": {},
26
26
  "optionalDependencies": {