@qwen-code/qwen-code 0.12.5 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +73 -49
- package/package.json +2 -2
package/cli.js
CHANGED
|
@@ -133792,6 +133792,10 @@ function normalize2(model) {
|
|
|
133792
133792
|
s5 = s5.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, "");
|
|
133793
133793
|
return s5;
|
|
133794
133794
|
}
|
|
133795
|
+
function hasExplicitOutputLimit(model) {
|
|
133796
|
+
const norm = normalize2(model);
|
|
133797
|
+
return OUTPUT_PATTERNS.some(([regex3]) => regex3.test(norm));
|
|
133798
|
+
}
|
|
133795
133799
|
function tokenLimit(model, type = "input") {
|
|
133796
133800
|
const norm = normalize2(model);
|
|
133797
133801
|
const patterns = type === "output" ? OUTPUT_PATTERNS : PATTERNS;
|
|
@@ -133808,7 +133812,7 @@ var init_tokenLimits = __esm({
|
|
|
133808
133812
|
"use strict";
|
|
133809
133813
|
init_esbuild_shims();
|
|
133810
133814
|
DEFAULT_TOKEN_LIMIT = 131072;
|
|
133811
|
-
DEFAULT_OUTPUT_TOKEN_LIMIT =
|
|
133815
|
+
DEFAULT_OUTPUT_TOKEN_LIMIT = 32e3;
|
|
133812
133816
|
LIMITS = {
|
|
133813
133817
|
"32k": 32768,
|
|
133814
133818
|
"64k": 65536,
|
|
@@ -133932,6 +133936,7 @@ var init_tokenLimits = __esm({
|
|
|
133932
133936
|
// Kimi
|
|
133933
133937
|
[/^kimi-k2\.5/, LIMITS["32k"]]
|
|
133934
133938
|
];
|
|
133939
|
+
__name(hasExplicitOutputLimit, "hasExplicitOutputLimit");
|
|
133935
133940
|
__name(tokenLimit, "tokenLimit");
|
|
133936
133941
|
}
|
|
133937
133942
|
});
|
|
@@ -134801,18 +134806,6 @@ var init_modelsConfig = __esm({
|
|
|
134801
134806
|
detail: "auto-detected from model"
|
|
134802
134807
|
};
|
|
134803
134808
|
}
|
|
134804
|
-
if (!this._generationConfig.samplingParams?.max_tokens) {
|
|
134805
|
-
const outputLimit = tokenLimit(model.id, "output");
|
|
134806
|
-
if (!this._generationConfig.samplingParams) {
|
|
134807
|
-
this._generationConfig.samplingParams = {};
|
|
134808
|
-
}
|
|
134809
|
-
this._generationConfig.samplingParams.max_tokens = outputLimit;
|
|
134810
|
-
const existingSource = this.generationConfigSources["samplingParams"];
|
|
134811
|
-
this.generationConfigSources["samplingParams"] = {
|
|
134812
|
-
kind: "computed",
|
|
134813
|
-
detail: existingSource ? `max_tokens auto-detected from model (other params from ${existingSource.kind})` : "max_tokens auto-detected from model"
|
|
134814
|
-
};
|
|
134815
|
-
}
|
|
134816
134809
|
if (gc.modalities === void 0) {
|
|
134817
134810
|
this._generationConfig.modalities = defaultModalities(model.id);
|
|
134818
134811
|
this.generationConfigSources["modalities"] = {
|
|
@@ -142713,6 +142706,7 @@ var init_default = __esm({
|
|
|
142713
142706
|
init_openai();
|
|
142714
142707
|
init_constants2();
|
|
142715
142708
|
init_runtimeFetchOptions();
|
|
142709
|
+
init_tokenLimits();
|
|
142716
142710
|
DefaultOpenAICompatibleProvider = class {
|
|
142717
142711
|
static {
|
|
142718
142712
|
__name(this, "DefaultOpenAICompatibleProvider");
|
|
@@ -142747,15 +142741,66 @@ var init_default = __esm({
|
|
|
142747
142741
|
}
|
|
142748
142742
|
buildRequest(request4, _userPromptId) {
|
|
142749
142743
|
const extraBody = this.contentGeneratorConfig.extra_body;
|
|
142744
|
+
const requestWithTokenLimits = this.applyOutputTokenLimit(request4);
|
|
142750
142745
|
return {
|
|
142751
|
-
...request4,
|
|
142752
|
-
// Preserve all original parameters including sampling params
|
|
142746
|
+
...requestWithTokenLimits,
|
|
142753
142747
|
...extraBody ? extraBody : {}
|
|
142754
142748
|
};
|
|
142755
142749
|
}
|
|
142756
142750
|
getDefaultGenerationConfig() {
|
|
142757
142751
|
return {};
|
|
142758
142752
|
}
|
|
142753
|
+
/**
|
|
142754
|
+
* Apply output token limit to a request's max_tokens parameter.
|
|
142755
|
+
*
|
|
142756
|
+
* Purpose:
|
|
142757
|
+
* Some APIs (e.g., OpenAI-compatible) default to a very small max_tokens value,
|
|
142758
|
+
* which can cause responses to be truncated mid-output. This function ensures
|
|
142759
|
+
* a reasonable default is set while respecting user configuration.
|
|
142760
|
+
*
|
|
142761
|
+
* Logic:
|
|
142762
|
+
* 1. If user explicitly configured max_tokens:
|
|
142763
|
+
* - For known models (in OUTPUT_PATTERNS): use the user's value, but cap at
|
|
142764
|
+
* model's max output limit to avoid API errors
|
|
142765
|
+
* (input + max_output > contextWindowSize would cause 400 errors on some APIs)
|
|
142766
|
+
* - For unknown models (deployment aliases, self-hosted): respect user's
|
|
142767
|
+
* configured value entirely (backend may support larger limits)
|
|
142768
|
+
* 2. If user didn't configure max_tokens:
|
|
142769
|
+
* - Use min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT)
|
|
142770
|
+
* - This provides a conservative default (32K) that avoids truncating output
|
|
142771
|
+
* while preserving input quota (not occupying too much context window)
|
|
142772
|
+
* 3. If model has no specific limit (tokenLimit returns default):
|
|
142773
|
+
* - Still apply DEFAULT_OUTPUT_TOKEN_LIMIT as safeguard
|
|
142774
|
+
*
|
|
142775
|
+
* Examples:
|
|
142776
|
+
* - User sets 4K, known model limit 64K → uses 4K (respects user preference)
|
|
142777
|
+
* - User sets 100K, known model limit 64K → uses 64K (capped to avoid API error)
|
|
142778
|
+
* - User sets 100K, unknown model → uses 100K (respects user, backend may support it)
|
|
142779
|
+
* - User not set, model limit 64K → uses 32K (conservative default)
|
|
142780
|
+
* - User not set, model limit 8K → uses 8K (model limit is lower)
|
|
142781
|
+
*
|
|
142782
|
+
* @param request - The chat completion request parameters
|
|
142783
|
+
* @returns The request with max_tokens adjusted according to the logic
|
|
142784
|
+
*/
|
|
142785
|
+
applyOutputTokenLimit(request4) {
|
|
142786
|
+
const userMaxTokens = request4.max_tokens;
|
|
142787
|
+
const modelLimit = tokenLimit(request4.model, "output");
|
|
142788
|
+
const isKnownModel = hasExplicitOutputLimit(request4.model);
|
|
142789
|
+
let effectiveMaxTokens;
|
|
142790
|
+
if (userMaxTokens !== void 0 && userMaxTokens !== null) {
|
|
142791
|
+
if (isKnownModel) {
|
|
142792
|
+
effectiveMaxTokens = Math.min(userMaxTokens, modelLimit);
|
|
142793
|
+
} else {
|
|
142794
|
+
effectiveMaxTokens = userMaxTokens;
|
|
142795
|
+
}
|
|
142796
|
+
} else {
|
|
142797
|
+
effectiveMaxTokens = Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
|
|
142798
|
+
}
|
|
142799
|
+
return {
|
|
142800
|
+
...request4,
|
|
142801
|
+
max_tokens: effectiveMaxTokens
|
|
142802
|
+
};
|
|
142803
|
+
}
|
|
142759
142804
|
};
|
|
142760
142805
|
}
|
|
142761
142806
|
});
|
|
@@ -142802,16 +142847,13 @@ var init_dashscope = __esm({
|
|
|
142802
142847
|
init_contentGenerator();
|
|
142803
142848
|
init_constants2();
|
|
142804
142849
|
init_runtimeFetchOptions();
|
|
142805
|
-
|
|
142806
|
-
DashScopeOpenAICompatibleProvider = class _DashScopeOpenAICompatibleProvider {
|
|
142850
|
+
init_default();
|
|
142851
|
+
DashScopeOpenAICompatibleProvider = class _DashScopeOpenAICompatibleProvider extends DefaultOpenAICompatibleProvider {
|
|
142807
142852
|
static {
|
|
142808
142853
|
__name(this, "DashScopeOpenAICompatibleProvider");
|
|
142809
142854
|
}
|
|
142810
|
-
contentGeneratorConfig;
|
|
142811
|
-
cliConfig;
|
|
142812
142855
|
constructor(contentGeneratorConfig, cliConfig) {
|
|
142813
|
-
|
|
142814
|
-
this.contentGeneratorConfig = contentGeneratorConfig;
|
|
142856
|
+
super(contentGeneratorConfig, cliConfig);
|
|
142815
142857
|
}
|
|
142816
142858
|
static isDashScopeProvider(contentGeneratorConfig) {
|
|
142817
142859
|
const { authType, baseUrl } = contentGeneratorConfig;
|
|
@@ -143003,29 +143045,6 @@ var init_dashscope = __esm({
|
|
|
143003
143045
|
}
|
|
143004
143046
|
return false;
|
|
143005
143047
|
}
|
|
143006
|
-
/**
|
|
143007
|
-
* Apply output token limit to a request's max_tokens parameter.
|
|
143008
|
-
*
|
|
143009
|
-
* Ensures that existing max_tokens parameters don't exceed the model's maximum output
|
|
143010
|
-
* token limit. Only modifies max_tokens when already present in the request.
|
|
143011
|
-
*
|
|
143012
|
-
* @param request - The chat completion request parameters
|
|
143013
|
-
* @returns The request with max_tokens adjusted to respect the model's limits (if present)
|
|
143014
|
-
*/
|
|
143015
|
-
applyOutputTokenLimit(request4) {
|
|
143016
|
-
const currentMaxTokens = request4.max_tokens;
|
|
143017
|
-
if (currentMaxTokens === void 0 || currentMaxTokens === null) {
|
|
143018
|
-
return request4;
|
|
143019
|
-
}
|
|
143020
|
-
const modelLimit = tokenLimit(request4.model, "output");
|
|
143021
|
-
if (currentMaxTokens > modelLimit) {
|
|
143022
|
-
return {
|
|
143023
|
-
...request4,
|
|
143024
|
-
max_tokens: modelLimit
|
|
143025
|
-
};
|
|
143026
|
-
}
|
|
143027
|
-
return request4;
|
|
143028
|
-
}
|
|
143029
143048
|
/**
|
|
143030
143049
|
* Check if cache control should be disabled based on configuration.
|
|
143031
143050
|
*
|
|
@@ -157264,6 +157283,7 @@ var init_anthropicContentGenerator = __esm({
|
|
|
157264
157283
|
init_runtimeFetchOptions();
|
|
157265
157284
|
init_constants2();
|
|
157266
157285
|
init_debugLogger();
|
|
157286
|
+
init_tokenLimits();
|
|
157267
157287
|
debugLogger14 = createDebugLogger("ANTHROPIC");
|
|
157268
157288
|
AnthropicContentGenerator = class {
|
|
157269
157289
|
static {
|
|
@@ -157373,7 +157393,11 @@ var init_anthropicContentGenerator = __esm({
|
|
|
157373
157393
|
const requestValue = requestKey ? requestConfig[requestKey] : void 0;
|
|
157374
157394
|
return configValue !== void 0 ? configValue : requestValue;
|
|
157375
157395
|
}, "getParam");
|
|
157376
|
-
const
|
|
157396
|
+
const userMaxTokens = getParam("max_tokens", "maxOutputTokens");
|
|
157397
|
+
const modelId = this.contentGeneratorConfig.model;
|
|
157398
|
+
const modelLimit = tokenLimit(modelId, "output");
|
|
157399
|
+
const isKnownModel = hasExplicitOutputLimit(modelId);
|
|
157400
|
+
const maxTokens = userMaxTokens !== void 0 && userMaxTokens !== null ? isKnownModel ? Math.min(userMaxTokens, modelLimit) : userMaxTokens : Math.min(modelLimit, DEFAULT_OUTPUT_TOKEN_LIMIT);
|
|
157377
157401
|
return {
|
|
157378
157402
|
max_tokens: maxTokens,
|
|
157379
157403
|
temperature: getParam("temperature", "temperature") ?? 1,
|
|
@@ -157778,7 +157802,7 @@ __export(geminiContentGenerator_exports, {
|
|
|
157778
157802
|
createGeminiContentGenerator: () => createGeminiContentGenerator
|
|
157779
157803
|
});
|
|
157780
157804
|
function createGeminiContentGenerator(config2, gcConfig) {
|
|
157781
|
-
const version2 = "0.12.5";
|
|
157805
|
+
const version2 = "0.12.6";
|
|
157782
157806
|
const userAgent2 = config2.userAgent || `QwenCode/${version2} (${process.platform}; ${process.arch})`;
|
|
157783
157807
|
const baseHeaders = {
|
|
157784
157808
|
"User-Agent": userAgent2
|
|
@@ -390052,7 +390076,7 @@ __name(getPackageJson, "getPackageJson");
|
|
|
390052
390076
|
// packages/cli/src/utils/version.ts
|
|
390053
390077
|
async function getCliVersion() {
|
|
390054
390078
|
const pkgJson = await getPackageJson();
|
|
390055
|
-
return "0.12.5";
|
|
390079
|
+
return "0.12.6";
|
|
390056
390080
|
}
|
|
390057
390081
|
__name(getCliVersion, "getCliVersion");
|
|
390058
390082
|
|
|
@@ -397613,7 +397637,7 @@ var formatDuration = /* @__PURE__ */ __name((milliseconds) => {
|
|
|
397613
397637
|
|
|
397614
397638
|
// packages/cli/src/generated/git-commit.ts
|
|
397615
397639
|
init_esbuild_shims();
|
|
397616
|
-
var GIT_COMMIT_INFO = "
|
|
397640
|
+
var GIT_COMMIT_INFO = "ac30c98a2";
|
|
397617
397641
|
|
|
397618
397642
|
// packages/cli/src/utils/systemInfo.ts
|
|
397619
397643
|
async function getNpmVersion() {
|
|
@@ -456290,7 +456314,7 @@ var QwenAgent = class {
|
|
|
456290
456314
|
async initialize(args) {
|
|
456291
456315
|
this.clientCapabilities = args.clientCapabilities;
|
|
456292
456316
|
const authMethods = buildAuthMethods();
|
|
456293
|
-
const version2 = "0.12.5";
|
|
456317
|
+
const version2 = "0.12.6";
|
|
456294
456318
|
return {
|
|
456295
456319
|
protocolVersion: PROTOCOL_VERSION,
|
|
456296
456320
|
agentInfo: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@qwen-code/qwen-code",
|
|
3
|
-
"version": "0.12.5",
|
|
3
|
+
"version": "0.12.6",
|
|
4
4
|
"description": "Qwen Code - AI-powered coding assistant",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"locales"
|
|
21
21
|
],
|
|
22
22
|
"config": {
|
|
23
|
-
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.5"
|
|
23
|
+
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.12.6"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {},
|
|
26
26
|
"optionalDependencies": {
|