npm - @shenghuabi/llama - Versions diffs - 1.0.0 - Mend

@shenghuabi/llama 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/define/common.define.d.ts +534 -0
package/define/example-specific.define.d.ts +412 -0
package/define/index.d.ts +2378 -0
package/define/index.js +2403 -0
package/define/index.js.map +7 -0
package/define/index.mjs +2361 -0
package/define/index.mjs.map +7 -0
package/define/llama-server.define.d.ts +2347 -0
package/define/llama-swap.define.d.ts +12 -0
package/define/sampling.define.d.ts +218 -0
package/download/auto-check-vendor.d.ts +4 -0
package/download/get-gguf-file.d.ts +9 -0
package/download/get-model-manifest.d.ts +18 -0
package/download/get-ollama-gguf-file.d.ts +7 -0
package/index.d.ts +3 -0
package/index.js +3077 -0
package/index.js.map +7 -0
package/index.mjs +3035 -0
package/index.mjs.map +7 -0
package/llama-swap.service.d.ts +1029 -0
package/ollama-model/manifest.type.d.ts +11 -0
package/ollama-model/ollama-manifests.d.ts +4 -0
package/package.json +43 -0
package/token.d.ts +10 -0
package/util/file-name-by-version.d.ts +2 -0

package/define/llama-server.define.d.ts ADDED

@@ -0,0 +1,2347 @@
+import * as v from 'valibot';
+export declare const LlamaServerDefine: v.ObjectSchema<{
+    readonly common: v.OptionalSchema<v.ObjectSchema<{
+        readonly 'verbose-prompt': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 在生成前打印详细提示（默认：false）\n- print a verbose prompt before generation (default: false)">]>;
+        readonly threads: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 生成期间使用的线程数（默认：-1）\n- number of threads to use during generation (default: -1)  (env: LLAMA_ARG_THREADS)">]>;
+        readonly 'threads-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 批量和提示处理期间使用的线程数（默认：与--threads相同）\n- number of threads to use during batch and prompt processing (default: same as --threads)">]>;
+        readonly 'cpu-mask': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "M">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- CPU亲和力掩码：任意长度的十六进制数。补充cpu-range（默认：\"\"）\n- CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")">]>;
+        readonly 'cpu-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "lo-hi">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- CPU范围用于亲和力。补充--cpu-mask\n- range of CPUs for affinity. Complements --cpu-mask">]>;
+        readonly 'cpu-strict': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["0" | "1"] | undefined;
+        } | undefined, "- 使用严格CPU放置（默认：0）\n- use strict CPU placement (default: 0)">]>;
+        readonly prio: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<[0, 1, 2, 3], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [0 | 2 | 1 | 3] | undefined;
+        } | undefined, "- 设置进程/线程优先级：低(-1)、正常(0)、中等(1)、高(2)、实时(3)（默认值：0）\n- set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: 0)">]>;
+        readonly poll: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<0...100>">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 使用轮询级别等待工作 (0 - 无轮询，默认: 50)\n- use polling level to wait for work (0 - no polling, default: 50)">]>;
+        readonly 'cpu-mask-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "M">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- CPU亲和力掩码：任意长度的十六进制数。补充cpu-range-batch（默认：与--cpu-mask相同）\n- CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)">]>;
+        readonly 'cpu-range-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "lo-hi">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- CPU亲和力的范围。补充--cpu-mask-batch\n- ranges of CPUs for affinity. Complements --cpu-mask-batch">]>;
+        readonly 'cpu-strict-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["0" | "1"] | undefined;
+        } | undefined, "- 使用严格的CPU放置（默认：与--cpu-strict相同）\n- use strict CPU placement (default: same as --cpu-strict)">]>;
+        readonly 'prio-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<[0, 1, 2, 3], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [0 | 2 | 1 | 3] | undefined;
+        } | undefined, "- 设置进程/线程优先级 : 0-正常, 1-中等, 2-高, 3-实时 (默认: 0)\n- set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: 0)">]>;
+        readonly 'poll-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["0" | "1"] | undefined;
+        } | undefined, "- 使用轮询等待工作（默认：与--poll相同）\n- use polling to wait for work (default: same as --poll)">]>;
+        readonly 'ctx-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 提示上下文的大小（默认：4096，0 = 从模型加载）\n- size of the prompt context (default: 4096, 0 = loaded from model)  (env: LLAMA_ARG_CTX_SIZE)">]>;
+        readonly predict: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 预测的token数量（默认：-1，-1 = 无限）\n- number of tokens to predict (default: -1, -1 = infinity)  (env: LLAMA_ARG_N_PREDICT)">]>;
+        readonly 'batch-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 逻辑最大批处理大小（默认：2048）\n- logical maximum batch size (default: 2048)  (env: LLAMA_ARG_BATCH)">]>;
+        readonly 'ubatch-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 物理最大批处理大小（默认：512）\n- physical maximum batch size (default: 512)  (env: LLAMA_ARG_UBATCH)">]>;
+        readonly keep: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 保留初始提示中的令牌数量（默认：0，-1 = 所有）\n- number of tokens to keep from the initial prompt (default: 0, -1 = all)">]>;
+        readonly 'swa-full': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用全尺寸SWA缓存（默认：false） [更多信息](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)\n- use full-size SWA cache (default: false)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)  (env: LLAMA_ARG_SWA_FULL)">]>;
+        readonly 'kv-unified': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 为所有序列的 KV 缓存使用一个统一的 KV 缓冲区（默认值为 `false`）：[更多信息](https://github.com/ggml-org/llama.cpp/pull/14363)（环境变量：`LLAMA_ARG_KV_SPLIT`）\n- use single unified KV buffer for the KV cache of all sequences (default: false)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)  (env: LLAMA_ARG_KV_SPLIT)">]>;
+        readonly 'flash-attn': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["on", "off", "auto"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["on" | "off" | "auto"] | undefined;
+        } | undefined, "- 启用 Flash 注意力（默认：禁用）\n- set Flash Attention use ('on', 'off', or 'auto', default: 'auto')  (env: LLAMA_ARG_FLASH_ATTN)">]>;
+        readonly 'no-perf': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用内部libllama性能计时（默认：false）\n- disable internal libllama performance timings (default: false)  (env: LLAMA_ARG_NO_PERF)">]>;
+        readonly escape: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 处理转义序列（\\n, \\r, \\t, \\\", \\\\)（默认：true）\n- process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: true)">]>;
+        readonly 'no-escape': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 不要处理转义序列\n- do not process escape sequences">]>;
+        readonly 'rope-scaling': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["none", "linear", "yarn"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["none" | "linear" | "yarn"] | undefined;
+        } | undefined, "- RoPE频率缩放方法，除非模型指定，默认为线性\n- RoPE frequency scaling method, defaults to linear unless specified by the model  (env: LLAMA_ARG_ROPE_SCALING_TYPE)">]>;
+        readonly 'rope-scale': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- RoPE上下文缩放因子，将上下文扩展N倍\n- RoPE context scaling factor, expands context by a factor of N  (env: LLAMA_ARG_ROPE_SCALE)">]>;
+        readonly 'rope-freq-base': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- RoPE基频，用于NTK感知缩放（默认：从模型加载）\n- RoPE base frequency, used by NTK-aware scaling (default: loaded from model)  (env: LLAMA_ARG_ROPE_FREQ_BASE)">]>;
+        readonly 'rope-freq-scale': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- RoPE频率缩放因子，通过因子1/N扩展上下文\n- RoPE frequency scaling factor, expands context by a factor of 1/N  (env: LLAMA_ARG_ROPE_FREQ_SCALE)">]>;
+        readonly 'yarn-orig-ctx': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- YaRN: 原始上下文大小（默认：0 = 模型训练上下文大小）\n- YaRN: original context size of model (default: 0 = model training context size)  (env: LLAMA_ARG_YARN_ORIG_CTX)">]>;
+        readonly 'yarn-ext-factor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- YaRN: 插值混合因子（默认：-1.0，0.0 = 完全插值）\n- YaRN: extrapolation mix factor (default: -1.0, 0.0 = full interpolation)  (env: LLAMA_ARG_YARN_EXT_FACTOR)">]>;
+        readonly 'yarn-attn-factor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- YaRN: 缩放 sqrt(t) 或注意力幅度（默认：-1.0）\n- YaRN: scale sqrt(t) or attention magnitude (default: -1.0)  (env: LLAMA_ARG_YARN_ATTN_FACTOR)">]>;
+        readonly 'yarn-beta-slow': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- YaRN: 高校正维度或alpha（默认：-1.0）\n- YaRN: high correction dim or alpha (default: -1.0)  (env: LLAMA_ARG_YARN_BETA_SLOW)">]>;
+        readonly 'yarn-beta-fast': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- YaRN: 低校正维或beta（默认：-1）\n- YaRN: low correction dim or beta (default: -1.0)  (env: LLAMA_ARG_YARN_BETA_FAST)">]>;
+        readonly 'no-kv-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用KV卸载\n- disable KV offload  (env: LLAMA_ARG_NO_KV_OFFLOAD)">]>;
+        readonly 'no-repack': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用重量重新打包功能\n- disable weight repacking  (env: LLAMA_ARG_NO_REPACK)">]>;
+        readonly 'cache-type-k': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "iq4_nl", "q5_0", "q5_1"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
+        } | undefined, "- KV缓存数据类型用于K 允许的值: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 （默认: f16）\n- KV cache data type for K  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_K)">]>;
+        readonly 'cache-type-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "iq4_nl", "q5_0", "q5_1"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
+        } | undefined, "- KV缓存数据类型用于V，允许的值：f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1（默认：f16）\n- KV cache data type for V  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_V)">]>;
+        readonly 'defrag-thold': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- KV缓存碎片化阈值（默认：0.1，<0-禁用）\n- KV cache defragmentation threshold (DEPRECATED)  (env: LLAMA_ARG_DEFRAG_THOLD)">]>;
+        readonly parallel: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 并行解码序列的数量（默认：1）\n- number of parallel sequences to decode (default: 1)  (env: LLAMA_ARG_N_PARALLEL)">]>;
+        readonly mlock: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 强制系统将模型保留在RAM中而不是交换或压缩\n- force system to keep model in RAM rather than swapping or compressing  (env: LLAMA_ARG_MLOCK)">]>;
+        readonly 'no-mmap': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 不要内存映射模型（加载速度较慢，但可能减少页面丢失，如果未使用mlock）\n- do not memory-map model (slower load but may reduce pageouts if not using mlock)  (env: LLAMA_ARG_NO_MMAP)">]>;
+        readonly numa: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.PicklistSchema<["distribute", "isolate", "numactl"], undefined>, v.MetadataAction<"distribute" | "isolate" | "numactl", {
+                readonly enumOptions: readonly [{
+                    readonly label: "distribute";
+                    readonly value: "distribute";
+                    readonly description: "将执行均匀分布在所有节点上";
+                }, {
+                    readonly label: "isolate";
+                    readonly value: "isolate";
+                    readonly description: "仅在执行开始的节点的CPU上生成线程";
+                }, {
+                    readonly label: "numactl";
+                    readonly value: "numactl";
+                    readonly description: "使用numactl提供的CPU映射";
+                }];
+            }>]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["distribute" | "isolate" | "numactl"] | undefined;
+        } | undefined, "- 尝试在某些NUMA系统上进行优化 - distribute：将执行均匀分布在所有节点上 - isolate：仅在执行开始的节点的CPU上生成线程 - numactl：使用numactl提供的CPU映射 如果之前未运行过此操作，请在使用此功能前删除系统页面缓存 详见 https://github.com/ggml-org/llama.cpp/issues/1437\n- attempt optimizations that help on some NUMA systems  - distribute: spread execution evenly over all nodes  - isolate: only spawn threads on CPUs on the node that execution started on  - numactl: use the CPU map provided by numactl  if run without this previously, it is recommended to drop the system page cache before using this  see https://github.com/ggml-org/llama.cpp/issues/1437  (env: LLAMA_ARG_NUMA)">]>;
+        readonly device: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<dev1,dev2,..>">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 逗号分隔的设备列表，用于卸载（none = 不卸载）使用 --list-devices 查看可用设备列表\n- comma-separated list of devices to use for offloading (none = don't offload)  use --list-devices to see a list of available devices  (env: LLAMA_ARG_DEVICE)">]>;
+        readonly 'override-tensor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<tensor name pattern>=<buffer type>,...">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 将所有专家组合（Mixture of Experts, MoE）的权重数据存储在 CPU 中。\n- override tensor buffer type">]>;
+        readonly 'cpu-moe': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 将前 N 层的“专家混合”（Expert Mixing, MoE）权重数据保留在 CPU 中。\n- keep all Mixture of Experts (MoE) weights in the CPU  (env: LLAMA_ARG_CPU_MOE)">]>;
+        readonly 'n-cpu-moe': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 覆盖张量缓冲区类型\n- keep the Mixture of Experts (MoE) weights of the first N layers in the CPU  (env: LLAMA_ARG_N_CPU_MOE)">]>;
+        readonly 'gpu-layers': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 存储在VRAM中的图层数量\n- max. number of layers to store in VRAM (default: -1)  (env: LLAMA_ARG_N_GPU_LAYERS)">]>;
+        readonly 'split-mode': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.PicklistSchema<["none", "layer", "row"], undefined>, v.MetadataAction<"none" | "layer" | "row", {
+                readonly enumOptions: readonly [{
+                    readonly label: "none";
+                    readonly value: "none";
+                    readonly description: "仅使用一个GPU";
+                }, {
+                    readonly label: "layer";
+                    readonly value: "layer";
+                    readonly description: "分割层和KV跨GPU";
+                }, {
+                    readonly label: "row";
+                    readonly value: "row";
+                    readonly description: "分割行跨GPU";
+                }];
+            }>]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["none" | "layer" | "row"] | undefined;
+        } | undefined, "- 如何将模型分布在多个GPU上，可选：- none: 仅使用一个GPU - layer (默认): 分割层和KV跨GPU - row: 分割行跨GPU\n- how to split the model across multiple GPUs, one of:  - none: use one GPU only  - layer (default): split layers and KV across GPUs  - row: split rows across GPUs  (env: LLAMA_ARG_SPLIT_MODE)">]>;
+        readonly 'tensor-split': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "N0,N1,N2,...">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 将模型分片到每个GPU的分数，逗号分隔的比例列表，例如3,1\n- fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1  (env: LLAMA_ARG_TENSOR_SPLIT)">]>;
+        readonly 'main-gpu': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "INDEX">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 用于模型（split-mode = none）或中间结果和KV（split-mode = row）的GPU（默认：0）\n- the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: 0)  (env: LLAMA_ARG_MAIN_GPU)">]>;
+        readonly 'check-tensors': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 检查模型张量数据中的无效值（默认：false）\n- check model tensor data for invalid values (default: false)">]>;
+        readonly 'override-kv': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "KEY=TYPE:VALUE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 用于通过键覆盖模型元数据的高级选项。可以多次指定。类型：int、float、bool、str。示例：--override-kv tokenizer.ggml.add_bos_token=bool:false\n- advanced option to override model metadata by key. may be specified multiple times.  types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false">]>;
+        readonly 'no-op-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用将主机张量操作卸载到设备（默认：false）\n- disable offloading host tensor operations to device (default: false)">]>;
+        readonly lora: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- LoRA适配器路径（可以重复使用多个适配器）\n- path to LoRA adapter (can be repeated to use multiple adapters)">]>;
+        readonly 'lora-scaled': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>, v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "SCALE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string, number] | undefined;
+        } | undefined, "- LoRA适配器路径，带用户定义的缩放（可重复使用多个适配器）\n- path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)">]>;
+        readonly 'control-vector': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 添加控制向量  请注意：此参数可以重复以添加多个控制向量\n- add a control vector  note: this argument can be repeated to add multiple control vectors">]>;
+        readonly 'control-vector-scaled': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>, v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "SCALE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string, number] | undefined;
+        } | undefined, "- 添加一个带用户定义缩放 SCALE 的控制向量  注意: 此参数可以重复以添加多个缩放的控制向量\n- add a control vector with user defined scaling SCALE  note: this argument can be repeated to add multiple scaled control vectors">]>;
+        readonly 'control-vector-layer-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "START">]>, v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "END">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string, string] | undefined;
+        } | undefined, "- 应用控制向量的图层范围，起始和结束值（包含）\n- layer range to apply the control vector(s) to, start and end inclusive">]>;
+        readonly model: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 模型路径 (默认: `models/$filename` 与 `--hf-file` 或 `--model-url` 的文件名，否则 models/7B/ggml-model-f16.gguf)\n- model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set, otherwise models/7B/ggml-model-f16.gguf)  (env: LLAMA_ARG_MODEL)">]>;
+        readonly 'model-url': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `model-url` option: optional object with an `enable` flag (defaults to false) and an optional one-string tuple (trimmed, placeholder "MODEL_URL").
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "MODEL_URL">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): the zh half of the description literal below describes a Docker Hub model repository (optional repo `ai/`, optional quant `:latest`, example `gemma3`), while its en half says "model download url".
+            enable: boolean; // NOTE(review): that zh text matches the en half of the following 'docker-repo' entry, so the two zh descriptions look swapped -- fix at the source of this generated declaration.
+            value?: [string] | undefined;
+        } | undefined, "- Docker Hub 模型仓库：该仓库是可选的，默认值为 `ai/`；`quant` 参数也是可选的，默认值为 `:latest`。例如：`gemma3`（默认值：未使用）。\n- model download url (default: unused)  (env: LLAMA_ARG_MODEL_URL)">]>;
+        readonly 'docker-repo': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `docker-repo` option: optional object with an `enable` flag (defaults to false) and an optional one-element tuple.
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["<repo>/]<model>[:quant"], undefined>], undefined>, undefined>]>; // NOTE(review): the only value this picklist admits is the literal "<repo>/]<model>[:quant"; the en description's own example (`gemma3`) would not type-check. Presumably the placeholder `[<repo>/]<model>[:quant]` was parsed as a bracketed choice list -- compare 'hf-repo', which uses a trimmed string with the placeholder as its description. TODO confirm and fix in the generator.
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): the zh half of the description literal below reads "model download URL (default: unused)", which is the en text of the preceding 'model-url' entry; the two zh descriptions look swapped.
+            enable: boolean;
+            value?: ["<repo>/]<model>[:quant"] | undefined;
+        } | undefined, "- 模型下载地址（默认：未使用）\n- Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.  example: gemma3  (default: unused)  (env: LLAMA_ARG_DOCKER_REPO)">]>;
+        readonly 'hf-repo': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- Hugging Face 模型仓库; quant 是可选的，不区分大小写，默认为 Q4_K_M，如果不存在 Q4_K_M 则回退到仓库中的第一个文件。如果可用还会自动下载 mmproj。要禁用，添加 --no-mmproj  示例：unsloth/phi-4-GGUF:q4_k_m  （默认：未使用）\n- Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.  mmproj is also downloaded automatically if available. to disable, add --no-mmproj  example: unsloth/phi-4-GGUF:q4_k_m  (default: unused)  (env: LLAMA_ARG_HF_REPO)">]>;
+        readonly 'hf-repo-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 与--hf-repo相同，但用于草稿模型（默认：未使用）\n- Same as --hf-repo, but for the draft model (default: unused)  (env: LLAMA_ARG_HFD_REPO)">]>;
+        readonly 'hf-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- Hugging Face模型文件。如果指定了，将覆盖--hf-repo中的量化（默认：未使用）\n- Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)  (env: LLAMA_ARG_HF_FILE)">]>;
+        readonly 'hf-repo-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- Hugging Face vocoder模型仓库（默认：未使用）\n- Hugging Face model repository for the vocoder model (default: unused)  (env: LLAMA_ARG_HF_REPO_V)">]>;
+        readonly 'hf-file-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- Hugging Face模型文件用于声码器模型（默认：未使用）\n- Hugging Face model file for the vocoder model (default: unused)  (env: LLAMA_ARG_HF_FILE_V)">]>;
+        readonly 'hf-token': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TOKEN">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- Hugging Face访问令牌（默认：来自HF_TOKEN环境变量的值）\n- Hugging Face access token (default: value from HF_TOKEN environment variable)  (env: HF_TOKEN)">]>;
+        readonly 'log-disable': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 日志禁用\n- Log disable">]>;
+        readonly 'log-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 日志记录到文件\n- Log to file">]>;
+        readonly 'log-colors': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["on", "off", "auto"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["on" | "off" | "auto"] | undefined;
+        } | undefined, "- 启用彩色日志记录\n- Set colored logging ('on', 'off', or 'auto', default: 'auto')  'auto' enables colors when output is to a terminal  (env: LLAMA_LOG_COLORS)">]>;
+        readonly verbose: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 设置 verbosity 级别为无穷大（即记录所有消息，用于调试）\n- Set verbosity level to infinity (i.e. log all messages, useful for debugging)">]>;
+        readonly offline: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 离线模式：强制使用缓存，阻止网络访问\n- Offline mode: forces use of cache, prevents network access  (env: LLAMA_OFFLINE)">]>;
+        readonly verbosity: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 设置日志的详细程度阈值。消息的详细程度越高，将被忽略。\n- Set the verbosity threshold. Messages with a higher verbosity will be ignored.  (env: LLAMA_LOG_VERBOSITY)">]>;
+        readonly 'log-prefix': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用日志消息中的前缀\n- Enable prefix in log messages  (env: LLAMA_LOG_PREFIX)">]>;
+        readonly 'log-timestamps': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用日志消息中的时间戳\n- Enable timestamps in log messages  (env: LLAMA_LOG_TIMESTAMPS)">]>;
+        readonly 'cache-type-k-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TYPE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- KV缓存中用于草稿模型的K的数据类型，允许的值包括：f32、f16、bf16、q8_0、q4_0、q4_1、iq4_nl、q5_0、q5_1（默认值：f16）\n- KV cache data type for K for the draft model  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_K_DRAFT)">]>;
+        readonly 'cache-type-v-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TYPE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- KV缓存中用于草稿模型的V的数据类型，允许的值包括：f32、f16、bf16、q8_0、q4_0、q4_1、iq4_nl、q5_0、q5_1（默认值：f16）\n- KV cache data type for V for the draft model  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_V_DRAFT)">]>;
+    }, undefined>, undefined>;
+    readonly exampleSpecific: v.OptionalSchema<v.ObjectSchema<{
+        readonly 'swa-checkpoints': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 每个时间槽内可创建的 SWA 检查点最大数量（默认值：3）：[更多信息](https://github.com/ggml-org/llama.cpp/pull/15293)\n- max number of SWA checkpoints per slot to create (default: 3)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)  (env: LLAMA_ARG_SWA_CHECKPOINTS)">]>;
+        readonly 'no-context-shift': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `no-context-shift` option: optional object carrying only an `enable` flag (defaults to false).
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): in the description literal below the zh half says "default: disabled" while the en half says "default: enabled".
+            enable: boolean; // NOTE(review): the neighbouring 'context-shift' entry states "default: disabled" in both languages; confirm which default the targeted llama-server build uses and align the texts at the generator.
+        } | undefined, "- 禁用无限文本生成中的上下文转换（默认：禁用）\n- disables context shift on infinite text generation (default: enabled)  (env: LLAMA_ARG_NO_CONTEXT_SHIFT)">]>;
+        readonly 'context-shift': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 该功能允许在无限文本生成过程中实现上下文切换（默认设置为禁用状态）。\n- enables context shift on infinite text generation (default: disabled)  (env: LLAMA_ARG_CONTEXT_SHIFT)">]>;
+        readonly 'reverse-prompt': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PROMPT">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 在 PROMPT 状态下停止程序的执行，然后以交互模式恢复程序的控制权。\n- halt generation at PROMPT, return control in interactive mode">]>;
+        readonly special: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 特殊标记输出启用（默认：false）\n- special tokens output enabled (default: false)">]>;
+        readonly 'no-warmup': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `no-warmup` option: optional object carrying only an `enable` flag (defaults to false).
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): both halves of the description literal below are the same English sentence; the first half is a zh translation in the other entries, so the translation appears to be missing here.
+            enable: boolean;
+        } | undefined, "- skip warming up the model with an empty run\n- skip warming up the model with an empty run">]>;
+        readonly 'spm-infill': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用后缀/前缀/中间模式进行填充（代替前缀/后缀/中间）\n- use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: disabled)">]>;
+        readonly pooling: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["none", "mean", "cls", "last", "rank"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["none" | "mean" | "cls" | "last" | "rank"] | undefined;
+        } | undefined, "- 嵌入的池化类型，如果未指定则使用模型默认值\n- pooling type for embeddings, use model default if unspecified  (env: LLAMA_ARG_POOLING)">]>;
+        readonly 'cont-batching': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用连续批处理（又名动态批处理）（默认：启用）\n- enable continuous batching (a.k.a dynamic batching) (default: enabled)  (env: LLAMA_ARG_CONT_BATCHING)">]>;
+        readonly 'no-cont-batching': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `no-cont-batching` option: optional object carrying only an `enable` flag (defaults to false).
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): the first half of the description literal below is English ("disable continuous batching"), whereas the sibling 'cont-batching' entry has a zh first half; the translation appears to be missing here.
+            enable: boolean;
+        } | undefined, "- disable continuous batching\n- disable continuous batching  (env: LLAMA_ARG_NO_CONT_BATCHING)">]>;
+        readonly mmproj: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 多模态投影文件路径。参见tools/mtmd/README.md 说明：如果使用-hf参数，可以省略此参数\n- path to a multimodal projector file. see tools/mtmd/README.md  note: if -hf is used, this argument can be omitted  (env: LLAMA_ARG_MMPROJ)">]>;
+        readonly 'mmproj-url': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "URL">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 多模态投影文件的URL。参见tools/mtmd/README.md\n- URL to a multimodal projector file. see tools/mtmd/README.md  (env: LLAMA_ARG_MMPROJ_URL)">]>;
+        readonly 'no-mmproj': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 显式禁用多模态投影器，适用于使用 -hf 时\n- explicitly disable multimodal projector, useful when using -hf  (env: LLAMA_ARG_NO_MMPROJ)">]>;
+        readonly 'no-mmproj-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 不要将多模态投影器卸载到GPU\n- do not offload multimodal projector to GPU  (env: LLAMA_ARG_NO_MMPROJ_OFFLOAD)">]>;
+        readonly 'override-tensor-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<tensor name pattern>=<buffer type>,...">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 为草图模型覆盖张量缓冲区的类型\n- override tensor buffer type for draft model">]>;
+        readonly 'cpu-moe-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `cpu-moe-draft` option: optional object carrying only an `enable` flag (defaults to false).
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): the zh half of the description literal below says the weights are kept in the CPU "for the model's training process"; the en half says "for the draft model". The zh wording looks like a mistranslation of "draft model".
+            enable: boolean;
+        } | undefined, "- 将所有专家组合（Mixture of Experts, MoE）的权重都存储在 CPU 中，以便用于模型的训练过程。\n- keep all Mixture of Experts (MoE) weights in the CPU for the draft model  (env: LLAMA_ARG_CPU_MOE_DRAFT)">]>;
+        readonly 'n-cpu-moe-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{ // `n-cpu-moe-draft` option: optional object with an `enable` flag (defaults to false) and an optional one-number tuple (placeholder "N").
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{ // NOTE(review): the zh half of the description literal below says the weights are stored "for training of the preliminary model"; the en half says "for the draft model" and mentions no training.
+            enable: boolean; // NOTE(review): the zh half also expands MoE as "Expert Combination" where the en half says "Mixture of Experts"; both look like translation errors to fix at the generator.
+            value?: [number] | undefined;
+        } | undefined, "- 将前 N 层的专家组合（Expert Combination, MoE）权重存储在 CPU 中，以用于初步模型的训练。\n- keep the Mixture of Experts (MoE) weights of the first N layers in the CPU for the draft model  (env: LLAMA_ARG_N_CPU_MOE_DRAFT)">]>;
+        readonly alias: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 设置模型名称的别名（用于REST API）\n- set alias for model name (to be used by REST API)  (env: LLAMA_ARG_ALIAS)">]>;
+        readonly host: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "HOST">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 监听的IP地址，或以.sock结尾的UNIX套接字路径（默认：127.0.0.1）\n- ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: 127.0.0.1)  (env: LLAMA_ARG_HOST)">]>;
+        readonly port: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PORT">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 端口监听 (默认: 8080)\n- port to listen (default: 8080)  (env: LLAMA_ARG_PORT)">]>;
+        readonly path: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PATH">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 路径以供静态文件服务（默认：）\n- path to serve static files from (default: )  (env: LLAMA_ARG_STATIC_PATH)">]>;
+        readonly 'api-prefix': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PREFIX">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 服务器提供服务的路径前缀（不包括末尾的斜杠，默认:）。\n- prefix path the server serves from, without the trailing slash (default: )  (env: LLAMA_ARG_API_PREFIX)">]>;
+        readonly 'no-webui': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用Web UI（默认：启用）\n- Disable the Web UI (default: enabled)  (env: LLAMA_ARG_NO_WEBUI)">]>;
+        readonly embedding: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 限制仅支持嵌入用例; 仅与专用嵌入模型一起使用 (默认: 禁用)\n- restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)  (env: LLAMA_ARG_EMBEDDINGS)">]>;
+        readonly reranking: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用服务器上的重排序端点（默认：禁用）\n- enable reranking endpoint on server (default: disabled)  (env: LLAMA_ARG_RERANKING)">]>;
+        readonly 'api-key': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "KEY">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- API key to use for authentication (default: none)\n- API key to use for authentication (default: none)  (env: LLAMA_API_KEY)">]>;
+        readonly 'api-key-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 包含API密钥的文件路径（默认：无）\n- path to file containing API keys (default: none)">]>;
+        readonly 'ssl-key-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- PEM编码SSL私钥文件的路径\n- path to file a PEM-encoded SSL private key  (env: LLAMA_ARG_SSL_KEY_FILE)">]>;
+        readonly 'ssl-cert-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- PEM编码SSL证书文件路径\n- path to file a PEM-encoded SSL certificate  (env: LLAMA_ARG_SSL_CERT_FILE)">]>;
+        readonly 'chat-template-kwargs': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 为 JSON 模板解析器设置额外的参数\n- sets additional params for the json template parser  (env: LLAMA_CHAT_TEMPLATE_KWARGS)">]>;
+        readonly timeout: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 服务器读/写超时时间（秒），默认：600\n- server read/write timeout in seconds (default: 600)  (env: LLAMA_ARG_TIMEOUT)">]>;
+        readonly 'threads-http': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 用于处理HTTP请求的线程数（默认：-1）\n- number of threads used to process HTTP requests (default: -1)  (env: LLAMA_ARG_THREADS_HTTP)">]>;
+        readonly 'cache-reuse': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 最小块大小以尝试通过KV移动从缓存中重用 (默认: 0) [(card)](https://ggml.ai/f0.png)\n- min chunk size to attempt reusing from the cache via KV shifting (default: 0)  [(card)](https://ggml.ai/f0.png)  (env: LLAMA_ARG_CACHE_REUSE)">]>;
+        readonly metrics: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用 prometheus 兼容的 metrics 端点 (默认: 禁用)\n- enable prometheus compatible metrics endpoint (default: disabled)  (env: LLAMA_ARG_ENDPOINT_METRICS)">]>;
+        readonly props: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用通过POST /props更改全局属性（默认：禁用）\n- enable changing global properties via POST /props (default: disabled)  (env: LLAMA_ARG_ENDPOINT_PROPS)">]>;
+        readonly slots: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 启用插槽监控端点（默认：禁用）\n- enable slots monitoring endpoint (default: enabled)  (env: LLAMA_ARG_ENDPOINT_SLOTS)">]>;
+        readonly 'no-slots': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 禁用插槽监控端点\n- disables slots monitoring endpoint  (env: LLAMA_ARG_NO_ENDPOINT_SLOTS)">]>;
+        readonly 'slot-save-path': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PATH">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 保存slot kv缓存的路径（默认：禁用）\n- path to save slot kv cache (default: disabled)">]>;
+        readonly jinja: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用jinja模板进行聊天（默认：禁用）\n- use jinja template for chat (default: disabled)  (env: LLAMA_ARG_JINJA)">]>;
+        readonly 'reasoning-format': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["deepseek", "none"], undefined>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: ["none" | "deepseek"] | undefined;
+        } | undefined, "- 控制是否允许并/或从响应中提取思维标签，以及以何种格式返回；可选值包括：  \\n- none：将思维内容原样保留在 `message.content` 中  \\n- deepseek：将思维内容放入 `message.reasoning_content`（在流式模式下行为与 none 相同）  \\n（默认值：deepseek）\n- controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:  - none: leaves thoughts unparsed in `message.content`  - deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)  (default: auto)  (env: LLAMA_ARG_THINK)">]>;
+        readonly 'reasoning-budget': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 控制允许的思考量；目前只能选择以下选项之一：-1 表示无限制的思考预算，或 0 表示禁用思考（默认值：-1）\n- controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)  (env: LLAMA_ARG_THINK_BUDGET)">]>;
+        readonly 'chat-template': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "JINJA_TEMPLATE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 设置自定义的Jinja聊天模板（默认：从模型元数据中获取模板）如果指定了后缀/前缀，模板将被禁用 仅接受常用模板（除非在该标志之前设置了--jinja）：内置模板列表：bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-0, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr\n- set custom jinja chat template (default: template taken from model's metadata)  if suffix/prefix are specified, template will be disabled  only commonly used templates are accepted (unless --jinja is set before this flag):  list of built-in templates:  bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr  (env: LLAMA_ARG_CHAT_TEMPLATE)">]>;
+        readonly 'chat-template-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "JINJA_TEMPLATE_FILE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 设置自定义的Jinja聊天模板文件（默认：从模型元数据中获取模板）如果指定了后缀/前缀，模板将被禁用 仅接受常用模板（除非在该标志之前设置了--jinja）：内置模板列表：bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-0, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr\n- set custom jinja chat template file (default: template taken from model's metadata)  if suffix/prefix are specified, template will be disabled  only commonly used templates are accepted (unless --jinja is set before this flag):  list of built-in templates:  bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr  (env: LLAMA_ARG_CHAT_TEMPLATE_FILE)">]>;
+        readonly 'no-prefill-assistant': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 是否在最后一条消息是助手消息时预填充助手的响应（默认：预填充启用）当设置此标志时，如果最后一条消息是助手消息，则将其视为完整消息且不预填充\n- whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)  when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled    (env: LLAMA_ARG_NO_PREFILL_ASSISTANT)">]>;
+        readonly 'slot-prompt-similarity': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SIMILARITY">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 提示必须与槽的提示匹配多少才能使用该槽（默认：0.50，0.0 = 禁用）\n- how much the prompt of a request must match the prompt of a slot in order to use that slot (default: 0.10, 0.0 = disabled)">]>;
+        readonly 'lora-init-without-apply': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 加载LoRA适配器而不应用它们（稍后通过POST /lora-adapters应用）（默认：禁用）\n- load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: disabled)">]>;
+        readonly 'threads-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 在生成过程中使用的线程数量（默认值：与 --threads 的值相同）\n- number of threads to use during generation (default: same as --threads)">]>;
+        readonly 'threads-batch-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 在批量处理和即时处理过程中使用的线程数量（默认值：与 --threads-draft 的值相同）\n- number of threads to use during batch and prompt processing (default: same as --threads-draft)">]>;
+        readonly 'draft-max': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 用于推测解码的草稿令牌数量（默认：16）\n- number of tokens to draft for speculative decoding (default: 16)  (env: LLAMA_ARG_DRAFT_MAX)">]>;
+        readonly 'draft-min': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 最小的草稿标记数，用于推测解码（默认：0）\n- minimum number of draft tokens to use for speculative decoding (default: 0)  (env: LLAMA_ARG_DRAFT_MIN)">]>;
+        readonly 'draft-p-min': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "P">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 最小推测解码概率（贪心）（默认：0.8）\n- minimum speculative decoding probability (greedy) (default: 0.8)  (env: LLAMA_ARG_DRAFT_P_MIN)">]>;
+        readonly 'ctx-size-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 提示上下文的大小用于草稿模型（默认：0，0 = 从模型加载）\n- size of the prompt context for the draft model (default: 0, 0 = loaded from model)  (env: LLAMA_ARG_CTX_SIZE_DRAFT)">]>;
+        readonly 'device-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<dev1,dev2,..>">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 逗号分隔的设备列表，用于卸载草稿模型（none = 不卸载）使用--list-devices查看可用设备列表\n- comma-separated list of devices to use for offloading the draft model (none = don't offload)  use --list-devices to see a list of available devices">]>;
+        readonly 'gpu-layers-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 存储在VRAM中的图层数量用于草稿模型\n- number of layers to store in VRAM for the draft model  (env: LLAMA_ARG_N_GPU_LAYERS_DRAFT)">]>;
+        readonly 'model-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 用于推测解码的草稿模型（默认：未使用）\n- draft model for speculative decoding (default: unused)  (env: LLAMA_ARG_MODEL_DRAFT)">]>;
+        readonly 'spec-replace': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TARGET">]>, v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "DRAFT">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string, string] | undefined;
+        } | undefined, "- 如果草稿模型（Draft Model）与主模型（Main Model）不兼容，那么需要将目标字符串（Target String）转换为草稿格式（Draft Format）。\n- translate the string in TARGET into DRAFT if the draft model and main model are not compatible">]>;
+        readonly 'model-vocoder': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 语音合成模型用于音频生成（默认：未使用）\n- vocoder model for audio generation (default: unused)">]>;
+        readonly 'tts-use-guide-tokens': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用引导标记以提高TTS单词回忆\n- Use guide tokens to improve TTS word recall">]>;
+        readonly 'embd-bge-small-en-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的bge-small-en-v1.5模型（注意：可以从互联网下载权重）\n- use default bge-small-en-v1.5 model (note: can download weights from the internet)">]>;
+        readonly 'embd-e5-small-en-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的e5-small-v2模型（注意：可以从互联网下载权重）\n- use default e5-small-v2 model (note: can download weights from the internet)">]>;
+        readonly 'embd-gte-small-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的gte-small模型（注意：可以从互联网下载权重）\n- use default gte-small model (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-1.5b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的Qwen 2.5 Coder 1.5B\n- use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-3b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的Qwen 2.5 Coder 3B（注意：可以从互联网下载权重）\n- use default Qwen 2.5 Coder 3B (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-7b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的Qwen 2.5 Coder 7B（注意：可以从互联网下载权重）\n- use default Qwen 2.5 Coder 7B (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-7b-spec': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用Qwen 2.5 Coder 7B + 0.5B draft进行推测解码（注意：可以从互联网下载权重）\n- use Qwen 2.5 Coder 7B + 0.5B draft for speculative decoding (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-14b-spec': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用Qwen 2.5 Coder 14B + 0.5B draft进行推测解码（注意：可以从互联网下载权重）\n- use Qwen 2.5 Coder 14B + 0.5B draft for speculative decoding (note: can download weights from the internet)">]>;
+        readonly 'fim-qwen-30b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 使用默认的 Qwen 3 Coder 30B A3B Instruct；请注意：可以从互联网上下载相应的权重数据。\n- use default Qwen 3 Coder 30B A3B Instruct (note: can download weights from the internet)">]>;
+    }, undefined>, undefined>;
+    readonly sampling: v.OptionalSchema<v.ObjectSchema<{
+        readonly samplers: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SAMPLERS">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- samplers that will be used for generation in the order, separated by ';'  (default: penalties;dry;top_n_sigma;top_k;typ_p;top_p;min_p;xtc;temperature)\n- samplers that will be used for generation in the order, separated by ';'  (default: penalties;dry;top_n_sigma;top_k;typ_p;top_p;min_p;xtc;temperature)">]>;
+        readonly seed: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SEED">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- RNG种子（默认：-1，使用随机种子）\n- RNG seed (default: -1, use random seed for -1)">]>;
+        readonly 'sampling-seq': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SEQUENCE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 简化的采样序列，用于采样器（默认：edskypmxt）\n- simplified sequence for samplers that will be used (default: edskypmxt)">]>;
+        readonly 'ignore-eos': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+        } | undefined, "- 忽略流结束标记并继续生成（隐含--logit-bias EOS-inf）\n- ignore end of stream token and continue generating (implies --logit-bias EOS-inf)">]>;
+        readonly temp: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- temperature (default: 0.8)\n- temperature (default: 0.8)">]>;
+        readonly 'top-k': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- top-k 采样 (默认: 40, 0 = 禁用)\n- top-k sampling (default: 40, 0 = disabled)">]>;
+        readonly 'top-p': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- top-p 采样（默认：0.9，1.0 = 禁用）\n- top-p sampling (default: 0.9, 1.0 = disabled)">]>;
+        readonly 'min-p': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- min-p 采样 (默认: 0.1, 0.0 = 禁用)\n- min-p sampling (default: 0.1, 0.0 = disabled)">]>;
+        readonly 'top-nsigma': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- Top-N-Sigma抽样方法（默认值：-1.0；当两个参数都为-1.0时，该方法被禁用）\n- top-n-sigma sampling (default: -1.0, -1.0 = disabled)">]>;
+        readonly 'xtc-probability': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- xtc probability (默认: 0.0, 0.0 = 禁用)\n- xtc probability (default: 0.0, 0.0 = disabled)">]>;
+        readonly 'xtc-threshold': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- xtc threshold (默认: 0.1, 1.0 = 禁用)\n- xtc threshold (default: 0.1, 1.0 = disabled)">]>;
+        readonly typical: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 局部典型抽样，参数 p（默认：1.0，1.0 = 禁用）\n- locally typical sampling, parameter p (default: 1.0, 1.0 = disabled)">]>;
+        readonly 'repeat-last-n': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 用于惩罚的最后n个token（默认：64，0 = 禁用，-1 = 上下文大小）\n- last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size)">]>;
+        readonly 'repeat-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 惩罚重复的token序列（默认：1.0，1.0 = 禁用）\n- penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled)">]>;
+        readonly 'presence-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 重复 alpha 存在惩罚（默认：0.0，0.0 = 禁用）\n- repeat alpha presence penalty (default: 0.0, 0.0 = disabled)">]>;
+        readonly 'frequency-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 重复 alpha 频率惩罚（默认：0.0，0.0 = 禁用）\n- repeat alpha frequency penalty (default: 0.0, 0.0 = disabled)">]>;
+        readonly 'dry-multiplier': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 设置 DRY 采样乘数（默认：0.0，0.0 = 禁用）\n- set DRY sampling multiplier (default: 0.0, 0.0 = disabled)">]>;
+        readonly 'dry-base': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 设置 DRY 采样基础值 (默认: 1.75)\n- set DRY sampling base value (default: 1.75)">]>;
+        readonly 'dry-allowed-length': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 设置DRY采样的允许长度（默认：2）\n- set allowed length for DRY sampling (default: 2)">]>;
+        readonly 'dry-penalty-last-n': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 设置最后n个token的DRY惩罚（默认：-1，0 = 禁用，-1 = 上下文大小）\n- set DRY penalty for the last n tokens (default: -1, 0 = disable, -1 = context size)">]>;
+        readonly 'dry-sequence-breaker': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 添加序列破环器用于DRY抽样，清除默认的破环器（'\\n', ':', '\"', '*'）；使用 \"none\" 不使用任何序列破环器\n- add sequence breaker for DRY sampling, clearing out default breakers ('\\n', ':', '\"', '*') in the process; use \"none\" to not use any sequence breakers">]>;
+        readonly 'dynatemp-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 动态温度范围 (默认: 0.0, 0.0 = 禁用)\n- dynamic temperature range (default: 0.0, 0.0 = disabled)">]>;
+        readonly 'dynatemp-exp': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 动态温度指数（默认：1.0）\n- dynamic temperature exponent (default: 1.0)">]>;
+        readonly mirostat: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- 使用Mirostat抽样。如果使用了Top K、Nucleus和Locally Typical抽样器，则会被忽略。（默认：0，0=禁用，1=Mirostat，2=Mirostat 2.0）\n- use Mirostat sampling.  Top K, Nucleus and Locally Typical samplers are ignored if used.  (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)">]>;
+        readonly 'mirostat-lr': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- Mirostat学习率，参数eta（默认：0.1）\n- Mirostat learning rate, parameter eta (default: 0.1)">]>;
+        readonly 'mirostat-ent': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [number] | undefined;
+        } | undefined, "- Mirostat目标熵，参数tau（默认：5.0）\n- Mirostat target entropy, parameter tau (default: 5.0)">]>;
+        readonly 'logit-bias': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TOKEN_ID(+/-)BIAS">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 修改标记出现在完成中的可能性，例如：--logit-bias 15043+1 增加标记 ' Hello' 的可能性，或 --logit 15043-1 减少标记 ' Hello' 的可能性\n- modifies the likelihood of token appearing in the completion,  i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',  or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'">]>;
+        readonly grammar: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "GRAMMAR">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- BNF类似语法约束生成（参见grammars目录中的示例）（默认：''）\n- BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '')">]>;
+        readonly 'grammar-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 从文件读取语法\n- file to read grammar from">]>;
+        readonly 'json-schema': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SCHEMA">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- JSON schema 用于约束生成（https://json-schema.org/），例如 `{}` 表示任何 JSON 对象。对于包含外部 $refs 的 schema，使用 --grammar + example/json_schema_to_grammar.py 代替\n- JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object  For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead">]>;
+        readonly 'json-schema-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+            readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+        }, undefined>, undefined>, v.DescriptionAction<{
+            enable: boolean;
+            value?: [string] | undefined;
+        } | undefined, "- 包含用于约束生成的JSON架构的文件（https://json-schema.org/），例如{}表示任何JSON对象  对于具有外部$refs的架构，使用--grammar + example/json_schema_to_grammar.py代替\n- File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object  For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead">]>;
+    }, undefined>, undefined>;
+}, undefined>;
+/**
+ * Output type of {@link LlamaServerDefine}, derived with valibot's
+ * `v.InferOutput` so it stays in sync with the schema declaration.
+ */
+export type LlamaServerType = v.InferOutput<typeof LlamaServerDefine>;
+/**
+ * Valibot object schema with two required fields, `version` and `device`.
+ *
+ * NOTE(review): presumably this identifies which llama.cpp release/backend
+ * build to run — confirm against the code that consumes this schema.
+ */
+export declare const ExecDefine: v.ObjectSchema<{
+    /**
+     * llama.cpp version number (string). Per the schema description, it can be
+     * obtained from https://github.com/ggerganov/llama.cpp/releases.
+     */
+    readonly version: v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TitleAction<string, "llama.cpp 版本号">, v.DescriptionAction<string, "llama.cpp的版本号,可以从 https://github.com/ggerganov/llama.cpp/releases 获得">]>;
+    /**
+     * Device used by llama.cpp; must be one of the picklist literals
+     * "cpu", "cuda12.4", "hip-radeon", "sycl" or "vulkan".
+     */
+    readonly device: v.SchemaWithPipe<readonly [v.PicklistSchema<["cpu", "cuda12.4", "hip-radeon", "sycl", "vulkan"], undefined>, v.TitleAction<"cpu" | "cuda12.4" | "hip-radeon" | "sycl" | "vulkan", "llama.cpp 使用设备">]>;
+}, undefined>;
+export declare const LlamaServerSwapItemDefine: v.ObjectSchema<{
+    readonly config: v.OptionalSchema<v.ObjectSchema<{
+        readonly common: v.OptionalSchema<v.ObjectSchema<{
+            readonly 'verbose-prompt': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 在生成前打印详细提示（默认：false）\n- print a verbose prompt before generation (default: false)">]>;
+            readonly threads: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 生成期间使用的线程数（默认：-1）\n- number of threads to use during generation (default: -1)  (env: LLAMA_ARG_THREADS)">]>;
+            readonly 'threads-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 批量和提示处理期间使用的线程数（默认：与--threads相同）\n- number of threads to use during batch and prompt processing (default: same as --threads)">]>;
+            readonly 'cpu-mask': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "M">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- CPU亲和力掩码：任意长度的十六进制数。补充cpu-range（默认：\"\"）\n- CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")">]>;
+            readonly 'cpu-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "lo-hi">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- CPU范围用于亲和力。补充--cpu-mask\n- range of CPUs for affinity. Complements --cpu-mask">]>;
+            readonly 'cpu-strict': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["0" | "1"] | undefined;
+            } | undefined, "- 使用严格CPU放置（默认：0）\n- use strict CPU placement (default: 0)">]>;
+            readonly prio: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<[0, 1, 2, 3], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [0 | 2 | 1 | 3] | undefined;
+            } | undefined, "- 设置进程/线程优先级：低(-1)、正常(0)、中等(1)、高(2)、实时(3)（默认值：0）\n- set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: 0)">]>;
+            readonly poll: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<0...100>">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 使用轮询级别等待工作 (0 - 无轮询，默认: 50)\n- use polling level to wait for work (0 - no polling, default: 50)">]>;
+            readonly 'cpu-mask-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "M">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- CPU亲和力掩码：任意长度的十六进制数。补充cpu-range-batch（默认：与--cpu-mask相同）\n- CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)">]>;
+            readonly 'cpu-range-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "lo-hi">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- CPU亲和力的范围。补充--cpu-mask-batch\n- ranges of CPUs for affinity. Complements --cpu-mask-batch">]>;
+            readonly 'cpu-strict-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["0" | "1"] | undefined;
+            } | undefined, "- 使用严格的CPU放置（默认：与--cpu-strict相同）\n- use strict CPU placement (default: same as --cpu-strict)">]>;
+            readonly 'prio-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<[0, 1, 2, 3], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [0 | 2 | 1 | 3] | undefined;
+            } | undefined, "- 设置进程/线程优先级 : 0-正常, 1-中等, 2-高, 3-实时 (默认: 0)\n- set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: 0)">]>;
+            readonly 'poll-batch': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["0", "1"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["0" | "1"] | undefined;
+            } | undefined, "- 使用轮询等待工作（默认：与--poll相同）\n- use polling to wait for work (default: same as --poll)">]>;
+            readonly 'ctx-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 提示上下文的大小（默认：4096，0 = 从模型加载）\n- size of the prompt context (default: 4096, 0 = loaded from model)  (env: LLAMA_ARG_CTX_SIZE)">]>;
+            readonly predict: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 预测的token数量（默认：-1，-1 = 无限）\n- number of tokens to predict (default: -1, -1 = infinity)  (env: LLAMA_ARG_N_PREDICT)">]>;
+            readonly 'batch-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 逻辑最大批处理大小（默认：2048）\n- logical maximum batch size (default: 2048)  (env: LLAMA_ARG_BATCH)">]>;
+            readonly 'ubatch-size': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 物理最大批处理大小（默认：512）\n- physical maximum batch size (default: 512)  (env: LLAMA_ARG_UBATCH)">]>;
+            readonly keep: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 保留初始提示中的令牌数量（默认：0，-1 = 所有）\n- number of tokens to keep from the initial prompt (default: 0, -1 = all)">]>;
+            readonly 'swa-full': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用全尺寸SWA缓存（默认：false） [更多信息](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)\n- use full-size SWA cache (default: false)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)  (env: LLAMA_ARG_SWA_FULL)">]>;
+            readonly 'kv-unified': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 为所有序列的 KV 缓存使用一个统一的 KV 缓冲区（默认值为 `false`）：[更多信息](https://github.com/ggml-org/llama.cpp/pull/14363)（环境变量：`LLAMA_ARG_KV_SPLIT`）\n- use single unified KV buffer for the KV cache of all sequences (default: false)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)  (env: LLAMA_ARG_KV_SPLIT)">]>;
+            /**
+             * `flash-attn` option: an optional `{ enable, value }` entry. `enable` defaults to
+             * `false`; `value` is an optional one-element tuple holding 'on', 'off' or 'auto'.
+             * The description is a Chinese line followed by the English help line; the Chinese
+             * line states the same tri-state value and 'auto' default as the English one.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'flash-attn': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["on", "off", "auto"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["on" | "off" | "auto"] | undefined;
+            } | undefined, "- 设置 Flash Attention 的使用方式（'on'、'off' 或 'auto'，默认：'auto'）\n- set Flash Attention use ('on', 'off', or 'auto', default: 'auto')  (env: LLAMA_ARG_FLASH_ATTN)">]>;
+            readonly 'no-perf': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用内部libllama性能计时（默认：false）\n- disable internal libllama performance timings (default: false)  (env: LLAMA_ARG_NO_PERF)">]>;
+            readonly escape: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 处理转义序列（\\n, \\r, \\t, \\\", \\\\)（默认：true）\n- process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: true)">]>;
+            readonly 'no-escape': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 不要处理转义序列\n- do not process escape sequences">]>;
+            readonly 'rope-scaling': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["none", "linear", "yarn"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["none" | "linear" | "yarn"] | undefined;
+            } | undefined, "- RoPE频率缩放方法，除非模型指定，默认为线性\n- RoPE frequency scaling method, defaults to linear unless specified by the model  (env: LLAMA_ARG_ROPE_SCALING_TYPE)">]>;
+            readonly 'rope-scale': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- RoPE上下文缩放因子，将上下文扩展N倍\n- RoPE context scaling factor, expands context by a factor of N  (env: LLAMA_ARG_ROPE_SCALE)">]>;
+            readonly 'rope-freq-base': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- RoPE基频，用于NTK感知缩放（默认：从模型加载）\n- RoPE base frequency, used by NTK-aware scaling (default: loaded from model)  (env: LLAMA_ARG_ROPE_FREQ_BASE)">]>;
+            readonly 'rope-freq-scale': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- RoPE频率缩放因子，通过因子1/N扩展上下文\n- RoPE frequency scaling factor, expands context by a factor of 1/N  (env: LLAMA_ARG_ROPE_FREQ_SCALE)">]>;
+            readonly 'yarn-orig-ctx': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- YaRN: 原始上下文大小（默认：0 = 模型训练上下文大小）\n- YaRN: original context size of model (default: 0 = model training context size)  (env: LLAMA_ARG_YARN_ORIG_CTX)">]>;
+            /**
+             * `yarn-ext-factor` option: an optional `{ enable, value }` entry. `enable` defaults
+             * to `false`; `value` is an optional one-element tuple holding a number ("N").
+             * The Chinese description line says "extrapolation mix factor" (外推), matching the
+             * English help line.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'yarn-ext-factor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- YaRN: 外推混合因子（默认：-1.0，0.0 = 完全插值）\n- YaRN: extrapolation mix factor (default: -1.0, 0.0 = full interpolation)  (env: LLAMA_ARG_YARN_EXT_FACTOR)">]>;
+            readonly 'yarn-attn-factor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- YaRN: 缩放 sqrt(t) 或注意力幅度（默认：-1.0）\n- YaRN: scale sqrt(t) or attention magnitude (default: -1.0)  (env: LLAMA_ARG_YARN_ATTN_FACTOR)">]>;
+            readonly 'yarn-beta-slow': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- YaRN: 高校正维度或alpha（默认：-1.0）\n- YaRN: high correction dim or alpha (default: -1.0)  (env: LLAMA_ARG_YARN_BETA_SLOW)">]>;
+            readonly 'yarn-beta-fast': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- YaRN: 低校正维或beta（默认：-1）\n- YaRN: low correction dim or beta (default: -1.0)  (env: LLAMA_ARG_YARN_BETA_FAST)">]>;
+            readonly 'no-kv-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用KV卸载\n- disable KV offload  (env: LLAMA_ARG_NO_KV_OFFLOAD)">]>;
+            /**
+             * `no-repack` option: an optional `{ enable }` entry with no value; `enable`
+             * defaults to `false`. The Chinese description line uses 权重 (model weights) to
+             * match the English "disable weight repacking".
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'no-repack': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用权重重新打包\n- disable weight repacking  (env: LLAMA_ARG_NO_REPACK)">]>;
+            readonly 'cache-type-k': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "iq4_nl", "q5_0", "q5_1"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
+            } | undefined, "- KV缓存数据类型用于K 允许的值: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 （默认: f16）\n- KV cache data type for K  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_K)">]>;
+            readonly 'cache-type-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "iq4_nl", "q5_0", "q5_1"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
+            } | undefined, "- KV缓存数据类型用于V，允许的值：f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1（默认：f16）\n- KV cache data type for V  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_V)">]>;
+            /**
+             * `defrag-thold` option: an optional `{ enable, value }` entry. `enable` defaults to
+             * `false`; `value` is an optional one-element tuple holding a number ("N").
+             * The English help line marks the option DEPRECATED, and the Chinese line says the
+             * same rather than advertising a default threshold.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'defrag-thold': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- KV缓存碎片整理阈值（已弃用）\n- KV cache defragmentation threshold (DEPRECATED)  (env: LLAMA_ARG_DEFRAG_THOLD)">]>;
+            readonly parallel: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 并行解码序列的数量（默认：1）\n- number of parallel sequences to decode (default: 1)  (env: LLAMA_ARG_N_PARALLEL)">]>;
+            readonly mlock: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 强制系统将模型保留在RAM中而不是交换或压缩\n- force system to keep model in RAM rather than swapping or compressing  (env: LLAMA_ARG_MLOCK)">]>;
+            readonly 'no-mmap': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 不要内存映射模型（加载速度较慢，但可能减少页面丢失，如果未使用mlock）\n- do not memory-map model (slower load but may reduce pageouts if not using mlock)  (env: LLAMA_ARG_NO_MMAP)">]>;
+            readonly numa: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.PicklistSchema<["distribute", "isolate", "numactl"], undefined>, v.MetadataAction<"distribute" | "isolate" | "numactl", {
+                    readonly enumOptions: readonly [{
+                        readonly label: "distribute";
+                        readonly value: "distribute";
+                        readonly description: "将执行均匀分布在所有节点上";
+                    }, {
+                        readonly label: "isolate";
+                        readonly value: "isolate";
+                        readonly description: "仅在执行开始的节点的CPU上生成线程";
+                    }, {
+                        readonly label: "numactl";
+                        readonly value: "numactl";
+                        readonly description: "使用numactl提供的CPU映射";
+                    }];
+                }>]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["distribute" | "isolate" | "numactl"] | undefined;
+            } | undefined, "- 尝试在某些NUMA系统上进行优化 - distribute：将执行均匀分布在所有节点上 - isolate：仅在执行开始的节点的CPU上生成线程 - numactl：使用numactl提供的CPU映射 如果之前未运行过此操作，请在使用此功能前删除系统页面缓存 详见 https://github.com/ggml-org/llama.cpp/issues/1437\n- attempt optimizations that help on some NUMA systems  - distribute: spread execution evenly over all nodes  - isolate: only spawn threads on CPUs on the node that execution started on  - numactl: use the CPU map provided by numactl  if run without this previously, it is recommended to drop the system page cache before using this  see https://github.com/ggml-org/llama.cpp/issues/1437  (env: LLAMA_ARG_NUMA)">]>;
+            readonly device: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<dev1,dev2,..>">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 逗号分隔的设备列表，用于卸载（none = 不卸载）使用 --list-devices 查看可用设备列表\n- comma-separated list of devices to use for offloading (none = don't offload)  use --list-devices to see a list of available devices  (env: LLAMA_ARG_DEVICE)">]>;
+            /**
+             * `override-tensor` option: an optional `{ enable, value }` entry. `enable` defaults
+             * to `false`; `value` is an optional one-element tuple holding a trimmed string of
+             * the form `<tensor name pattern>=<buffer type>,...`.
+             * The Chinese description line translates "override tensor buffer type", matching
+             * the English help line.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'override-tensor': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<tensor name pattern>=<buffer type>,...">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 覆盖张量缓冲区类型\n- override tensor buffer type">]>;
+            /**
+             * `cpu-moe` option: an optional `{ enable }` entry with no value; `enable` defaults
+             * to `false`. The Chinese description line translates "keep all Mixture of Experts
+             * (MoE) weights in the CPU", matching the English help line.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'cpu-moe': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 将所有混合专家（Mixture of Experts, MoE）权重保留在 CPU 中\n- keep all Mixture of Experts (MoE) weights in the CPU  (env: LLAMA_ARG_CPU_MOE)">]>;
+            /**
+             * `n-cpu-moe` option: an optional `{ enable, value }` entry. `enable` defaults to
+             * `false`; `value` is an optional one-element tuple holding a number ("N").
+             * The Chinese description line translates "keep the MoE weights of the first N
+             * layers in the CPU", matching the English help line.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'n-cpu-moe': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 将前 N 层的混合专家（Mixture of Experts, MoE）权重保留在 CPU 中\n- keep the Mixture of Experts (MoE) weights of the first N layers in the CPU  (env: LLAMA_ARG_N_CPU_MOE)">]>;
+            readonly 'gpu-layers': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 存储在VRAM中的图层数量\n- max. number of layers to store in VRAM (default: -1)  (env: LLAMA_ARG_N_GPU_LAYERS)">]>;
+            readonly 'split-mode': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.PicklistSchema<["none", "layer", "row"], undefined>, v.MetadataAction<"none" | "layer" | "row", {
+                    readonly enumOptions: readonly [{
+                        readonly label: "none";
+                        readonly value: "none";
+                        readonly description: "仅使用一个GPU";
+                    }, {
+                        readonly label: "layer";
+                        readonly value: "layer";
+                        readonly description: "分割层和KV跨GPU";
+                    }, {
+                        readonly label: "row";
+                        readonly value: "row";
+                        readonly description: "分割行跨GPU";
+                    }];
+                }>]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["none" | "layer" | "row"] | undefined;
+            } | undefined, "- 如何将模型分布在多个GPU上，可选：- none: 仅使用一个GPU - layer (默认): 分割层和KV跨GPU - row: 分割行跨GPU\n- how to split the model across multiple GPUs, one of:  - none: use one GPU only  - layer (default): split layers and KV across GPUs  - row: split rows across GPUs  (env: LLAMA_ARG_SPLIT_MODE)">]>;
+            readonly 'tensor-split': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "N0,N1,N2,...">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 将模型分片到每个GPU的分数，逗号分隔的比例列表，例如3,1\n- fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1  (env: LLAMA_ARG_TENSOR_SPLIT)">]>;
+            readonly 'main-gpu': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "INDEX">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 用于模型（split-mode = none）或中间结果和KV（split-mode = row）的GPU（默认：0）\n- the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: 0)  (env: LLAMA_ARG_MAIN_GPU)">]>;
+            readonly 'check-tensors': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 检查模型张量数据中的无效值（默认：false）\n- check model tensor data for invalid values (default: false)">]>;
+            readonly 'override-kv': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "KEY=TYPE:VALUE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 用于通过键覆盖模型元数据的高级选项。可以多次指定。类型：int、float、bool、str。示例：--override-kv tokenizer.ggml.add_bos_token=bool:false\n- advanced option to override model metadata by key. may be specified multiple times.  types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false">]>;
+            readonly 'no-op-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用将主机张量操作卸载到设备（默认：false）\n- disable offloading host tensor operations to device (default: false)">]>;
+            readonly lora: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- LoRA适配器路径（可以重复使用多个适配器）\n- path to LoRA adapter (can be repeated to use multiple adapters)">]>;
+            readonly 'lora-scaled': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>, v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "SCALE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string, number] | undefined;
+            } | undefined, "- LoRA适配器路径，带用户定义的缩放（可重复使用多个适配器）\n- path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)">]>;
+            readonly 'control-vector': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 添加控制向量  请注意：此参数可以重复以添加多个控制向量\n- add a control vector  note: this argument can be repeated to add multiple control vectors">]>;
+            readonly 'control-vector-scaled': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>, v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "SCALE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string, number] | undefined;
+            } | undefined, "- 添加一个带用户定义缩放 SCALE 的控制向量  注意: 此参数可以重复以添加多个缩放的控制向量\n- add a control vector with user defined scaling SCALE  note: this argument can be repeated to add multiple scaled control vectors">]>;
+            readonly 'control-vector-layer-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "START">]>, v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "END">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string, string] | undefined;
+            } | undefined, "- 应用控制向量的图层范围，起始和结束值（包含）\n- layer range to apply the control vector(s) to, start and end inclusive">]>;
+            readonly model: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 模型路径 (默认: `models/$filename` 与 `--hf-file` 或 `--model-url` 的文件名，否则 models/7B/ggml-model-f16.gguf)\n- model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set, otherwise models/7B/ggml-model-f16.gguf)  (env: LLAMA_ARG_MODEL)">]>;
+            /**
+             * `model-url` option: an optional `{ enable, value }` entry. `enable` defaults to
+             * `false`; `value` is an optional one-element tuple holding a trimmed string
+             * ("MODEL_URL"). The Chinese description line translates "model download url
+             * (default: unused)", matching the English help line.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * same description text is used there.
+             */
+            readonly 'model-url': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "MODEL_URL">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 模型下载地址（默认：未使用）\n- model download url (default: unused)  (env: LLAMA_ARG_MODEL_URL)">]>;
+            /**
+             * `docker-repo` option: an optional `{ enable, value }` entry. `enable` defaults to
+             * `false`; `value` is an optional one-element tuple holding a trimmed string of the
+             * form `[<repo>/]<model>[:quant]`, the same shape used by the `hf-repo` entry.
+             * The value is a free-form string rather than a picklist: the help text gives
+             * `gemma3` as an example, and a single-item picklist of the placeholder itself
+             * would reject every real repository name.
+             * The Chinese description line translates the Docker Hub repository help text.
+             * NOTE(review): presumably generated alongside the runtime schema - confirm the
+             * runtime definition uses a string schema and the same description text.
+             */
+            readonly 'docker-repo': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "[<repo>/]<model>[:quant]">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Docker Hub 模型仓库：该仓库是可选的，默认值为 `ai/`；`quant` 参数也是可选的，默认值为 `:latest`。例如：`gemma3`（默认值：未使用）。\n- Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.  example: gemma3  (default: unused)  (env: LLAMA_ARG_DOCKER_REPO)">]>;
+            readonly 'hf-repo': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Hugging Face 模型仓库; quant 是可选的，不区分大小写，默认为 Q4_K_M，如果不存在 Q4_K_M 则回退到仓库中的第一个文件。如果可用还会自动下载 mmproj。要禁用，添加 --no-mmproj  示例：unsloth/phi-4-GGUF:q4_k_m  （默认：未使用）\n- Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.  mmproj is also downloaded automatically if available. to disable, add --no-mmproj  example: unsloth/phi-4-GGUF:q4_k_m  (default: unused)  (env: LLAMA_ARG_HF_REPO)">]>;
+            readonly 'hf-repo-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 与--hf-repo相同，但用于草稿模型（默认：未使用）\n- Same as --hf-repo, but for the draft model (default: unused)  (env: LLAMA_ARG_HFD_REPO)">]>;
+            readonly 'hf-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Hugging Face模型文件。如果指定了，将覆盖--hf-repo中的量化（默认：未使用）\n- Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)  (env: LLAMA_ARG_HF_FILE)">]>;
+            readonly 'hf-repo-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<user>/<model>[:quant]">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Hugging Face vocoder模型仓库（默认：未使用）\n- Hugging Face model repository for the vocoder model (default: unused)  (env: LLAMA_ARG_HF_REPO_V)">]>;
+            readonly 'hf-file-v': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Hugging Face模型文件用于声码器模型（默认：未使用）\n- Hugging Face model file for the vocoder model (default: unused)  (env: LLAMA_ARG_HF_FILE_V)">]>;
+            readonly 'hf-token': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TOKEN">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- Hugging Face访问令牌（默认：来自HF_TOKEN环境变量的值）\n- Hugging Face access token (default: value from HF_TOKEN environment variable)  (env: HF_TOKEN)">]>;
+            readonly 'log-disable': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 日志禁用\n- Log disable">]>;
+            readonly 'log-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 日志记录到文件\n- Log to file">]>;
+            readonly 'log-colors': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["on", "off", "auto"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["on" | "off" | "auto"] | undefined;
+            } | undefined, "- 启用彩色日志记录\n- Set colored logging ('on', 'off', or 'auto', default: 'auto')  'auto' enables colors when output is to a terminal  (env: LLAMA_LOG_COLORS)">]>;
+            readonly verbose: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 设置 verbosity 级别为无穷大（即记录所有消息，用于调试）\n- Set verbosity level to infinity (i.e. log all messages, useful for debugging)">]>;
+            readonly offline: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 离线模式：强制使用缓存，阻止网络访问\n- Offline mode: forces use of cache, prevents network access  (env: LLAMA_OFFLINE)">]>;
+            /** Log verbosity threshold (env: LLAMA_LOG_VERBOSITY); `value` is a one-element tuple holding the numeric level N. */
+            readonly verbosity: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 设置日志的详细程度阈值。详细程度高于该阈值的消息将被忽略。\n- Set the verbosity threshold. Messages with a higher verbosity will be ignored.  (env: LLAMA_LOG_VERBOSITY)">]>;
+            readonly 'log-prefix': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用日志消息中的前缀\n- Enable prefix in log messages  (env: LLAMA_LOG_PREFIX)">]>;
+            readonly 'log-timestamps': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用日志消息中的时间戳\n- Enable timestamps in log messages  (env: LLAMA_LOG_TIMESTAMPS)">]>;
+            readonly 'cache-type-k-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TYPE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- KV缓存中用于草稿模型的K的数据类型，允许的值包括：f32、f16、bf16、q8_0、q4_0、q4_1、iq4_nl、q5_0、q5_1（默认值：f16）\n- KV cache data type for K for the draft model  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_K_DRAFT)">]>;
+            readonly 'cache-type-v-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TYPE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- KV缓存中用于草稿模型的V的数据类型，允许的值包括：f32、f16、bf16、q8_0、q4_0、q4_1、iq4_nl、q5_0、q5_1（默认值：f16）\n- KV cache data type for V for the draft model  allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1  (default: f16)  (env: LLAMA_ARG_CACHE_TYPE_V_DRAFT)">]>;
+        }, undefined>, undefined>;
+        readonly exampleSpecific: v.OptionalSchema<v.ObjectSchema<{
+            readonly 'swa-checkpoints': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 每个时间槽内可创建的 SWA 检查点最大数量（默认值：3）：[更多信息](https://github.com/ggml-org/llama.cpp/pull/15293)\n- max number of SWA checkpoints per slot to create (default: 3)  [(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)  (env: LLAMA_ARG_SWA_CHECKPOINTS)">]>;
+            /** Switch that disables context shift on infinite text generation (env: LLAMA_ARG_NO_CONTEXT_SHIFT). */
+            readonly 'no-context-shift': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用无限文本生成中的上下文切换（默认：启用）\n- disables context shift on infinite text generation (default: enabled)  (env: LLAMA_ARG_NO_CONTEXT_SHIFT)">]>;
+            readonly 'context-shift': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 该功能允许在无限文本生成过程中实现上下文切换（默认设置为禁用状态）。\n- enables context shift on infinite text generation (default: disabled)  (env: LLAMA_ARG_CONTEXT_SHIFT)">]>;
+            /** Reverse prompt: generation halts at PROMPT and control is returned in interactive mode; `value` is a one-element tuple holding the trimmed PROMPT string. */
+            readonly 'reverse-prompt': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PROMPT">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 在遇到 PROMPT 时停止生成，并在交互模式下交还控制权\n- halt generation at PROMPT, return control in interactive mode">]>;
+            readonly special: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 特殊标记输出启用（默认：false）\n- special tokens output enabled (default: false)">]>;
+            /** Switch that skips warming up the model with an empty run. */
+            readonly 'no-warmup': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 跳过使用空运行对模型进行预热\n- skip warming up the model with an empty run">]>;
+            /** Switch that selects the Suffix/Prefix/Middle infill pattern instead of Prefix/Suffix/Middle. */
+            readonly 'spm-infill': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用后缀/前缀/中间模式进行填充（代替前缀/后缀/中间），因为部分模型更适合这种方式（默认：禁用）\n- use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: disabled)">]>;
+            readonly pooling: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["none", "mean", "cls", "last", "rank"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["none" | "mean" | "cls" | "last" | "rank"] | undefined;
+            } | undefined, "- 嵌入的池化类型，如果未指定则使用模型默认值\n- pooling type for embeddings, use model default if unspecified  (env: LLAMA_ARG_POOLING)">]>;
+            readonly 'cont-batching': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用连续批处理（又名动态批处理）（默认：启用）\n- enable continuous batching (a.k.a dynamic batching) (default: enabled)  (env: LLAMA_ARG_CONT_BATCHING)">]>;
+            /** Switch that disables continuous batching (env: LLAMA_ARG_NO_CONT_BATCHING). */
+            readonly 'no-cont-batching': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用连续批处理\n- disable continuous batching  (env: LLAMA_ARG_NO_CONT_BATCHING)">]>;
+            readonly mmproj: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 多模态投影文件路径。参见tools/mtmd/README.md 说明：如果使用-hf参数，可以省略此参数\n- path to a multimodal projector file. see tools/mtmd/README.md  note: if -hf is used, this argument can be omitted  (env: LLAMA_ARG_MMPROJ)">]>;
+            readonly 'mmproj-url': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "URL">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 多模态投影文件的URL。参见tools/mtmd/README.md\n- URL to a multimodal projector file. see tools/mtmd/README.md  (env: LLAMA_ARG_MMPROJ_URL)">]>;
+            readonly 'no-mmproj': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 显式禁用多模态投影器，适用于使用 -hf 时\n- explicitly disable multimodal projector, useful when using -hf  (env: LLAMA_ARG_NO_MMPROJ)">]>;
+            readonly 'no-mmproj-offload': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 不要将多模态投影器卸载到GPU\n- do not offload multimodal projector to GPU  (env: LLAMA_ARG_NO_MMPROJ_OFFLOAD)">]>;
+            /** Tensor buffer type override for the draft model; `value` is a one-element tuple holding a trimmed `<tensor name pattern>=<buffer type>,...` string. */
+            readonly 'override-tensor-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<tensor name pattern>=<buffer type>,...">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 为草稿模型覆盖张量缓冲区的类型\n- override tensor buffer type for draft model">]>;
+            /** Switch that keeps all Mixture of Experts (MoE) weights of the draft model in the CPU (env: LLAMA_ARG_CPU_MOE_DRAFT). */
+            readonly 'cpu-moe-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 将草稿模型的所有混合专家（Mixture of Experts, MoE）权重保留在 CPU 中\n- keep all Mixture of Experts (MoE) weights in the CPU for the draft model  (env: LLAMA_ARG_CPU_MOE_DRAFT)">]>;
+            /** Keeps the Mixture of Experts (MoE) weights of the draft model's first N layers in the CPU (env: LLAMA_ARG_N_CPU_MOE_DRAFT); `value` is a one-element tuple holding N. */
+            readonly 'n-cpu-moe-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 将草稿模型前 N 层的混合专家（Mixture of Experts, MoE）权重保留在 CPU 中\n- keep the Mixture of Experts (MoE) weights of the first N layers in the CPU for the draft model  (env: LLAMA_ARG_N_CPU_MOE_DRAFT)">]>;
+            readonly alias: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 设置模型名称的别名（用于REST API）\n- set alias for model name (to be used by REST API)  (env: LLAMA_ARG_ALIAS)">]>;
+            readonly host: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "HOST">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 监听的IP地址，或以.sock结尾的UNIX套接字路径（默认：127.0.0.1）\n- ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: 127.0.0.1)  (env: LLAMA_ARG_HOST)">]>;
+            readonly port: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PORT">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 端口监听 (默认: 8080)\n- port to listen (default: 8080)  (env: LLAMA_ARG_PORT)">]>;
+            readonly path: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PATH">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 路径以供静态文件服务（默认：）\n- path to serve static files from (default: )  (env: LLAMA_ARG_STATIC_PATH)">]>;
+            readonly 'api-prefix': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PREFIX">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 服务器提供服务的路径前缀（不包括末尾的斜杠，默认:）。\n- prefix path the server serves from, without the trailing slash (default: )  (env: LLAMA_ARG_API_PREFIX)">]>;
+            readonly 'no-webui': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用Web UI（默认：启用）\n- Disable the Web UI (default: enabled)  (env: LLAMA_ARG_NO_WEBUI)">]>;
+            readonly embedding: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 限制仅支持嵌入用例; 仅与专用嵌入模型一起使用 (默认: 禁用)\n- restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)  (env: LLAMA_ARG_EMBEDDINGS)">]>;
+            readonly reranking: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用服务器上的重排序端点（默认：禁用）\n- enable reranking endpoint on server (default: disabled)  (env: LLAMA_ARG_RERANKING)">]>;
+            /** API key used for authentication (env: LLAMA_API_KEY); `value` is a one-element tuple holding the trimmed KEY string. */
+            readonly 'api-key': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "KEY">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 用于身份验证的API密钥（默认：无）\n- API key to use for authentication (default: none)  (env: LLAMA_API_KEY)">]>;
+            readonly 'api-key-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 包含API密钥的文件路径（默认：无）\n- path to file containing API keys (default: none)">]>;
+            readonly 'ssl-key-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- PEM编码SSL私钥文件的路径\n- path to file a PEM-encoded SSL private key  (env: LLAMA_ARG_SSL_KEY_FILE)">]>;
+            readonly 'ssl-cert-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- PEM编码SSL证书文件路径\n- path to file a PEM-encoded SSL certificate  (env: LLAMA_ARG_SSL_CERT_FILE)">]>;
+            readonly 'chat-template-kwargs': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 为 JSON 模板解析器设置额外的参数\n- sets additional params for the json template parser  (env: LLAMA_CHAT_TEMPLATE_KWARGS)">]>;
+            readonly timeout: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 服务器读/写超时时间（秒），默认：600\n- server read/write timeout in seconds (default: 600)  (env: LLAMA_ARG_TIMEOUT)">]>;
+            readonly 'threads-http': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 用于处理HTTP请求的线程数（默认：-1）\n- number of threads used to process HTTP requests (default: -1)  (env: LLAMA_ARG_THREADS_HTTP)">]>;
+            readonly 'cache-reuse': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 最小块大小以尝试通过KV移动从缓存中重用 (默认: 0) [(card)](https://ggml.ai/f0.png)\n- min chunk size to attempt reusing from the cache via KV shifting (default: 0)  [(card)](https://ggml.ai/f0.png)  (env: LLAMA_ARG_CACHE_REUSE)">]>;
+            readonly metrics: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用 prometheus 兼容的 metrics 端点 (默认: 禁用)\n- enable prometheus compatible metrics endpoint (default: disabled)  (env: LLAMA_ARG_ENDPOINT_METRICS)">]>;
+            readonly props: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用通过POST /props更改全局属性（默认：禁用）\n- enable changing global properties via POST /props (default: disabled)  (env: LLAMA_ARG_ENDPOINT_PROPS)">]>;
+            /** Switch that enables the slots monitoring endpoint (env: LLAMA_ARG_ENDPOINT_SLOTS). */
+            readonly slots: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 启用插槽监控端点（默认：启用）\n- enable slots monitoring endpoint (default: enabled)  (env: LLAMA_ARG_ENDPOINT_SLOTS)">]>;
+            readonly 'no-slots': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 禁用插槽监控端点\n- disables slots monitoring endpoint  (env: LLAMA_ARG_NO_ENDPOINT_SLOTS)">]>;
+            readonly 'slot-save-path': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "PATH">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 保存slot kv缓存的路径（默认：禁用）\n- path to save slot kv cache (default: disabled)">]>;
+            readonly jinja: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用jinja模板进行聊天（默认：禁用）\n- use jinja template for chat (default: disabled)  (env: LLAMA_ARG_JINJA)">]>;
+            readonly 'reasoning-format': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.PicklistSchema<["deepseek", "none"], undefined>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: ["none" | "deepseek"] | undefined;
+            } | undefined, "- 控制是否允许并/或从响应中提取思维标签，以及以何种格式返回；可选值包括：  \\n- none：将思维内容原样保留在 `message.content` 中  \\n- deepseek：将思维内容放入 `message.reasoning_content`（在流式模式下行为与 none 相同）  \\n（默认值：deepseek）\n- controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:  - none: leaves thoughts unparsed in `message.content`  - deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)  (default: auto)  (env: LLAMA_ARG_THINK)">]>;
+            readonly 'reasoning-budget': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 控制允许的思考量；目前只能选择以下选项之一：-1 表示无限制的思考预算，或 0 表示禁用思考（默认值：-1）\n- controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)  (env: LLAMA_ARG_THINK_BUDGET)">]>;
+            readonly 'chat-template': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "JINJA_TEMPLATE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 设置自定义的Jinja聊天模板（默认：从模型元数据中获取模板）如果指定了后缀/前缀，模板将被禁用 仅接受常用模板（除非在该标志之前设置了--jinja）：内置模板列表：bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-0, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr\n- set custom jinja chat template (default: template taken from model's metadata)  if suffix/prefix are specified, template will be disabled  only commonly used templates are accepted (unless --jinja is set before this flag):  list of built-in templates:  bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr  (env: LLAMA_ARG_CHAT_TEMPLATE)">]>;
+            readonly 'chat-template-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "JINJA_TEMPLATE_FILE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 设置自定义的Jinja聊天模板文件（默认：从模型元数据中获取模板）如果指定了后缀/前缀，模板将被禁用 仅接受常用模板（除非在该标志之前设置了--jinja）：内置模板列表：bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-0, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr\n- set custom jinja chat template file (default: template taken from model's metadata)  if suffix/prefix are specified, template will be disabled  only commonly used templates are accepted (unless --jinja is set before this flag):  list of built-in templates:  bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr  (env: LLAMA_ARG_CHAT_TEMPLATE_FILE)">]>;
+            readonly 'no-prefill-assistant': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 是否在最后一条消息是助手消息时预填充助手的响应（默认：预填充启用）当设置此标志时，如果最后一条消息是助手消息，则将其视为完整消息且不预填充\n- whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)  when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled    (env: LLAMA_ARG_NO_PREFILL_ASSISTANT)">]>;
+            readonly 'slot-prompt-similarity': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SIMILARITY">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 提示必须与槽的提示匹配多少才能使用该槽（默认：0.50，0.0 = 禁用）\n- how much the prompt of a request must match the prompt of a slot in order to use that slot (default: 0.10, 0.0 = disabled)">]>;
+            readonly 'lora-init-without-apply': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 加载LoRA适配器而不应用它们（稍后通过POST /lora-adapters应用）（默认：禁用）\n- load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: disabled)">]>;
+            readonly 'threads-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 在生成过程中使用的线程数量（默认值：与 --threads 的值相同）\n- number of threads to use during generation (default: same as --threads)">]>;
+            readonly 'threads-batch-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 在批量处理和即时处理过程中使用的线程数量（默认值：与 --threads-draft 的值相同）\n- number of threads to use during batch and prompt processing (default: same as --threads-draft)">]>;
+            readonly 'draft-max': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 用于推测解码的草稿令牌数量（默认：16）\n- number of tokens to draft for speculative decoding (default: 16)  (env: LLAMA_ARG_DRAFT_MAX)">]>;
+            readonly 'draft-min': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 最小的草稿标记数，用于推测解码（默认：0）\n- minimum number of draft tokens to use for speculative decoding (default: 0)  (env: LLAMA_ARG_DRAFT_MIN)">]>;
+            readonly 'draft-p-min': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "P">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 最小推测解码概率（贪心）（默认：0.8）\n- minimum speculative decoding probability (greedy) (default: 0.8)  (env: LLAMA_ARG_DRAFT_P_MIN)">]>;
+            readonly 'ctx-size-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 提示上下文的大小用于草稿模型（默认：0，0 = 从模型加载）\n- size of the prompt context for the draft model (default: 0, 0 = loaded from model)  (env: LLAMA_ARG_CTX_SIZE_DRAFT)">]>;
+            readonly 'device-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "<dev1,dev2,..>">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 逗号分隔的设备列表，用于卸载草稿模型（none = 不卸载）使用--list-devices查看可用设备列表\n- comma-separated list of devices to use for offloading the draft model (none = don't offload)  use --list-devices to see a list of available devices">]>;
+            readonly 'gpu-layers-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 存储在VRAM中的图层数量用于草稿模型\n- number of layers to store in VRAM for the draft model  (env: LLAMA_ARG_N_GPU_LAYERS_DRAFT)">]>;
+            readonly 'model-draft': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 用于推测解码的草稿模型（默认：未使用）\n- draft model for speculative decoding (default: unused)  (env: LLAMA_ARG_MODEL_DRAFT)">]>;
+            readonly 'spec-replace': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TARGET">]>, v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "DRAFT">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string, string] | undefined;
+            } | undefined, "- 如果草稿模型（Draft Model）与主模型（Main Model）不兼容，那么需要将目标字符串（Target String）转换为草稿格式（Draft Format）。\n- translate the string in TARGET into DRAFT if the draft model and main model are not compatible">]>;
+            readonly 'model-vocoder': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 语音合成模型用于音频生成（默认：未使用）\n- vocoder model for audio generation (default: unused)">]>;
+            readonly 'tts-use-guide-tokens': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用引导标记以提高TTS单词回忆\n- Use guide tokens to improve TTS word recall">]>;
+            readonly 'embd-bge-small-en-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的bge-small-en-v1.5模型（注意：可以从互联网下载权重）\n- use default bge-small-en-v1.5 model (note: can download weights from the internet)">]>;
+            readonly 'embd-e5-small-en-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的e5-small-v2模型（注意：可以从互联网下载权重）\n- use default e5-small-v2 model (note: can download weights from the internet)">]>;
+            readonly 'embd-gte-small-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的gte-small模型（注意：可以从互联网下载权重）\n- use default gte-small model (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-1.5b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的Qwen 2.5 Coder 1.5B\n- use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-3b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的Qwen 2.5 Coder 3B（注意：可以从互联网下载权重）\n- use default Qwen 2.5 Coder 3B (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-7b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的Qwen 2.5 Coder 7B（注意：可以从互联网下载权重）\n- use default Qwen 2.5 Coder 7B (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-7b-spec': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用Qwen 2.5 Coder 7B + 0.5B draft进行推测解码（注意：可以从互联网下载权重）\n- use Qwen 2.5 Coder 7B + 0.5B draft for speculative decoding (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-14b-spec': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用Qwen 2.5 Coder 14B + 0.5B draft进行推测解码（注意：可以从互联网下载权重）\n- use Qwen 2.5 Coder 14B + 0.5B draft for speculative decoding (note: can download weights from the internet)">]>;
+            readonly 'fim-qwen-30b-default': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 使用默认的 Qwen 3 Coder 30B A3B Instruct；请注意：可以从互联网上下载相应的权重数据。\n- use default Qwen 3 Coder 30B A3B Instruct (note: can download weights from the internet)">]>;
+        }, undefined>, undefined>;
+        readonly sampling: v.OptionalSchema<v.ObjectSchema<{
+            readonly samplers: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SAMPLERS">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- samplers that will be used for generation in the order, separated by ';'  (default: penalties;dry;top_n_sigma;top_k;typ_p;top_p;min_p;xtc;temperature)\n- samplers that will be used for generation in the order, separated by ';'  (default: penalties;dry;top_n_sigma;top_k;typ_p;top_p;min_p;xtc;temperature)">]>;
+            readonly seed: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SEED">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- RNG种子（默认：-1，使用随机种子）\n- RNG seed (default: -1, use random seed for -1)">]>;
+            readonly 'sampling-seq': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SEQUENCE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 简化的采样序列，用于采样器（默认：edskypmxt）\n- simplified sequence for samplers that will be used (default: edskypmxt)">]>;
+            readonly 'ignore-eos': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+            } | undefined, "- 忽略流结束标记并继续生成（隐含--logit-bias EOS-inf）\n- ignore end of stream token and continue generating (implies --logit-bias EOS-inf)">]>;
+            readonly temp: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- temperature (default: 0.8)\n- temperature (default: 0.8)">]>;
+            readonly 'top-k': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- top-k 采样 (默认: 40, 0 = 禁用)\n- top-k sampling (default: 40, 0 = disabled)">]>;
+            readonly 'top-p': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- top-p 采样（默认：0.9，1.0 = 禁用）\n- top-p sampling (default: 0.9, 1.0 = disabled)">]>;
+            readonly 'min-p': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- min-p 采样 (默认: 0.1, 0.0 = 禁用)\n- min-p sampling (default: 0.1, 0.0 = disabled)">]>;
+            readonly 'top-nsigma': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- Top-N-Sigma抽样方法（默认值：-1.0；当两个参数都为-1.0时，该方法被禁用）\n- top-n-sigma sampling (default: -1.0, -1.0 = disabled)">]>;
+            readonly 'xtc-probability': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- xtc probability (默认: 0.0, 0.0 = 禁用)\n- xtc probability (default: 0.0, 0.0 = disabled)">]>;
+            readonly 'xtc-threshold': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- xtc threshold (默认: 0.1, 1.0 = 禁用)\n- xtc threshold (default: 0.1, 1.0 = disabled)">]>;
+            readonly typical: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 局部典型抽样，参数 p（默认：1.0，1.0 = 禁用）\n- locally typical sampling, parameter p (default: 1.0, 1.0 = disabled)">]>;
+            readonly 'repeat-last-n': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 用于惩罚的最后n个token（默认：64，0 = 禁用，-1 = 上下文大小）\n- last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size)">]>;
+            readonly 'repeat-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 惩罚重复的token序列（默认：1.0，1.0 = 禁用）\n- penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled)">]>;
+            readonly 'presence-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 重复 alpha 存在惩罚（默认：0.0，0.0 = 禁用）\n- repeat alpha presence penalty (default: 0.0, 0.0 = disabled)">]>;
+            readonly 'frequency-penalty': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 重复 alpha 频率惩罚（默认：0.0，0.0 = 禁用）\n- repeat alpha frequency penalty (default: 0.0, 0.0 = disabled)">]>;
+            readonly 'dry-multiplier': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 设置 DRY 采样乘数（默认：0.0，0.0 = 禁用）\n- set DRY sampling multiplier (default: 0.0, 0.0 = disabled)">]>;
+            readonly 'dry-base': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 设置 DRY 采样基础值 (默认: 1.75)\n- set DRY sampling base value (default: 1.75)">]>;
+            readonly 'dry-allowed-length': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 设置DRY采样的允许长度（默认：2）\n- set allowed length for DRY sampling (default: 2)">]>;
+            readonly 'dry-penalty-last-n': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 设置最后n个token的DRY惩罚（默认：-1，0 = 禁用，-1 = 上下文大小）\n- set DRY penalty for the last n tokens (default: -1, 0 = disable, -1 = context size)">]>;
+            readonly 'dry-sequence-breaker': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "STRING">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 添加序列破环器用于DRY抽样，清除默认的破环器（'\\n', ':', '\"', '*'）；使用 \"none\" 不使用任何序列破环器\n- add sequence breaker for DRY sampling, clearing out default breakers ('\\n', ':', '\"', '*') in the process; use \"none\" to not use any sequence breakers">]>;
+            readonly 'dynatemp-range': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 动态温度范围 (默认: 0.0, 0.0 = 禁用)\n- dynamic temperature range (default: 0.0, 0.0 = disabled)">]>;
+            readonly 'dynatemp-exp': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 动态温度指数（默认：1.0）\n- dynamic temperature exponent (default: 1.0)">]>;
+            readonly mirostat: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- 使用Mirostat抽样。如果使用了Top K、Nucleus和Locally Typical抽样器，则会被忽略。（默认：0，0=禁用，1=Mirostat，2=Mirostat 2.0）\n- use Mirostat sampling.  Top K, Nucleus and Locally Typical samplers are ignored if used.  (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)">]>;
+            readonly 'mirostat-lr': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- Mirostat学习率，参数eta（默认：0.1）\n- Mirostat learning rate, parameter eta (default: 0.1)">]>;
+            readonly 'mirostat-ent': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.DescriptionAction<number, "N">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [number] | undefined;
+            } | undefined, "- Mirostat目标熵，参数tau（默认：5.0）\n- Mirostat target entropy, parameter tau (default: 5.0)">]>;
+            readonly 'logit-bias': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "TOKEN_ID(+/-)BIAS">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 修改标记出现在完成中的可能性，例如：--logit-bias 15043+1 增加标记 ' Hello' 的可能性，或 --logit 15043-1 减少标记 ' Hello' 的可能性\n- modifies the likelihood of token appearing in the completion,  i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',  or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'">]>;
+            readonly grammar: v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "GRAMMAR">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- BNF类似语法约束生成（参见grammars目录中的示例）（默认：''）\n- BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '')">]>;
+            readonly 'grammar-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FNAME">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 从文件读取语法\n- file to read grammar from">]>;
+            readonly 'json-schema': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "SCHEMA">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- JSON schema 用于约束生成（https://json-schema.org/），例如 `{}` 表示任何 JSON 对象。对于包含外部 $refs 的 schema，使用 --grammar + example/json_schema_to_grammar.py 代替\n- JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object  For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead">]>;
+            readonly 'json-schema-file': v.SchemaWithPipe<readonly [v.OptionalSchema<v.ObjectSchema<{
+                readonly enable: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
+                readonly value: v.SchemaWithPipe<readonly [v.OptionalSchema<v.TupleSchema<[v.SchemaWithPipe<readonly [v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TrimAction]>, v.DescriptionAction<string, "FILE">]>], undefined>, undefined>]>;
+            }, undefined>, undefined>, v.DescriptionAction<{
+                enable: boolean;
+                value?: [string] | undefined;
+            } | undefined, "- 包含用于约束生成的JSON架构的文件（https://json-schema.org/），例如{}表示任何JSON对象  对于具有外部$refs的架构，使用--grammar + example/json_schema_to_grammar.py代替\n- File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object  For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead">]>;
+        }, undefined>, undefined>;
+    }, undefined>, undefined>;
+    readonly aliases: v.OptionalSchema<v.ArraySchema<v.StringSchema<undefined>, undefined>, undefined>;
+    readonly env: v.OptionalSchema<v.RecordSchema<v.StringSchema<undefined>, v.AnySchema, undefined>, undefined>;
+    readonly ttl: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
+    readonly checkEndpoint: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
+    readonly useModelName: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
+    readonly exec: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
+        readonly version: v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TitleAction<string, "llama.cpp 版本号">, v.DescriptionAction<string, "llama.cpp的版本号,可以从 https://github.com/ggerganov/llama.cpp/releases 获得">]>;
+        readonly device: v.SchemaWithPipe<readonly [v.PicklistSchema<["cpu", "cuda12.4", "hip-radeon", "sycl", "vulkan"], undefined>, v.TitleAction<"cpu" | "cuda12.4" | "hip-radeon" | "sycl" | "vulkan", "llama.cpp 使用设备">]>;
+    }, undefined>], undefined>, undefined>;
+    readonly model: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
+    readonly proxy: v.SchemaWithPipe<readonly [v.OptionalSchema<v.StringSchema<undefined>, undefined>, v.DescriptionAction<string | undefined, "正常情况下无需配置会自动生成">]>;
+}, undefined>;
+export type LlamaServerSwapItemType = v.InferOutput<typeof LlamaServerSwapItemDefine>; // Output (post-parse) type that valibot infers from the LlamaServerSwapItemDefine schema. NOTE(review): the schema's declaration header is not visible in this excerpt — confirm which object schema it names.
+export type LlamaServerSwapItemInputType = v.InferInput<typeof LlamaServerSwapItemDefine>; // Input (pre-parse) type that valibot infers from the LlamaServerSwapItemDefine schema, i.e. the shape a caller supplies before validation. NOTE(review): the schema's declaration header is not visible in this excerpt — confirm which object schema it names.