@fugood/llama.node 1.3.7 → 1.4.0
This diff shows the published contents of the two package versions as they appear in their public registries. It is provided for informational purposes only.
- package/lib/binding.js +18 -1
- package/lib/binding.ts +19 -1
- package/lib/index.js +3 -3
- package/lib/index.ts +1 -1
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +7 -7
- package/src/LlamaCompletionWorker.cpp +2 -2
- package/src/llama.cpp/common/arg.cpp +27 -2
- package/src/llama.cpp/common/chat-parser.cpp +968 -0
- package/src/llama.cpp/common/chat.cpp +0 -952
- package/src/llama.cpp/common/common.cpp +55 -0
- package/src/llama.cpp/common/common.h +18 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +6 -4
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml.h +12 -4
- package/src/llama.cpp/ggml/src/CMakeLists.txt +26 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -15
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +721 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +9 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +71 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +243 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +84 -85
- package/src/llama.cpp/include/llama.h +18 -0
- package/src/llama.cpp/src/CMakeLists.txt +2 -0
- package/src/llama.cpp/src/llama-arch.cpp +95 -16
- package/src/llama.cpp/src/llama-arch.h +15 -0
- package/src/llama.cpp/src/llama-context.cpp +7 -3
- package/src/llama.cpp/src/llama-graph.cpp +3 -3
- package/src/llama.cpp/src/llama-hparams.h +1 -1
- package/src/llama.cpp/src/llama-model.cpp +141 -6
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +13 -5
- package/src/llama.cpp/src/models/lfm2.cpp +5 -3
- package/src/llama.cpp/src/models/models.h +55 -1
- package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
- package/src/llama.cpp/src/models/rnd1.cpp +126 -0
package/lib/binding.js
CHANGED

@@ -41,8 +41,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.isLibVariantAvailable = exports.loadModule = void 0;
+const path_1 = __importDefault(require("path"));
 const getPlatformPackageName = (variant) => {
     const platform = process.platform;
     const arch = process.arch;
@@ -58,7 +62,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
     }
 });
 const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
-    …
+    const packageName = getPlatformPackageName(variant);
+    // Set ADSP_LIBRARY_PATH for load HTP libs
+    if (variant === 'snapdragon') {
+        const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+        if (!adspLibraryPath) {
+            try {
+                process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+            }
+            catch (_a) {
+                /* no-op */
+            }
+        }
+    }
+    let module = yield loadPlatformPackage(packageName);
     if (module) {
         return module;
     }
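Note: this binding.js hunk is just the compiled CommonJS form of the binding.ts change below. TypeScript's esModuleInterop emit turns `import path from 'path'` into the `__importDefault` helper plus `const path_1 = __importDefault(require("path"))`, which is why the source-level `path.dirname(...)` appears here as `path_1.default.dirname(...)`.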
package/lib/binding.ts
CHANGED

@@ -1,3 +1,5 @@
+import path from 'path'
+
 export type MessagePart = {
   type: string
   text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
 }
 
 export const loadModule = async (variant?: LibVariant): Promise<Module> => {
-  …
+  const packageName = getPlatformPackageName(variant)
+
+  // Set ADSP_LIBRARY_PATH for load HTP libs
+  if (variant === 'snapdragon') {
+    const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+    if (!adspLibraryPath) {
+      try {
+        process.env.ADSP_LIBRARY_PATH = path.dirname(
+          require.resolve(packageName),
+        )
+      } catch {
+        /* no-op */
+      }
+    }
+  }
+
+  let module = await loadPlatformPackage(packageName)
   if (module) {
     return module
   }
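In effect, `loadModule('snapdragon')` now points the Qualcomm HTP runtime at the prebuilt package's own directory unless the caller already exported `ADSP_LIBRARY_PATH`. A minimal usage sketch in TypeScript, assuming `loadModule` is re-exported from the package root (it is defined in lib/binding):

import { loadModule } from '@fugood/llama.node'

async function initSnapdragon(htpLibDir?: string) {
  // An explicit override still wins: loadModule only fills in
  // ADSP_LIBRARY_PATH when the variable is not already set.
  if (htpLibDir) process.env.ADSP_LIBRARY_PATH = htpLibDir
  // Otherwise it falls back to the directory of the resolved
  // @fugood/node-llama-linux-arm64-snapdragon platform package.
  return loadModule('snapdragon')
}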
package/lib/index.js
CHANGED

@@ -87,9 +87,9 @@ class LlamaContextWrapper {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
     getFormattedChat(messages, template, params) {
-        var _a;
+        var _a, _b;
         const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
-        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
         let tmpl;
         if (template)
             tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
             tools: params === null || params === void 0 ? void 0 : params.tools,
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
-            enable_thinking: (…
+            enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
             add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
             now: params === null || params === void 0 ? void 0 : params.now,
             chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts
CHANGED

@@ -124,7 +124,7 @@ class LlamaContextWrapper {
   ): FormattedChatResult {
     const { messages: chat, has_media, media_paths } = formatMediaChat(messages)
 
-    const useJinja = this.isJinjaSupported() && params?.jinja
+    const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
     let tmpl
     if (template) tmpl = template // Force replace if provided
 
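The behavioral change in both index.js and index.ts: previously `params?.jinja` had to be explicitly truthy for Jinja formatting, whereas `params?.jinja ?? true` treats an omitted value as true while an explicit `false` still opts out. The compiled index.js applies the same `?? true` default to `enable_thinking`. A small sketch of the three cases (the `wrapper` declaration is an illustrative stand-in for a LlamaContextWrapper instance):

declare const wrapper: {
  getFormattedChat(
    messages: unknown[],
    template?: string,
    params?: { jinja?: boolean },
  ): unknown
}

const messages = [{ role: 'user', content: 'hi' }]
wrapper.getFormattedChat(messages)                              // jinja omitted: now treated as true
wrapper.getFormattedChat(messages, undefined, { jinja: true })  // unchanged
wrapper.getFormattedChat(messages, undefined, { jinja: false }) // still opts out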
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.7",
+  "version": "1.4.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.7",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.7",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.7",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.3.7",
-    "@fugood/node-llama-linux-arm64": "1.3.7",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.7",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.7",
-    "@fugood/node-llama-win32-x64": "1.3.7",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.7",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.7",
-    "@fugood/node-llama-win32-arm64": "1.3.7",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.7",
-    "@fugood/node-llama-darwin-x64": "1.3.7",
-    "@fugood/node-llama-darwin-arm64": "1.3.7"
+    "@fugood/node-llama-linux-x64": "1.4.0",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.0",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.0",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.0",
+    "@fugood/node-llama-linux-arm64": "1.4.0",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.0",
+    "@fugood/node-llama-win32-x64": "1.4.0",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.0",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.0",
+    "@fugood/node-llama-win32-arm64": "1.4.0",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.0",
+    "@fugood/node-llama-darwin-x64": "1.4.0",
+    "@fugood/node-llama-darwin-arm64": "1.4.0"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
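Each optional dependency is a prebuilt binary for one platform/arch/backend combination, pinned in lockstep with the main package version; `getPlatformPackageName` in lib/binding picks one at load time. A hypothetical reconstruction of that mapping, consistent with the names above (the real implementation is not shown in this diff, and the type names are illustrative):

type LibVariant = 'default' | 'vulkan' | 'cuda' | 'snapdragon'

function getPlatformPackageName(variant: LibVariant = 'default'): string {
  const { platform, arch } = process // e.g. 'linux' + 'arm64'
  const suffix = variant === 'default' ? '' : `-${variant}`
  return `@fugood/node-llama-${platform}-${arch}${suffix}`
}

getPlatformPackageName('vulkan') // '@fugood/node-llama-linux-x64-vulkan' on linux/x64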
package/scripts/llama.cpp.patch
CHANGED

@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
 #
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index …
+index b4a0f985e..2383d2ea9 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
 struct templates_params {
     json messages;
     json tools;
-@@ -…
+@@ -709,7 +696,7 @@ static std::string apply(
         tmpl_inputs.extra_context.merge_patch(*additional_context);
     }
     // TODO: add flag to control date/time, if only for testing purposes.
@@ -85,10 +85,10 @@ index 754c411e2..71241a6cc 100644
 struct common_chat_tool_call {
     std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index …
+index 0d7fd9a93..6bf3cc7ab 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -…
+@@ -1217,6 +1217,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     mparams.n_gpu_layers = params.n_gpu_layers;
 }
 
@@ -97,10 +97,10 @@ index f3cc55247..65398844f 100644
     mparams.split_mode = params.split_mode;
     mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index …
+index 2f23d0baa..e4e6c795e 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -…
+@@ -299,6 +299,7 @@ struct lr_opt {
 struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
 
 struct common_params {
@@ -109,7 +109,7 @@ index de5b404dd..d30d252c9 100644
     int32_t n_ctx = 4096; // context size
     int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index …
+index 7e53a57b7..a328d4db4 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
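Note: the visible changes to the vendored patch are rebase housekeeping, updated blob hashes on the `index` lines and shifted hunk offsets, so that the patch still applies after the upstream llama.cpp sync; the shift matches the file list above, where roughly 950 lines move out of common/chat.cpp into the new common/chat-parser.cpp.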
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
     const auto &prob = probs[i];
     Napi::Object token_obj = Napi::Object::New(env);
 
-    std::string token_str = …
+    std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
     token_obj.Set("content", Napi::String::New(env, token_str));
 
     Napi::Array token_probs = Napi::Array::New(env);
     for (size_t j = 0; j < prob.probs.size(); j++) {
       const auto &p = prob.probs[j];
       Napi::Object prob_obj = Napi::Object::New(env);
-      std::string tok_str = …
+      std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
       prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
       prob_obj.Set("prob", Napi::Number::New(env, p.prob));
       token_probs.Set(j, prob_obj);
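For JS consumers, what matters is the object shape this loop builds. A sketch of one entry, derived from the Napi keys set above; the outer `probs` field name is an assumption, since only the inner keys (`content`, `tok_str`, `prob`) appear in this hunk:

// Shape implied by the C++ above; `probs` as the outer field name is assumed.
interface CompletionTokenProb {
  content: string // formatted text of the sampled token
  probs: Array<{
    tok_str: string // formatted text of a candidate token
    prob: number    // its probability
  }>
}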
package/src/llama.cpp/common/arg.cpp
CHANGED

@@ -694,6 +694,12 @@ static bool is_autoy(const std::string & value) {
 }
 
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
+    // default values specific to example
+    // note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
+    if (ex == LLAMA_EXAMPLE_SERVER) {
+        params.use_jinja = true;
+    }
+
     // load dynamic backends
     ggml_backend_load_all();
 
@@ -974,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         [](common_params & params) {
             params.kv_unified = true;
         }
-    ).set_env("…
+    ).set_env("LLAMA_ARG_KV_UNIFIED"));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -1232,6 +1238,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         [](common_params & params, const std::string & value) {
             const auto sampler_names = string_split<std::string>(value, ';');
             params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1261,6 +1268,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         [](common_params & params, const std::string & value) {
             params.sampling.temp = std::stof(value);
             params.sampling.temp = std::max(params.sampling.temp, 0.0f);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1268,6 +1276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k),
         [](common_params & params, int value) {
             params.sampling.top_k = value;
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1275,6 +1284,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
         [](common_params & params, const std::string & value) {
             params.sampling.top_p = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1282,6 +1292,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
         [](common_params & params, const std::string & value) {
             params.sampling.min_p = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1296,6 +1307,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
         [](common_params & params, const std::string & value) {
             params.sampling.xtc_probability = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1303,6 +1315,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
         [](common_params & params, const std::string & value) {
             params.sampling.xtc_threshold = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1321,6 +1334,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
             }
             params.sampling.penalty_last_n = value;
             params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1328,6 +1342,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
         [](common_params & params, const std::string & value) {
             params.sampling.penalty_repeat = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1425,6 +1440,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat),
         [](common_params & params, int value) {
             params.sampling.mirostat = value;
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1432,6 +1448,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
         [](common_params & params, const std::string & value) {
             params.sampling.mirostat_eta = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -1439,6 +1456,7 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
         string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
         [](common_params & params, const std::string & value) {
             params.sampling.mirostat_tau = std::stof(value);
+            params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
         }
     ).set_sparam());
     add_opt(common_arg(
@@ -2476,11 +2494,18 @@ common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--jinja"},
-        "use jinja template for chat (default: …
+        string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
         [](common_params & params) {
             params.use_jinja = true;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
+    add_opt(common_arg(
+        {"--no-jinja"},
+        string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+        [](common_params & params) {
+            params.use_jinja = false;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_NO_JINJA"));
     add_opt(common_arg(
         {"--reasoning-format"}, "FORMAT",
         "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"