@fugood/llama.node 1.3.7 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/lib/binding.js +18 -1
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +15 -15
  6. package/scripts/llama.cpp.patch +7 -7
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/llama.cpp/common/arg.cpp +27 -2
  9. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  10. package/src/llama.cpp/common/chat.cpp +0 -952
  11. package/src/llama.cpp/common/common.cpp +55 -0
  12. package/src/llama.cpp/common/common.h +18 -0
  13. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  14. package/src/llama.cpp/ggml/CMakeLists.txt +6 -4
  15. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  16. package/src/llama.cpp/ggml/include/ggml.h +12 -4
  17. package/src/llama.cpp/ggml/src/CMakeLists.txt +26 -4
  18. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -15
  19. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +721 -0
  20. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  21. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -2
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +9 -0
  23. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +71 -4
  24. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  25. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +243 -4
  26. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  27. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +84 -85
  28. package/src/llama.cpp/include/llama.h +18 -0
  29. package/src/llama.cpp/src/CMakeLists.txt +2 -0
  30. package/src/llama.cpp/src/llama-arch.cpp +95 -16
  31. package/src/llama.cpp/src/llama-arch.h +15 -0
  32. package/src/llama.cpp/src/llama-context.cpp +7 -3
  33. package/src/llama.cpp/src/llama-graph.cpp +3 -3
  34. package/src/llama.cpp/src/llama-hparams.h +1 -1
  35. package/src/llama.cpp/src/llama-model.cpp +141 -6
  36. package/src/llama.cpp/src/llama-model.h +4 -0
  37. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  38. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  39. package/src/llama.cpp/src/models/models.h +55 -1
  40. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
  41. package/src/llama.cpp/src/models/rnd1.cpp +126 -0
package/lib/binding.js CHANGED
@@ -41,8 +41,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
          step((generator = generator.apply(thisArg, _arguments || [])).next());
      });
  };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.isLibVariantAvailable = exports.loadModule = void 0;
+ const path_1 = __importDefault(require("path"));
  const getPlatformPackageName = (variant) => {
      const platform = process.platform;
      const arch = process.arch;
@@ -58,7 +62,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
      }
  });
  const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
-     let module = yield loadPlatformPackage(getPlatformPackageName(variant));
+     const packageName = getPlatformPackageName(variant);
+     // Set ADSP_LIBRARY_PATH for load HTP libs
+     if (variant === 'snapdragon') {
+         const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+         if (!adspLibraryPath) {
+             try {
+                 process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+             }
+             catch (_a) {
+                 /* no-op */
+             }
+         }
+     }
+     let module = yield loadPlatformPackage(packageName);
      if (module) {
          return module;
      }
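
Taken together with the optionalDependencies list in package.json, the getPlatformPackageName context above implies a per-platform prebuilt addon naming scheme. A hedged TypeScript sketch of that scheme, inferred from the package names only (the helper name is illustrative; the full body of getPlatformPackageName is not shown in this diff):

type Variant = 'vulkan' | 'cuda' | 'snapdragon' // variants visible in optionalDependencies

// Inferred scheme: @fugood/node-llama-<platform>-<arch>[-<variant>]
function platformPackageName(variant?: Variant): string {
  const base = `@fugood/node-llama-${process.platform}-${process.arch}`
  return variant ? `${base}-${variant}` : base
}

// e.g. on linux/arm64: platformPackageName('snapdragon') === '@fugood/node-llama-linux-arm64-snapdragon'
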
package/lib/binding.ts CHANGED
@@ -1,3 +1,5 @@
+ import path from 'path'
+
  export type MessagePart = {
    type: string
    text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
  }
 
  export const loadModule = async (variant?: LibVariant): Promise<Module> => {
-   let module = await loadPlatformPackage(getPlatformPackageName(variant))
+   const packageName = getPlatformPackageName(variant)
+
+   // Set ADSP_LIBRARY_PATH for load HTP libs
+   if (variant === 'snapdragon') {
+     const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+     if (!adspLibraryPath) {
+       try {
+         process.env.ADSP_LIBRARY_PATH = path.dirname(
+           require.resolve(packageName),
+         )
+       } catch {
+         /* no-op */
+       }
+     }
+   }
+
+   let module = await loadPlatformPackage(packageName)
    if (module) {
      return module
    }
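
The fallback above only applies to the snapdragon variant: an existing ADSP_LIBRARY_PATH is left untouched, otherwise the loader points it at the directory of the resolved prebuilt package so the bundled HTP libraries can be found before the native module is loaded. A minimal TypeScript sketch of the same resolution order (the helper name is illustrative; the env var, path.dirname and require.resolve usage come from the diff):

import path from 'path'

function resolveAdspLibraryPath(packageName: string): string | undefined {
  // An explicit ADSP_LIBRARY_PATH always wins; loadModule does not override it.
  if (process.env.ADSP_LIBRARY_PATH) return process.env.ADSP_LIBRARY_PATH
  try {
    // Otherwise derive it from where the platform package is installed.
    return path.dirname(require.resolve(packageName))
  } catch {
    return undefined // package not installed; the variable stays unset
  }
}

// e.g. resolveAdspLibraryPath('@fugood/node-llama-linux-arm64-snapdragon')
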
package/lib/index.js CHANGED
@@ -87,9 +87,9 @@ class LlamaContextWrapper {
          return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
      }
      getFormattedChat(messages, template, params) {
-         var _a;
+         var _a, _b;
          const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
-         const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+         const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
          let tmpl;
          if (template)
              tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
              tools: params === null || params === void 0 ? void 0 : params.tools,
              parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
              tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
-             enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
+             enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
              add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
              now: params === null || params === void 0 ? void 0 : params.now,
              chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts CHANGED
@@ -124,7 +124,7 @@ class LlamaContextWrapper {
  ): FormattedChatResult {
    const { messages: chat, has_media, media_paths } = formatMediaChat(messages)
 
-   const useJinja = this.isJinjaSupported() && params?.jinja
+   const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
    let tmpl
    if (template) tmpl = template // Force replace if provided
 
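
The one-line change in both index.js and index.ts flips the default: when the model's chat template supports Jinja, getFormattedChat now uses it unless the caller passes jinja: false. A self-contained sketch of the new resolution (helper and type names are illustrative; the expression mirrors the diff):

type FormatParams = { jinja?: boolean } // illustrative subset of the real params type

// Mirrors `this.isJinjaSupported() && (params?.jinja ?? true)` from the diff above.
function shouldUseJinja(jinjaSupported: boolean, params?: FormatParams): boolean {
  return jinjaSupported && (params?.jinja ?? true)
}

console.log(shouldUseJinja(true))                   // true  — new default in 1.4.0
console.log(shouldUseJinja(true, { jinja: false })) // false — explicit opt-out
console.log(shouldUseJinja(false))                  // false — template not supported
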
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@fugood/llama.node",
    "access": "public",
-   "version": "1.3.7",
+   "version": "1.4.0",
    "description": "An another Node binding of llama.cpp",
    "main": "lib/index.js",
    "scripts": {
@@ -72,20 +72,20 @@
      "CMakeLists.txt"
    ],
    "optionalDependencies": {
-     "@fugood/node-llama-linux-x64": "1.3.7",
-     "@fugood/node-llama-linux-x64-vulkan": "1.3.7",
-     "@fugood/node-llama-linux-x64-cuda": "1.3.7",
-     "@fugood/node-llama-linux-arm64-snapdragon": "1.3.7",
-     "@fugood/node-llama-linux-arm64": "1.3.7",
-     "@fugood/node-llama-linux-arm64-vulkan": "1.3.7",
-     "@fugood/node-llama-linux-arm64-cuda": "1.3.7",
-     "@fugood/node-llama-win32-x64": "1.3.7",
-     "@fugood/node-llama-win32-x64-vulkan": "1.3.7",
-     "@fugood/node-llama-win32-x64-cuda": "1.3.7",
-     "@fugood/node-llama-win32-arm64": "1.3.7",
-     "@fugood/node-llama-win32-arm64-vulkan": "1.3.7",
-     "@fugood/node-llama-darwin-x64": "1.3.7",
-     "@fugood/node-llama-darwin-arm64": "1.3.7"
+     "@fugood/node-llama-linux-x64": "1.4.0",
+     "@fugood/node-llama-linux-x64-vulkan": "1.4.0",
+     "@fugood/node-llama-linux-x64-cuda": "1.4.0",
+     "@fugood/node-llama-linux-arm64-snapdragon": "1.4.0",
+     "@fugood/node-llama-linux-arm64": "1.4.0",
+     "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
+     "@fugood/node-llama-linux-arm64-cuda": "1.4.0",
+     "@fugood/node-llama-win32-x64": "1.4.0",
+     "@fugood/node-llama-win32-x64-vulkan": "1.4.0",
+     "@fugood/node-llama-win32-x64-cuda": "1.4.0",
+     "@fugood/node-llama-win32-arm64": "1.4.0",
+     "@fugood/node-llama-win32-arm64-vulkan": "1.4.0",
+     "@fugood/node-llama-darwin-x64": "1.4.0",
+     "@fugood/node-llama-darwin-arm64": "1.4.0"
    },
    "devDependencies": {
      "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 6fa05a604..87dfa7a8b 100644
+ index b4a0f985e..2383d2ea9 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
  struct templates_params {
      json messages;
      json tools;
- @@ -817,7 +804,7 @@ static std::string apply(
+ @@ -709,7 +696,7 @@ static std::string apply(
      tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -85,10 +85,10 @@ index 754c411e2..71241a6cc 100644
  struct common_chat_tool_call {
      std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index f3cc55247..65398844f 100644
+ index 0d7fd9a93..6bf3cc7ab 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1162,6 +1162,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1217,6 +1217,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
      mparams.n_gpu_layers = params.n_gpu_layers;
  }
 
@@ -97,10 +97,10 @@ index f3cc55247..65398844f 100644
      mparams.split_mode = params.split_mode;
      mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index de5b404dd..d30d252c9 100644
+ index 2f23d0baa..e4e6c795e 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -281,6 +281,7 @@ struct lr_opt {
+ @@ -299,6 +299,7 @@ struct lr_opt {
      struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
 
  struct common_params {
@@ -109,7 +109,7 @@ index de5b404dd..d30d252c9 100644
      int32_t n_ctx = 4096; // context size
      int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index d0cab0bcb..48d532838 100644
+ index 7e53a57b7..a328d4db4 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
      const auto &prob = probs[i];
      Napi::Object token_obj = Napi::Object::New(env);
 
-     std::string token_str = common_token_to_piece(ctx, prob.tok);
+     std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
      token_obj.Set("content", Napi::String::New(env, token_str));
 
      Napi::Array token_probs = Napi::Array::New(env);
      for (size_t j = 0; j < prob.probs.size(); j++) {
          const auto &p = prob.probs[j];
          Napi::Object prob_obj = Napi::Object::New(env);
-         std::string tok_str = common_token_to_piece(ctx, p.tok);
+         std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
          prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
          prob_obj.Set("prob", Napi::Number::New(env, p.prob));
          token_probs.Set(j, prob_obj);
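
Both replaced calls build the per-token probability objects attached to completion results; rnllama::tokens_to_output_formatted_string formats the token text for output instead of using the raw token piece. A rough TypeScript sketch of the JS-side shape those Set(...) calls produce (the content, tok_str and prob keys appear in the hunk; the interface names and the enclosing probs key are assumptions):

interface TokenCandidate {
  tok_str: string // formatted candidate token text
  prob: number    // probability assigned to this candidate
}

interface TokenProbEntry {
  content: string         // formatted text of the sampled token
  probs: TokenCandidate[] // top candidates; property name assumed, not shown in the hunk
}

const example: TokenProbEntry = {
  content: 'Hello',
  probs: [
    { tok_str: 'Hello', prob: 0.91 },
    { tok_str: 'Hi', prob: 0.05 },
  ],
}
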
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -694,6 +694,12 @@ static bool is_autoy(const std::string & value) {
  }
 
  common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
+     // default values specific to example
+     // note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
+     if (ex == LLAMA_EXAMPLE_SERVER) {
+         params.use_jinja = true;
+     }
+
      // load dynamic backends
      ggml_backend_load_all();
 
@@ -974,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          [](common_params & params) {
              params.kv_unified = true;
          }
-     ).set_env("LLAMA_ARG_KV_SPLIT"));
+     ).set_env("LLAMA_ARG_KV_UNIFIED"));
      add_opt(common_arg(
          {"--no-context-shift"},
          string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -1232,6 +1238,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          [](common_params & params, const std::string & value) {
              const auto sampler_names = string_split<std::string>(value, ';');
              params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1261,6 +1268,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          [](common_params & params, const std::string & value) {
              params.sampling.temp = std::stof(value);
              params.sampling.temp = std::max(params.sampling.temp, 0.0f);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1268,6 +1276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k),
          [](common_params & params, int value) {
              params.sampling.top_k = value;
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1275,6 +1284,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
          [](common_params & params, const std::string & value) {
              params.sampling.top_p = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1282,6 +1292,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
          [](common_params & params, const std::string & value) {
              params.sampling.min_p = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1296,6 +1307,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
          [](common_params & params, const std::string & value) {
              params.sampling.xtc_probability = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1303,6 +1315,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
          [](common_params & params, const std::string & value) {
              params.sampling.xtc_threshold = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1321,6 +1334,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
              }
              params.sampling.penalty_last_n = value;
              params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1328,6 +1342,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
          [](common_params & params, const std::string & value) {
              params.sampling.penalty_repeat = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1425,6 +1440,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
              "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat),
          [](common_params & params, int value) {
              params.sampling.mirostat = value;
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1432,6 +1448,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
          [](common_params & params, const std::string & value) {
              params.sampling.mirostat_eta = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -1439,6 +1456,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
          [](common_params & params, const std::string & value) {
              params.sampling.mirostat_tau = std::stof(value);
+             params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
          }
      ).set_sparam());
      add_opt(common_arg(
@@ -2476,11 +2494,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      ).set_examples({LLAMA_EXAMPLE_SERVER}));
      add_opt(common_arg(
          {"--jinja"},
-         "use jinja template for chat (default: disabled)",
+         string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
          [](common_params & params) {
              params.use_jinja = true;
          }
      ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
+     add_opt(common_arg(
+         {"--no-jinja"},
+         string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+         [](common_params & params) {
+             params.use_jinja = false;
+         }
+     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_NO_JINJA"));
      add_opt(common_arg(
          {"--reasoning-format"}, "FORMAT",
          "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"