@fugood/llama.node 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +47 -8
  19. package/lib/index.js +21 -1
  20. package/lib/index.ts +31 -1
  21. package/package.json +12 -3
  22. package/src/LlamaCompletionWorker.cpp +33 -6
  23. package/src/LlamaCompletionWorker.h +3 -1
  24. package/src/LlamaContext.cpp +336 -28
  25. package/src/LlamaContext.h +2 -0
  26. package/src/common.hpp +19 -2
  27. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  29. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  31. package/src/llama.cpp/CMakeLists.txt +10 -19
  32. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  33. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  34. package/src/llama.cpp/common/arg.cpp +66 -16
  35. package/src/llama.cpp/common/chat-template.hpp +515 -0
  36. package/src/llama.cpp/common/chat.cpp +966 -0
  37. package/src/llama.cpp/common/chat.hpp +52 -0
  38. package/src/llama.cpp/common/common.cpp +159 -36
  39. package/src/llama.cpp/common/common.h +56 -14
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  41. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  42. package/src/llama.cpp/common/llguidance.cpp +270 -0
  43. package/src/llama.cpp/common/log.cpp +1 -10
  44. package/src/llama.cpp/common/log.h +10 -0
  45. package/src/llama.cpp/common/minja.hpp +2868 -0
  46. package/src/llama.cpp/common/sampling.cpp +22 -1
  47. package/src/llama.cpp/common/sampling.h +3 -0
  48. package/src/llama.cpp/docs/build.md +54 -9
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  50. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  51. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  52. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  54. package/src/llama.cpp/examples/llava/clip.h +2 -0
  55. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  56. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  57. package/src/llama.cpp/examples/main/main.cpp +26 -25
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  59. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  60. package/src/llama.cpp/examples/run/run.cpp +224 -69
  61. package/src/llama.cpp/examples/server/server.cpp +252 -81
  62. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  63. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  64. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  65. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  66. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  67. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  68. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  71. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  73. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  74. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  75. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  77. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  81. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  82. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  83. package/src/llama.cpp/include/llama.h +14 -1
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  85. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  86. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  87. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  88. package/src/llama.cpp/src/llama-arch.h +3 -1
  89. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  90. package/src/llama.cpp/src/llama-chat.h +1 -0
  91. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  92. package/src/llama.cpp/src/llama-grammar.h +22 -1
  93. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  94. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  95. package/src/llama.cpp/src/llama-model.cpp +76 -6
  96. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  97. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  98. package/src/llama.cpp/src/llama.cpp +181 -123
  99. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  100. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  101. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  102. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  103. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  104. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  105. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  106. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/llama.cpp/.github/workflows/close-issue.yml
@@ -17,7 +17,7 @@ jobs:
     steps:
       - uses: actions/stale@v5
         with:
-          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
           days-before-issue-stale: 30
           days-before-issue-close: 14
           stale-issue-label: "stale"
package/src/llama.cpp/.github/workflows/docker.yml
@@ -28,10 +28,11 @@ jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     env:
       COMMIT_SHA: ${{ github.sha }}
     strategy:
+      fail-fast: false
       matrix:
         config:
           # Multi-stage build
package/src/llama.cpp/.github/workflows/server.yml
@@ -81,13 +81,36 @@ jobs:
         with:
           node-version: '22.11.0'
 
+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
       - name: Verify bundled index.html
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
           cd examples/server/webui
           git status
-          npm ci
+
           npm run build
           git status
           modified_files="$(git status -s)"
@@ -205,7 +228,7 @@ jobs:
         run: |
           cd examples/server/tests
           $env:PYTHONIOENCODING = ":replace"
-          pytest -v -x
+          pytest -v -x -m "not slow"
 
       - name: Slow tests
         id: server_integration_tests_slow
package/src/llama.cpp/CMakeLists.txt
@@ -16,6 +16,7 @@ endif()
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     set(LLAMA_STANDALONE ON)
@@ -49,6 +50,8 @@ endif()
 if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()
 
 #
@@ -77,6 +80,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
@@ -185,27 +189,14 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
 
-# At the moment some compile definitions are placed within the ggml/src
-# directory but not exported on the `ggml` target. This could be improved by
-# determining _precisely_ which defines are necessary for the llama-config
-# package.
-#
-set(GGML_TRANSIENT_DEFINES)
-get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
-get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
-if (GGML_DIR_DEFINES)
-    list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
-endif()
-get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
-if (GGML_TARGET_DEFINES)
-    list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
-endif()
-get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
-# all public headers
 set(LLAMA_PUBLIC_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
-set_target_properties(llama PROPERTIES PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
+
+set_target_properties(llama
+    PROPERTIES
+        PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
+
 install(TARGETS llama LIBRARY PUBLIC_HEADER)
 
 configure_package_config_file(
@@ -242,4 +233,4 @@ configure_file(cmake/llama.pc.in
         @ONLY)
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION lib/pkgconfig)
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
package/src/llama.cpp/cmake/build-info.cmake
@@ -44,7 +44,7 @@ if(MSVC)
     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
 else()
     execute_process(
-        COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
         OUTPUT_VARIABLE OUT
         OUTPUT_STRIP_TRAILING_WHITESPACE
     )
package/src/llama.cpp/common/CMakeLists.txt
@@ -56,14 +56,19 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
+    chat.cpp
+    chat.hpp
+    chat-template.hpp
     common.cpp
     common.h
     console.cpp
     console.h
     json-schema-to-grammar.cpp
     json.hpp
+    llguidance.cpp
     log.cpp
     log.h
+    minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     sampling.cpp
@@ -87,6 +92,33 @@ if (LLAMA_CURL)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 
+if (LLAMA_LLGUIDANCE)
+    include(ExternalProject)
+    set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
+    set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+    ExternalProject_Add(llguidance_ext
+        GIT_REPOSITORY https://github.com/guidance-ai/llguidance
+        # v0.6.12:
+        GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
+        PREFIX ${CMAKE_BINARY_DIR}/llguidance
+        SOURCE_DIR ${LLGUIDANCE_SRC}
+        BUILD_IN_SOURCE TRUE
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND cargo build --release
+        INSTALL_COMMAND ""
+        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
+        UPDATE_COMMAND ""
+    )
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
+
+    add_library(llguidance STATIC IMPORTED)
+    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
+    add_dependencies(llguidance llguidance_ext)
+
+    target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
+endif ()
+
 target_include_directories(${TARGET} PUBLIC .)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
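
Note on the hunk above: configuring with -DLLAMA_LLGUIDANCE=ON builds the LLGuidance Rust crate via cargo, links it into common, and defines LLAMA_USE_LLGUIDANCE for dependents. A minimal sketch of guarded caller code follows; only the compile definition is taken from this diff, while the llama_sampler_init_llg() entry point and its signature (and the "lark" grammar-kind name) are assumptions about what common/llguidance.cpp exposes, not something shown here.

#include "llama.h"

#ifdef LLAMA_USE_LLGUIDANCE
// Assumed declaration of the sampler constructor added by common/llguidance.cpp.
extern llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab,
                                              const char * grammar_kind,
                                              const char * grammar_data);

// Build a grammar-constrained sampler from a Lark-style grammar string.
static llama_sampler * make_structured_sampler(const llama_vocab * vocab,
                                               const char * lark_grammar) {
    return llama_sampler_init_llg(vocab, "lark", lark_grammar);
}
#endif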
package/src/llama.cpp/common/arg.cpp
@@ -133,7 +133,8 @@ static void common_params_handle_model_default(
         const std::string & model_url,
         std::string & hf_repo,
         std::string & hf_file,
-        const std::string & hf_token) {
+        const std::string & hf_token,
+        const std::string & model_default) {
     if (!hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (hf_file.empty()) {
@@ -163,7 +164,7 @@ static void common_params_handle_model_default(
             model = fs_get_cache_file(string_split<std::string>(f, '/').back());
         }
     } else if (model.empty()) {
-        model = DEFAULT_MODEL_PATH;
+        model = model_default;
     }
 }
 
@@ -299,8 +300,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     }
 
     // TODO: refactor model params in a common struct
-    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
-    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
+    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH);
+    common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, "");
+    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, "");
 
     if (params.escape) {
         string_process_escapes(params.prompt);
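
Read together, the two arg.cpp hunks above thread a new model_default argument through the fallback logic: the main model keeps DEFAULT_MODEL_PATH, while the draft (speculative) and vocoder models now default to an empty string, i.e. they stay unset unless explicitly requested. An illustrative reduction of that final fallback branch (the helper name is made up for this note):

#include <string>

// If nothing selected a model (no --model / --model-url / --hf-repo),
// fall back to model_default; an empty default leaves the feature off.
static void apply_model_default(std::string & model, const std::string & model_default) {
    if (model.empty()) {
        model = model_default;  // DEFAULT_MODEL_PATH for the main model, "" for draft/vocoder
    }
}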
@@ -323,6 +325,14 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
     }
 
+    if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) {
+        throw std::runtime_error(string_format(
+            "error: the supplied chat template is not supported: %s%s\n",
+            params.chat_template.c_str(),
+            params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates"
+        ));
+    }
+
     return true;
 }
 
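Template verification now runs once at the end of parsing (so it can see whether --jinja was set) instead of inside the --chat-template handler, as the later hunk below removes. common_chat_verify_template() itself is not shown in this diff; as a rough sketch, the legacy non-Jinja path can be thought of as a dry run through llama_chat_apply_template() that accepts a template only if rendering a test message succeeds:

#include <string>
#include "llama.h"

// Hedged sketch of the non --jinja check. Signature caveat: newer llama.h
// revisions drop the leading llama_model * from llama_chat_apply_template();
// the older model-pointer form would pass nullptr there to force the
// explicit template string.
static bool verify_template_legacy(const std::string & tmpl) {
    llama_chat_message chat[] = {{"user", "test"}};
    const int32_t res = llama_chat_apply_template(
        tmpl.c_str(), chat, 1,
        /*add_ass=*/true,
        /*buf=*/nullptr, /*length=*/0);  // size probe only; no output buffer
    return res >= 0;
}
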
@@ -867,7 +877,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.warmup = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--spm-infill"},
         string_format(
@@ -1455,15 +1465,28 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--list-devices"},
         "print list of available devices and exit",
         [](common_params &) {
-            printf("Available devices:\n");
+            std::vector<ggml_backend_dev_t> rpc_devices;
+            std::vector<ggml_backend_dev_t> all_devices;
             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
                 auto * dev = ggml_backend_dev_get(i);
                 if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-                    size_t free, total;
-                    ggml_backend_dev_memory(dev, &free, &total);
-                    printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+                    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+                    if (ggml_backend_reg_name(reg) == std::string("RPC")) {
+                        rpc_devices.push_back(dev);
+                    } else {
+                        all_devices.push_back(dev);
+                    }
                 }
             }
+            // insert RPC devices in front
+            all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end());
+            printf("Available devices:\n");
+            for (size_t i = 0; i < all_devices.size(); ++i) {
+                auto * dev = all_devices[i];
+                size_t free, total;
+                ggml_backend_dev_memory(dev, &free, &total);
+                printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+            }
             exit(0);
         }
     ));
@@ -1629,6 +1652,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.hf_repo = value;
         }
     ).set_env("LLAMA_ARG_HF_REPO"));
+    add_opt(common_arg(
+        {"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
+        "Same as --hf-repo, but for the draft model (default: unused)",
+        [](common_params & params, const std::string & value) {
+            params.speculative.hf_repo = value;
+        }
+    ).set_env("LLAMA_ARG_HFD_REPO"));
     add_opt(common_arg(
         {"-hff", "--hf-file"}, "FILE",
         "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
@@ -1938,24 +1968,44 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--jinja"},
+        "use jinja template for chat (default: disabled)",
+        [](common_params & params) {
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
     add_opt(common_arg(
         {"--chat-template"}, "JINJA_TEMPLATE",
         string_format(
             "set custom jinja chat template (default: template taken from model's metadata)\n"
             "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
             "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
         ),
         [](common_params & params, const std::string & value) {
-            if (!common_chat_verify_template(value)) {
-                throw std::runtime_error(string_format(
-                    "error: the supplied chat template is not supported: %s\n"
-                    "note: llama.cpp does not use jinja parser, we only support commonly used templates\n",
-                    value.c_str()
-                ));
-            }
             params.chat_template = value;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    add_opt(common_arg(
+        {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
+        string_format(
+            "set custom jinja chat template file (default: template taken from model's metadata)\n"
+            "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
+            "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
+        ),
+        [](common_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            std::copy(
+                std::istreambuf_iterator<char>(file),
+                std::istreambuf_iterator<char>(),
+                std::back_inserter(params.chat_template));
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
     add_opt(common_arg(
         {"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
         string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),