cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
@@ -1,587 +1,640 @@
1
- #include "llama-chat.h"
2
-
3
- #include "llama.h"
4
-
5
- #include <map>
6
- #include <sstream>
7
-
8
- #if __cplusplus >= 202000L
9
- #define LU8(x) (const char*)(u8##x)
10
- #else
11
- #define LU8(x) u8##x
12
- #endif
13
-
14
- // trim whitespace from the beginning and end of a string
15
- static std::string trim(const std::string & str) {
16
- size_t start = 0;
17
- size_t end = str.size();
18
- while (start < end && isspace(str[start])) {
19
- start += 1;
20
- }
21
- while (end > start && isspace(str[end - 1])) {
22
- end -= 1;
23
- }
24
- return str.substr(start, end - start);
25
- }
26
-
27
- static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
28
- { "chatml", LLM_CHAT_TEMPLATE_CHATML },
29
- { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
30
- { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
31
- { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
32
- { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
33
- { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
34
- { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
35
- { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
36
- { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
37
- { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
38
- { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
39
- { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
40
- { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
41
- { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
42
- { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
43
- { "orion", LLM_CHAT_TEMPLATE_ORION },
44
- { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
45
- { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
46
- { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
47
- { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
48
- { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
49
- { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
50
- { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
51
- { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
52
- { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
53
- { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
54
- { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
55
- { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
56
- { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
57
- { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
58
- { "granite", LLM_CHAT_TEMPLATE_GRANITE },
59
- { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
60
- { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
61
- };
62
-
63
- llm_chat_template llm_chat_template_from_str(const std::string & name) {
64
- return LLM_CHAT_TEMPLATES.at(name);
65
- }
66
-
67
- llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
68
- try {
69
- return llm_chat_template_from_str(tmpl);
70
- } catch (const std::out_of_range &) {
71
- // ignore
72
- }
73
-
74
- auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
75
- return tmpl.find(haystack) != std::string::npos;
76
- };
77
- if (tmpl_contains("<|im_start|>")) {
78
- return tmpl_contains("<|im_sep|>")
79
- ? LLM_CHAT_TEMPLATE_PHI_4
80
- : LLM_CHAT_TEMPLATE_CHATML;
81
- } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
82
- if (tmpl_contains("[SYSTEM_PROMPT]")) {
83
- return LLM_CHAT_TEMPLATE_MISTRAL_V7;
84
- } else if (
85
- // catches official 'v1' template
86
- tmpl_contains("' [INST] ' + system_message")
87
- // catches official 'v3' and 'v3-tekken' templates
88
- || tmpl_contains("[AVAILABLE_TOOLS]")
89
- ) {
90
- // Official mistral 'v1', 'v3' and 'v3-tekken' templates
91
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
92
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
93
- if (tmpl_contains(" [INST]")) {
94
- return LLM_CHAT_TEMPLATE_MISTRAL_V1;
95
- } else if (tmpl_contains("\"[INST]\"")) {
96
- return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
97
- }
98
- return LLM_CHAT_TEMPLATE_MISTRAL_V3;
99
- } else {
100
- // llama2 template and its variants
101
- // [variant] support system message
102
- // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
103
- bool support_system_message = tmpl_contains("<<SYS>>");
104
- bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
105
- bool strip_message = tmpl_contains("content.strip()");
106
- if (strip_message) {
107
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
108
- } else if (add_bos_inside_history) {
109
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
110
- } else if (support_system_message) {
111
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
112
- } else {
113
- return LLM_CHAT_TEMPLATE_LLAMA_2;
114
- }
115
- }
116
- } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
117
- return LLM_CHAT_TEMPLATE_PHI_3;
118
- } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
119
- return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
120
- } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
121
- return LLM_CHAT_TEMPLATE_ZEPHYR;
122
- } else if (tmpl_contains("bos_token + message['role']")) {
123
- return LLM_CHAT_TEMPLATE_MONARCH;
124
- } else if (tmpl_contains("<start_of_turn>")) {
125
- return LLM_CHAT_TEMPLATE_GEMMA;
126
- } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
127
- // OrionStarAI/Orion-14B-Chat
128
- return LLM_CHAT_TEMPLATE_ORION;
129
- } else if (tmpl_contains("GPT4 Correct ")) {
130
- // openchat/openchat-3.5-0106
131
- return LLM_CHAT_TEMPLATE_OPENCHAT;
132
- } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
133
- // eachadea/vicuna-13b-1.1 (and Orca variant)
134
- if (tmpl_contains("SYSTEM: ")) {
135
- return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
136
- }
137
- return LLM_CHAT_TEMPLATE_VICUNA;
138
- } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
139
- // deepseek-ai/deepseek-coder-33b-instruct
140
- return LLM_CHAT_TEMPLATE_DEEPSEEK;
141
- } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
142
- // CohereForAI/c4ai-command-r-plus
143
- return LLM_CHAT_TEMPLATE_COMMAND_R;
144
- } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
145
- return LLM_CHAT_TEMPLATE_LLAMA_3;
146
- } else if (tmpl_contains("[gMASK]sop")) {
147
- // chatglm3-6b
148
- return LLM_CHAT_TEMPLATE_CHATGML_3;
149
- } else if (tmpl_contains("[gMASK]<sop>")) {
150
- return LLM_CHAT_TEMPLATE_CHATGML_4;
151
- } else if (tmpl_contains(LU8("<用户>"))) {
152
- // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
153
- return LLM_CHAT_TEMPLATE_MINICPM;
154
- } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
155
- return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
156
- } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
157
- return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
158
- } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
159
- // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
160
- // EXAONE-3.0-7.8B-Instruct
161
- return LLM_CHAT_TEMPLATE_EXAONE_3;
162
- } else if (tmpl_contains("rwkv-world")) {
163
- return LLM_CHAT_TEMPLATE_RWKV_WORLD;
164
- } else if (tmpl_contains("<|start_of_role|>")) {
165
- return LLM_CHAT_TEMPLATE_GRANITE;
166
- } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
167
- return LLM_CHAT_TEMPLATE_GIGACHAT;
168
- } else if (tmpl_contains("<|role_start|>")) {
169
- return LLM_CHAT_TEMPLATE_MEGREZ;
170
- }
171
- return LLM_CHAT_TEMPLATE_UNKNOWN;
172
- }
173
-
174
- // Simple version of "llama_apply_chat_template" that only works with strings
175
- // This function uses heuristic checks to determine commonly used template. It is not a jinja parser.
176
- int32_t llm_chat_apply_template(
177
- llm_chat_template tmpl,
178
- const std::vector<const llama_chat_message *> & chat,
179
- std::string & dest, bool add_ass) {
180
- // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
181
- std::stringstream ss;
182
- if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
183
- // chatml template
184
- for (auto message : chat) {
185
- ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
186
- }
187
- if (add_ass) {
188
- ss << "<|im_start|>assistant\n";
189
- }
190
- } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
191
- // Official mistral 'v7' template
192
- // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
193
- for (auto message : chat) {
194
- std::string role(message->role);
195
- std::string content(message->content);
196
- if (role == "system") {
197
- ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
198
- } else if (role == "user") {
199
- ss << "[INST] " << content << "[/INST]";
200
- }
201
- else {
202
- ss << " " << content << "</s>";
203
- }
204
- }
205
- } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
206
- || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
207
- || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
208
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
209
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
210
- std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
211
- std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
212
- bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
213
- bool is_inside_turn = false;
214
- for (auto message : chat) {
215
- if (!is_inside_turn) {
216
- ss << leading_space << "[INST]" << trailing_space;
217
- is_inside_turn = true;
218
- }
219
- std::string role(message->role);
220
- std::string content(message->content);
221
- if (role == "system") {
222
- ss << content << "\n\n";
223
- } else if (role == "user") {
224
- ss << content << leading_space << "[/INST]";
225
- } else {
226
- ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
227
- is_inside_turn = false;
228
- }
229
- }
230
- } else if (
231
- tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
232
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
233
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
234
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
235
- // llama2 template and its variants
236
- // [variant] support system message
237
- // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
238
- bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
239
- // [variant] add BOS inside history
240
- bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
241
- // [variant] trim spaces from the input message
242
- bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
243
- // construct the prompt
244
- bool is_inside_turn = true; // skip BOS at the beginning
245
- ss << "[INST] ";
246
- for (auto message : chat) {
247
- std::string content = strip_message ? trim(message->content) : message->content;
248
- std::string role(message->role);
249
- if (!is_inside_turn) {
250
- is_inside_turn = true;
251
- ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
252
- }
253
- if (role == "system") {
254
- if (support_system_message) {
255
- ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
256
- } else {
257
- // if the model does not support system message, we still include it in the first message, but without <<SYS>>
258
- ss << content << "\n";
259
- }
260
- } else if (role == "user") {
261
- ss << content << " [/INST]";
262
- } else {
263
- ss << content << "</s>";
264
- is_inside_turn = false;
265
- }
266
- }
267
- } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
268
- // Phi 3
269
- for (auto message : chat) {
270
- std::string role(message->role);
271
- ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
272
- }
273
- if (add_ass) {
274
- ss << "<|assistant|>\n";
275
- }
276
- } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
277
- // chatml template
278
- for (auto message : chat) {
279
- ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
280
- }
281
- if (add_ass) {
282
- ss << "<|im_start|>assistant<|im_sep|>";
283
- }
284
- } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
285
- // Falcon 3
286
- for (auto message : chat) {
287
- std::string role(message->role);
288
- ss << "<|" << role << "|>\n" << message->content << "\n";
289
- }
290
- if (add_ass) {
291
- ss << "<|assistant|>\n";
292
- }
293
- } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
294
- // zephyr template
295
- for (auto message : chat) {
296
- ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
297
- }
298
- if (add_ass) {
299
- ss << "<|assistant|>\n";
300
- }
301
- } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
302
- // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
303
- for (auto message : chat) {
304
- std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
305
- ss << bos << message->role << "\n" << message->content << "</s>\n";
306
- }
307
- if (add_ass) {
308
- ss << "<s>assistant\n";
309
- }
310
- } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
311
- // google/gemma-7b-it
312
- std::string system_prompt = "";
313
- for (auto message : chat) {
314
- std::string role(message->role);
315
- if (role == "system") {
316
- // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
317
- system_prompt = trim(message->content);
318
- continue;
319
- }
320
- // in gemma, "assistant" is "model"
321
- role = role == "assistant" ? "model" : message->role;
322
- ss << "<start_of_turn>" << role << "\n";
323
- if (!system_prompt.empty() && role != "model") {
324
- ss << system_prompt << "\n\n";
325
- system_prompt = "";
326
- }
327
- ss << trim(message->content) << "<end_of_turn>\n";
328
- }
329
- if (add_ass) {
330
- ss << "<start_of_turn>model\n";
331
- }
332
- } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
333
- // OrionStarAI/Orion-14B-Chat
334
- std::string system_prompt = "";
335
- for (auto message : chat) {
336
- std::string role(message->role);
337
- if (role == "system") {
338
- // there is no system message support, we will merge it with user prompt
339
- system_prompt = message->content;
340
- continue;
341
- } else if (role == "user") {
342
- ss << "Human: ";
343
- if (!system_prompt.empty()) {
344
- ss << system_prompt << "\n\n";
345
- system_prompt = "";
346
- }
347
- ss << message->content << "\n\nAssistant: </s>";
348
- } else {
349
- ss << message->content << "</s>";
350
- }
351
- }
352
- } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
353
- // openchat/openchat-3.5-0106,
354
- for (auto message : chat) {
355
- std::string role(message->role);
356
- if (role == "system") {
357
- ss << message->content << "<|end_of_turn|>";
358
- } else {
359
- role[0] = toupper(role[0]);
360
- ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
361
- }
362
- }
363
- if (add_ass) {
364
- ss << "GPT4 Correct Assistant:";
365
- }
366
- } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
367
- // eachadea/vicuna-13b-1.1 (and Orca variant)
368
- for (auto message : chat) {
369
- std::string role(message->role);
370
- if (role == "system") {
371
- // Orca-Vicuna variant uses a system prefix
372
- if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
373
- ss << "SYSTEM: " << message->content << "\n";
374
- } else {
375
- ss << message->content << "\n\n";
376
- }
377
- } else if (role == "user") {
378
- ss << "USER: " << message->content << "\n";
379
- } else if (role == "assistant") {
380
- ss << "ASSISTANT: " << message->content << "</s>\n";
381
- }
382
- }
383
- if (add_ass) {
384
- ss << "ASSISTANT:";
385
- }
386
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
387
- // deepseek-ai/deepseek-coder-33b-instruct
388
- for (auto message : chat) {
389
- std::string role(message->role);
390
- if (role == "system") {
391
- ss << message->content;
392
- } else if (role == "user") {
393
- ss << "### Instruction:\n" << message->content << "\n";
394
- } else if (role == "assistant") {
395
- ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
396
- }
397
- }
398
- if (add_ass) {
399
- ss << "### Response:\n";
400
- }
401
- } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
402
- // CohereForAI/c4ai-command-r-plus
403
- for (auto message : chat) {
404
- std::string role(message->role);
405
- if (role == "system") {
406
- ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
407
- } else if (role == "user") {
408
- ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
409
- } else if (role == "assistant") {
410
- ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
411
- }
412
- }
413
- if (add_ass) {
414
- ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
415
- }
416
- } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
417
- // Llama 3
418
- for (auto message : chat) {
419
- std::string role(message->role);
420
- ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
421
- }
422
- if (add_ass) {
423
- ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
424
- }
425
- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
426
- // chatglm3-6b
427
- ss << "[gMASK]" << "sop";
428
- for (auto message : chat) {
429
- std::string role(message->role);
430
- ss << "<|" << role << "|>" << "\n " << message->content;
431
- }
432
- if (add_ass) {
433
- ss << "<|assistant|>";
434
- }
435
- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
436
- ss << "[gMASK]" << "<sop>";
437
- for (auto message : chat) {
438
- std::string role(message->role);
439
- ss << "<|" << role << "|>" << "\n" << message->content;
440
- }
441
- if (add_ass) {
442
- ss << "<|assistant|>";
443
- }
444
- } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
445
- for (auto message : chat) {
446
- std::string role(message->role);
447
- ss << "<|" << role << "|>" << "\n" << message->content;
448
- }
449
- if (add_ass) {
450
- ss << "<|assistant|>";
451
- }
452
- } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
453
- // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
454
- for (auto message : chat) {
455
- std::string role(message->role);
456
- if (role == "user") {
457
- ss << LU8("<用户>");
458
- ss << trim(message->content);
459
- ss << "<AI>";
460
- } else {
461
- ss << trim(message->content);
462
- }
463
- }
464
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
465
- // DeepSeek-V2
466
- for (auto message : chat) {
467
- std::string role(message->role);
468
- if (role == "system") {
469
- ss << message->content << "\n\n";
470
- } else if (role == "user") {
471
- ss << "User: " << message->content << "\n\n";
472
- } else if (role == "assistant") {
473
- ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
474
- }
475
- }
476
- if (add_ass) {
477
- ss << "Assistant:";
478
- }
479
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
480
- // DeepSeek-V3
481
- for (auto message : chat) {
482
- std::string role(message->role);
483
- if (role == "system") {
484
- ss << message->content << "\n\n";
485
- } else if (role == "user") {
486
- ss << LU8("<|User|>") << message->content;
487
- } else if (role == "assistant") {
488
- ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
489
- }
490
- }
491
- if (add_ass) {
492
- ss << LU8("<|Assistant|>");
493
- }
494
- } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
495
- // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
496
- // EXAONE-3.0-7.8B-Instruct
497
- for (auto message : chat) {
498
- std::string role(message->role);
499
- if (role == "system") {
500
- ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
501
- } else if (role == "user") {
502
- ss << "[|user|]" << trim(message->content) << "\n";
503
- } else if (role == "assistant") {
504
- ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
505
- }
506
- }
507
- if (add_ass) {
508
- ss << "[|assistant|]";
509
- }
510
- } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
511
- // this template requires the model to have "\n\n" as EOT token
512
- for (auto message : chat) {
513
- std::string role(message->role);
514
- if (role == "user") {
515
- ss << "User: " << message->content << "\n\nAssistant:";
516
- } else {
517
- ss << message->content << "\n\n";
518
- }
519
- }
520
- } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
521
- // IBM Granite template
522
- for (const auto & message : chat) {
523
- std::string role(message->role);
524
- ss << "<|start_of_role|>" << role << "<|end_of_role|>";
525
- if (role == "assistant_tool_call") {
526
- ss << "<|tool_call|>";
527
- }
528
- ss << message->content << "<|end_of_text|>\n";
529
- }
530
- if (add_ass) {
531
- ss << "<|start_of_role|>assistant<|end_of_role|>\n";
532
- }
533
- } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
534
- // GigaChat template
535
- bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
536
-
537
- // Handle system message if present
538
- if (has_system) {
539
- ss << "<s>" << chat[0]->content << "<|message_sep|>";
540
- } else {
541
- ss << "<s>";
542
- }
543
-
544
- // Process remaining messages
545
- for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
546
- std::string role(chat[i]->role);
547
- if (role == "user") {
548
- ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
549
- << "available functions<|role_sep|>[]<|message_sep|>";
550
- } else if (role == "assistant") {
551
- ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
552
- }
553
- }
554
-
555
- // Add generation prompt if needed
556
- if (add_ass) {
557
- ss << "assistant<|role_sep|>";
558
- }
559
- } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
560
- // Megrez template
561
- for (auto message : chat) {
562
- std::string role(message->role);
563
- ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
564
- }
565
-
566
- if (add_ass) {
567
- ss << "<|role_start|>assistant<|role_end|>";
568
- }
569
- } else {
570
- // template not supported
571
- return -1;
572
- }
573
- dest = ss.str();
574
- return dest.size();
575
- }
576
-
577
- // public interface
578
-
579
- int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
580
- auto it = LLM_CHAT_TEMPLATES.begin();
581
- for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
582
- output[i] = it->first.c_str();
583
- std::advance(it, 1);
584
- }
585
- return (int32_t) LLM_CHAT_TEMPLATES.size();
586
- }
587
-
1
+ #include "llama-chat.h"
2
+
3
+ #include "llama.h"
4
+
5
+ #include <map>
6
+ #include <sstream>
+ #include <algorithm>
+
9
+ #if __cplusplus >= 202000L
10
+ #define LU8(x) (const char*)(u8##x)
11
+ #else
12
+ #define LU8(x) u8##x
13
+ #endif
14
+
15
+ // trim whitespace from the beginning and end of a string
16
+ static std::string trim(const std::string & str) {
17
+ size_t start = 0;
18
+ size_t end = str.size();
19
+ while (start < end && isspace(str[start])) {
20
+ start += 1;
21
+ }
22
+ while (end > start && isspace(str[end - 1])) {
23
+ end -= 1;
24
+ }
25
+ return str.substr(start, end - start);
26
+ }
27
+
28
+ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
29
+ { "chatml", LLM_CHAT_TEMPLATE_CHATML },
30
+ { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
31
+ { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
32
+ { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
33
+ { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
34
+ { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
35
+ { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
36
+ { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
37
+ { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
38
+ { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
39
+ { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
40
+ { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
41
+ { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
42
+ { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
43
+ { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
44
+ { "orion", LLM_CHAT_TEMPLATE_ORION },
45
+ { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
46
+ { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
47
+ { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
48
+ { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
49
+ { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
50
+ { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
51
+ { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
52
+ { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
53
+ { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
54
+ { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
55
+ { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
56
+ { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
57
+ { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
58
+ { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
59
+ { "granite", LLM_CHAT_TEMPLATE_GRANITE },
60
+ { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
61
+ { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
+ { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
+ { "bailing", LLM_CHAT_TEMPLATE_BAILING },
+ { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
+ };
66
+
67
+ llm_chat_template llm_chat_template_from_str(const std::string & name) {
68
+ return LLM_CHAT_TEMPLATES.at(name);
69
+ }
70
+
71
+ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
72
+ try {
73
+ return llm_chat_template_from_str(tmpl);
74
+ } catch (const std::out_of_range &) {
75
+ // ignore
76
+ }
77
+
78
+ auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
79
+ return tmpl.find(haystack) != std::string::npos;
80
+ };
81
+ if (tmpl_contains("<|im_start|>")) {
82
+ return tmpl_contains("<|im_sep|>")
83
+ ? LLM_CHAT_TEMPLATE_PHI_4
84
+ : LLM_CHAT_TEMPLATE_CHATML;
85
+ } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
86
+ if (tmpl_contains("[SYSTEM_PROMPT]")) {
87
+ return LLM_CHAT_TEMPLATE_MISTRAL_V7;
88
+ } else if (
89
+ // catches official 'v1' template
90
+ tmpl_contains("' [INST] ' + system_message")
91
+ // catches official 'v3' and 'v3-tekken' templates
92
+ || tmpl_contains("[AVAILABLE_TOOLS]")
93
+ ) {
94
+ // Official mistral 'v1', 'v3' and 'v3-tekken' templates
95
+ // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
96
+ // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
97
+ if (tmpl_contains(" [INST]")) {
98
+ return LLM_CHAT_TEMPLATE_MISTRAL_V1;
99
+ } else if (tmpl_contains("\"[INST]\"")) {
100
+ return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
101
+ }
102
+ return LLM_CHAT_TEMPLATE_MISTRAL_V3;
103
+ } else {
104
+ // llama2 template and its variants
105
+ // [variant] support system message
106
+ // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
107
+ bool support_system_message = tmpl_contains("<<SYS>>");
108
+ bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
109
+ bool strip_message = tmpl_contains("content.strip()");
110
+ if (strip_message) {
111
+ return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
112
+ } else if (add_bos_inside_history) {
113
+ return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
114
+ } else if (support_system_message) {
115
+ return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
116
+ } else {
117
+ return LLM_CHAT_TEMPLATE_LLAMA_2;
118
+ }
119
+ }
120
+ } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
121
+ return LLM_CHAT_TEMPLATE_PHI_3;
122
+ } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
123
+ return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
124
+ } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
125
+ return LLM_CHAT_TEMPLATE_ZEPHYR;
126
+ } else if (tmpl_contains("bos_token + message['role']")) {
127
+ return LLM_CHAT_TEMPLATE_MONARCH;
128
+ } else if (tmpl_contains("<start_of_turn>")) {
129
+ return LLM_CHAT_TEMPLATE_GEMMA;
130
+ } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
131
+ // OrionStarAI/Orion-14B-Chat
132
+ return LLM_CHAT_TEMPLATE_ORION;
133
+ } else if (tmpl_contains("GPT4 Correct ")) {
134
+ // openchat/openchat-3.5-0106
135
+ return LLM_CHAT_TEMPLATE_OPENCHAT;
136
+ } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
137
+ // eachadea/vicuna-13b-1.1 (and Orca variant)
138
+ if (tmpl_contains("SYSTEM: ")) {
139
+ return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
140
+ }
141
+ return LLM_CHAT_TEMPLATE_VICUNA;
142
+ } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
143
+ // deepseek-ai/deepseek-coder-33b-instruct
144
+ return LLM_CHAT_TEMPLATE_DEEPSEEK;
145
+ } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
146
+ // CohereForAI/c4ai-command-r-plus
147
+ return LLM_CHAT_TEMPLATE_COMMAND_R;
148
+ } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
149
+ return LLM_CHAT_TEMPLATE_LLAMA_3;
150
+ } else if (tmpl_contains("[gMASK]sop")) {
151
+ // chatglm3-6b
152
+ return LLM_CHAT_TEMPLATE_CHATGML_3;
153
+ } else if (tmpl_contains("[gMASK]<sop>")) {
154
+ return LLM_CHAT_TEMPLATE_CHATGML_4;
155
+ } else if (tmpl_contains(LU8("<用户>"))) {
156
+ // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
157
+ return LLM_CHAT_TEMPLATE_MINICPM;
158
+ } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
159
+ return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
160
+ } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
161
+ return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
162
+ } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
163
+ // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
164
+ // EXAONE-3.0-7.8B-Instruct
165
+ return LLM_CHAT_TEMPLATE_EXAONE_3;
166
+ } else if (tmpl_contains("rwkv-world")) {
167
+ return LLM_CHAT_TEMPLATE_RWKV_WORLD;
168
+ } else if (tmpl_contains("<|start_of_role|>")) {
169
+ return LLM_CHAT_TEMPLATE_GRANITE;
170
+ } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
171
+ return LLM_CHAT_TEMPLATE_GIGACHAT;
172
+ } else if (tmpl_contains("<|role_start|>")) {
173
+ return LLM_CHAT_TEMPLATE_MEGREZ;
+ } else if (tmpl_contains(" Ассистент:")) {
+ return LLM_CHAT_TEMPLATE_YANDEX;
+ } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
+ return LLM_CHAT_TEMPLATE_BAILING;
+ } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
+ return LLM_CHAT_TEMPLATE_LLAMA4;
+ }
181
+ return LLM_CHAT_TEMPLATE_UNKNOWN;
182
+ }
183
+
184
+ // Simple version of "llama_apply_chat_template" that only works with strings
185
+ // This function uses heuristic checks to determine commonly used template. It is not a jinja parser.
186
+ int32_t llm_chat_apply_template(
187
+ llm_chat_template tmpl,
188
+ const std::vector<const llama_chat_message *> & chat,
189
+ std::string & dest, bool add_ass) {
190
+ // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
191
+ std::stringstream ss;
192
+ if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
193
+ // chatml template
194
+ for (auto message : chat) {
195
+ ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
196
+ }
197
+ if (add_ass) {
198
+ ss << "<|im_start|>assistant\n";
199
+ }
200
+ } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
201
+ // Official mistral 'v7' template
202
+ // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
203
+ for (auto message : chat) {
204
+ std::string role(message->role);
205
+ std::string content(message->content);
206
+ if (role == "system") {
207
+ ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
208
+ } else if (role == "user") {
209
+ ss << "[INST] " << content << "[/INST]";
210
+ }
211
+ else {
212
+ ss << " " << content << "</s>";
213
+ }
214
+ }
215
+ } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
216
+ || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
217
+ || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
218
+ // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
219
+ // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
220
+ std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
221
+ std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
222
+ bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
223
+ bool is_inside_turn = false;
224
+ for (auto message : chat) {
225
+ if (!is_inside_turn) {
226
+ ss << leading_space << "[INST]" << trailing_space;
227
+ is_inside_turn = true;
228
+ }
229
+ std::string role(message->role);
230
+ std::string content(message->content);
231
+ if (role == "system") {
232
+ ss << content << "\n\n";
233
+ } else if (role == "user") {
234
+ ss << content << leading_space << "[/INST]";
235
+ } else {
236
+ ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
237
+ is_inside_turn = false;
238
+ }
239
+ }
240
+ } else if (
241
+ tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
242
+ || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
243
+ || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
244
+ || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
245
+ // llama2 template and its variants
246
+ // [variant] support system message
247
+ // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
248
+ bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
249
+ // [variant] add BOS inside history
250
+ bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
251
+ // [variant] trim spaces from the input message
252
+ bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
253
+ // construct the prompt
254
+ bool is_inside_turn = true; // skip BOS at the beginning
255
+ ss << "[INST] ";
256
+ for (auto message : chat) {
257
+ std::string content = strip_message ? trim(message->content) : message->content;
258
+ std::string role(message->role);
259
+ if (!is_inside_turn) {
260
+ is_inside_turn = true;
261
+ ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
262
+ }
263
+ if (role == "system") {
264
+ if (support_system_message) {
265
+ ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
266
+ } else {
267
+ // if the model does not support system message, we still include it in the first message, but without <<SYS>>
268
+ ss << content << "\n";
269
+ }
270
+ } else if (role == "user") {
271
+ ss << content << " [/INST]";
272
+ } else {
273
+ ss << content << "</s>";
274
+ is_inside_turn = false;
275
+ }
276
+ }
277
+ } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
278
+ // Phi 3
279
+ for (auto message : chat) {
280
+ std::string role(message->role);
281
+ ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
282
+ }
283
+ if (add_ass) {
284
+ ss << "<|assistant|>\n";
285
+ }
286
+ } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
287
+ // chatml template
288
+ for (auto message : chat) {
289
+ ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
290
+ }
291
+ if (add_ass) {
292
+ ss << "<|im_start|>assistant<|im_sep|>";
293
+ }
294
+ } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
295
+ // Falcon 3
296
+ for (auto message : chat) {
297
+ std::string role(message->role);
298
+ ss << "<|" << role << "|>\n" << message->content << "\n";
299
+ }
300
+ if (add_ass) {
301
+ ss << "<|assistant|>\n";
302
+ }
303
+ } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
304
+ // zephyr template
305
+ for (auto message : chat) {
306
+ ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
307
+ }
308
+ if (add_ass) {
309
+ ss << "<|assistant|>\n";
310
+ }
311
+ } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
312
+ // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
313
+ for (auto message : chat) {
314
+ std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
315
+ ss << bos << message->role << "\n" << message->content << "</s>\n";
316
+ }
317
+ if (add_ass) {
318
+ ss << "<s>assistant\n";
319
+ }
320
+ } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
321
+ // google/gemma-7b-it
322
+ std::string system_prompt = "";
323
+ for (auto message : chat) {
324
+ std::string role(message->role);
325
+ if (role == "system") {
326
+ // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
327
+ system_prompt = trim(message->content);
328
+ continue;
329
+ }
330
+ // in gemma, "assistant" is "model"
331
+ role = role == "assistant" ? "model" : message->role;
332
+ ss << "<start_of_turn>" << role << "\n";
333
+ if (!system_prompt.empty() && role != "model") {
334
+ ss << system_prompt << "\n\n";
335
+ system_prompt = "";
336
+ }
337
+ ss << trim(message->content) << "<end_of_turn>\n";
338
+ }
339
+ if (add_ass) {
340
+ ss << "<start_of_turn>model\n";
341
+ }
342
+ } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
343
+ // OrionStarAI/Orion-14B-Chat
344
+ std::string system_prompt = "";
345
+ for (auto message : chat) {
346
+ std::string role(message->role);
347
+ if (role == "system") {
348
+ // there is no system message support, we will merge it with user prompt
349
+ system_prompt = message->content;
350
+ continue;
351
+ } else if (role == "user") {
352
+ ss << "Human: ";
353
+ if (!system_prompt.empty()) {
354
+ ss << system_prompt << "\n\n";
355
+ system_prompt = "";
356
+ }
357
+ ss << message->content << "\n\nAssistant: </s>";
358
+ } else {
359
+ ss << message->content << "</s>";
360
+ }
361
+ }
362
+ } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
363
+ // openchat/openchat-3.5-0106,
364
+ for (auto message : chat) {
365
+ std::string role(message->role);
366
+ if (role == "system") {
367
+ ss << message->content << "<|end_of_turn|>";
368
+ } else {
369
+ role[0] = toupper(role[0]);
370
+ ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
371
+ }
372
+ }
373
+ if (add_ass) {
374
+ ss << "GPT4 Correct Assistant:";
375
+ }
376
+ } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
377
+ // eachadea/vicuna-13b-1.1 (and Orca variant)
378
+ for (auto message : chat) {
379
+ std::string role(message->role);
380
+ if (role == "system") {
381
+ // Orca-Vicuna variant uses a system prefix
382
+ if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
383
+ ss << "SYSTEM: " << message->content << "\n";
384
+ } else {
385
+ ss << message->content << "\n\n";
386
+ }
387
+ } else if (role == "user") {
388
+ ss << "USER: " << message->content << "\n";
389
+ } else if (role == "assistant") {
390
+ ss << "ASSISTANT: " << message->content << "</s>\n";
391
+ }
392
+ }
393
+ if (add_ass) {
394
+ ss << "ASSISTANT:";
395
+ }
396
+ } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
397
+ // deepseek-ai/deepseek-coder-33b-instruct
398
+ for (auto message : chat) {
399
+ std::string role(message->role);
400
+ if (role == "system") {
401
+ ss << message->content;
402
+ } else if (role == "user") {
403
+ ss << "### Instruction:\n" << message->content << "\n";
404
+ } else if (role == "assistant") {
405
+ ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
406
+ }
407
+ }
408
+ if (add_ass) {
409
+ ss << "### Response:\n";
410
+ }
411
+ } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
412
+ // CohereForAI/c4ai-command-r-plus
413
+ for (auto message : chat) {
414
+ std::string role(message->role);
415
+ if (role == "system") {
416
+ ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
417
+ } else if (role == "user") {
418
+ ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
419
+ } else if (role == "assistant") {
420
+ ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
421
+ }
422
+ }
423
+ if (add_ass) {
424
+ ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
425
+ }
426
+ } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
427
+ // Llama 3
428
+ for (auto message : chat) {
429
+ std::string role(message->role);
430
+ ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
431
+ }
432
+ if (add_ass) {
433
+ ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
434
+ }
435
+ } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
436
+ // chatglm3-6b
437
+ ss << "[gMASK]" << "sop";
438
+ for (auto message : chat) {
439
+ std::string role(message->role);
440
+ ss << "<|" << role << "|>" << "\n " << message->content;
441
+ }
442
+ if (add_ass) {
443
+ ss << "<|assistant|>";
444
+ }
445
+ } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
446
+ ss << "[gMASK]" << "<sop>";
447
+ for (auto message : chat) {
448
+ std::string role(message->role);
449
+ ss << "<|" << role << "|>" << "\n" << message->content;
450
+ }
451
+ if (add_ass) {
452
+ ss << "<|assistant|>";
453
+ }
454
+ } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
455
+ for (auto message : chat) {
456
+ std::string role(message->role);
457
+ ss << "<|" << role << "|>" << "\n" << message->content;
458
+ }
459
+ if (add_ass) {
460
+ ss << "<|assistant|>";
461
+ }
462
+ } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
463
+ // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
464
+ for (auto message : chat) {
465
+ std::string role(message->role);
466
+ if (role == "user") {
467
+ ss << LU8("<用户>");
468
+ ss << trim(message->content);
469
+ ss << "<AI>";
470
+ } else {
471
+ ss << trim(message->content);
472
+ }
473
+ }
474
+ } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
475
+ // DeepSeek-V2
476
+ for (auto message : chat) {
477
+ std::string role(message->role);
478
+ if (role == "system") {
479
+ ss << message->content << "\n\n";
480
+ } else if (role == "user") {
481
+ ss << "User: " << message->content << "\n\n";
482
+ } else if (role == "assistant") {
483
+ ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
484
+ }
485
+ }
486
+ if (add_ass) {
487
+ ss << "Assistant:";
488
+ }
489
+ } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
490
+ // DeepSeek-V3
491
+ for (auto message : chat) {
492
+ std::string role(message->role);
493
+ if (role == "system") {
494
+ ss << message->content << "\n\n";
495
+ } else if (role == "user") {
496
+ ss << LU8("<|User|>") << message->content;
497
+ } else if (role == "assistant") {
498
+ ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
499
+ }
500
+ }
501
+ if (add_ass) {
502
+ ss << LU8("<|Assistant|>");
503
+ }
504
+ } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
505
+ // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
506
+ // EXAONE-3.0-7.8B-Instruct
507
+ for (auto message : chat) {
508
+ std::string role(message->role);
509
+ if (role == "system") {
510
+ ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
511
+ } else if (role == "user") {
512
+ ss << "[|user|]" << trim(message->content) << "\n";
513
+ } else if (role == "assistant") {
514
+ ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
515
+ }
516
+ }
517
+ if (add_ass) {
518
+ ss << "[|assistant|]";
519
+ }
520
+ } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
521
+ // this template requires the model to have "\n\n" as EOT token
522
+ for (auto message : chat) {
523
+ std::string role(message->role);
524
+ if (role == "user") {
525
+ ss << "User: " << message->content << "\n\nAssistant:";
526
+ } else {
527
+ ss << message->content << "\n\n";
528
+ }
529
+ }
530
+ } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
531
+ // IBM Granite template
532
+ for (const auto & message : chat) {
533
+ std::string role(message->role);
534
+ ss << "<|start_of_role|>" << role << "<|end_of_role|>";
535
+ if (role == "assistant_tool_call") {
536
+ ss << "<|tool_call|>";
537
+ }
538
+ ss << message->content << "<|end_of_text|>\n";
539
+ }
540
+ if (add_ass) {
541
+ ss << "<|start_of_role|>assistant<|end_of_role|>\n";
542
+ }
543
+ } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
544
+ // GigaChat template
545
+ bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
546
+
547
+ // Handle system message if present
548
+ if (has_system) {
549
+ ss << "<s>" << chat[0]->content << "<|message_sep|>";
550
+ } else {
551
+ ss << "<s>";
552
+ }
553
+
554
+ // Process remaining messages
555
+ for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
556
+ std::string role(chat[i]->role);
557
+ if (role == "user") {
558
+ ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
559
+ << "available functions<|role_sep|>[]<|message_sep|>";
560
+ } else if (role == "assistant") {
561
+ ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
562
+ }
563
+ }
564
+
565
+ // Add generation prompt if needed
566
+ if (add_ass) {
567
+ ss << "assistant<|role_sep|>";
568
+ }
569
+ } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
570
+ // Megrez template
571
+ for (auto message : chat) {
572
+ std::string role(message->role);
573
+ ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
574
+ }
575
+
576
+ if (add_ass) {
577
+ ss << "<|role_start|>assistant<|role_end|>";
578
+ }
+ } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
+ // Yandex template ("\n\n" is defined as EOT token)
+
+ ss << "<s>";
+
+ for (size_t i = 0; i < chat.size(); i++) {
+ std::string role(chat[i]->role);
+ if (role == "user") {
+ ss << " Пользователь: " << chat[i]->content << "\n\n";
+ } else if (role == "assistant") {
+ ss << " Ассистент: " << chat[i]->content << "\n\n";
+ }
+ }
+
+ // Add generation prompt if needed
+ if (add_ass) {
+ ss << " Ассистент:[SEP]";
+ }
+ } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
+ // Bailing (Ling) template
+ for (auto message : chat) {
+ std::string role(message->role);
+
+ if (role == "user") {
+ role = "HUMAN";
+ } else {
+ std::transform(role.begin(), role.end(), role.begin(), ::toupper);
+ }
+
+ ss << "<role>" << role << "</role>" << message->content;
+ }
+
+ if (add_ass) {
+ ss << "<role>ASSISTANT</role>";
+ }
+ } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
+ // Llama 4
+ for (auto message : chat) {
+ std::string role(message->role);
+ ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
+ }
+ if (add_ass) {
+ ss << "<|header_start|>assistant<|header_end|>\n\n";
+ }
+ } else {
624
+ // template not supported
625
+ return -1;
626
+ }
627
+ dest = ss.str();
628
+ return dest.size();
629
+ }
630
+
631
+ // public interface
632
+
633
+ int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
634
+ auto it = LLM_CHAT_TEMPLATES.begin();
635
+ for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
636
+ output[i] = it->first.c_str();
637
+ std::advance(it, 1);
638
+ }
639
+ return (int32_t) LLM_CHAT_TEMPLATES.size();
640
+ }
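
Note on the hunk above: the practical effect of this rewrite for 1.5.0 is three new built-in chat templates (yandex, bailing, llama4), each with a detection branch in llm_chat_detect_template and a formatting branch in llm_chat_apply_template. The following standalone sketch is not part of the package; the main() wrapper and message strings are purely illustrative, and it assumes the internal llama-chat.h header is visible to the translation unit. It shows how the helpers defined in this hunk could be exercised directly:

// Illustrative sketch only: format a short conversation with the newly added
// "llama4" built-in template using the helpers defined in the hunk above.
#include "llama-chat.h"   // llm_chat_template_from_str, llm_chat_apply_template (internal header)
#include "llama.h"        // llama_chat_message

#include <cstdio>
#include <string>
#include <vector>

int main() {
    // llama_chat_message is the plain { role, content } struct from llama.h.
    llama_chat_message msgs[] = {
        { "system", "You are a concise assistant." },
        { "user",   "Summarize the 1.5.0 template changes." },
    };

    std::vector<const llama_chat_message *> chat;
    for (auto & m : msgs) {
        chat.push_back(&m);
    }

    // "llama4" is one of the names added to LLM_CHAT_TEMPLATES in this release.
    llm_chat_template tmpl = llm_chat_template_from_str("llama4");

    std::string prompt;
    int32_t n = llm_chat_apply_template(tmpl, chat, prompt, /*add_ass=*/true);
    if (n < 0) {
        std::fprintf(stderr, "template not supported\n");
        return 1;
    }
    std::printf("%s\n", prompt.c_str());
    return 0;
}

With add_ass = true, the resulting prompt ends with an open "<|header_start|>assistant<|header_end|>" header, matching the LLAMA4 branch above.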