cui-llama.rn 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. package/android/src/main/CMakeLists.txt +2 -2
  2. package/android/src/main/jni.cpp +12 -10
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/chat-template.hpp +529 -529
  12. package/cpp/chat.cpp +959 -265
  13. package/cpp/chat.h +135 -0
  14. package/cpp/common.cpp +2064 -1996
  15. package/cpp/common.h +700 -744
  16. package/cpp/ggml-alloc.c +1039 -1030
  17. package/cpp/ggml-alloc.h +1 -1
  18. package/cpp/ggml-backend-impl.h +255 -255
  19. package/cpp/ggml-backend-reg.cpp +586 -582
  20. package/cpp/ggml-backend.cpp +2004 -2002
  21. package/cpp/ggml-backend.h +354 -354
  22. package/cpp/ggml-common.h +1851 -1851
  23. package/cpp/ggml-cpp.h +39 -39
  24. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  25. package/cpp/ggml-cpu-aarch64.h +8 -8
  26. package/cpp/ggml-cpu-impl.h +531 -380
  27. package/cpp/ggml-cpu-quants.c +12527 -11517
  28. package/cpp/ggml-cpu-traits.cpp +36 -36
  29. package/cpp/ggml-cpu-traits.h +38 -38
  30. package/cpp/ggml-cpu.c +15766 -14485
  31. package/cpp/ggml-cpu.cpp +655 -633
  32. package/cpp/ggml-cpu.h +138 -135
  33. package/cpp/ggml-impl.h +567 -567
  34. package/cpp/ggml-metal-impl.h +235 -0
  35. package/cpp/ggml-metal.h +66 -66
  36. package/cpp/ggml-metal.m +5146 -5002
  37. package/cpp/ggml-opt.cpp +854 -854
  38. package/cpp/ggml-opt.h +216 -216
  39. package/cpp/ggml-quants.c +5238 -5238
  40. package/cpp/ggml-threading.h +14 -14
  41. package/cpp/ggml.c +6529 -6524
  42. package/cpp/ggml.h +2198 -2194
  43. package/cpp/gguf.cpp +1329 -1329
  44. package/cpp/gguf.h +202 -202
  45. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  46. package/cpp/json-schema-to-grammar.h +21 -22
  47. package/cpp/json.hpp +24766 -24766
  48. package/cpp/llama-adapter.cpp +347 -347
  49. package/cpp/llama-adapter.h +74 -74
  50. package/cpp/llama-arch.cpp +1513 -1492
  51. package/cpp/llama-arch.h +403 -402
  52. package/cpp/llama-batch.cpp +368 -368
  53. package/cpp/llama-batch.h +88 -88
  54. package/cpp/llama-chat.cpp +588 -587
  55. package/cpp/llama-chat.h +53 -53
  56. package/cpp/llama-context.cpp +1775 -1775
  57. package/cpp/llama-context.h +128 -128
  58. package/cpp/llama-cparams.cpp +1 -1
  59. package/cpp/llama-cparams.h +37 -37
  60. package/cpp/llama-cpp.h +30 -30
  61. package/cpp/llama-grammar.cpp +1219 -1219
  62. package/cpp/llama-grammar.h +173 -164
  63. package/cpp/llama-hparams.cpp +71 -71
  64. package/cpp/llama-hparams.h +139 -139
  65. package/cpp/llama-impl.cpp +167 -167
  66. package/cpp/llama-impl.h +61 -61
  67. package/cpp/llama-kv-cache.cpp +718 -718
  68. package/cpp/llama-kv-cache.h +219 -218
  69. package/cpp/llama-mmap.cpp +600 -590
  70. package/cpp/llama-mmap.h +68 -68
  71. package/cpp/llama-model-loader.cpp +1124 -1124
  72. package/cpp/llama-model-loader.h +167 -167
  73. package/cpp/llama-model.cpp +4087 -4023
  74. package/cpp/llama-model.h +370 -370
  75. package/cpp/llama-sampling.cpp +2558 -2525
  76. package/cpp/llama-sampling.h +32 -32
  77. package/cpp/llama-vocab.cpp +3264 -3252
  78. package/cpp/llama-vocab.h +125 -125
  79. package/cpp/llama.cpp +10284 -10137
  80. package/cpp/llama.h +1354 -1340
  81. package/cpp/log.cpp +393 -423
  82. package/cpp/log.h +132 -132
  83. package/cpp/minja/chat-template.hpp +529 -0
  84. package/cpp/minja/minja.hpp +2915 -0
  85. package/cpp/minja.hpp +2915 -2883
  86. package/cpp/rn-llama.cpp +20 -37
  87. package/cpp/rn-llama.h +12 -2
  88. package/cpp/sampling.cpp +570 -532
  89. package/cpp/sgemm.cpp +2598 -2598
  90. package/cpp/sgemm.h +14 -14
  91. package/cpp/speculative.cpp +278 -277
  92. package/cpp/speculative.h +28 -28
  93. package/package.json +1 -1
  94. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  95. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  96. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  97. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  98. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  99. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  100. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  101. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  102. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  103. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  104. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  105. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  106. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  107. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  108. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  109. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  110. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  111. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  112. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  113. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  114. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  115. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  116. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  117. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  118. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  119. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  120. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  122. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  124. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  125. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  126. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  127. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  128. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  129. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  130. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  132. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  134. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  135. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  136. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  194. package/android/src/main/build-arm64/Makefile +0 -1862
  195. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  196. package/cpp/chat.hpp +0 -55
  197. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-chat.cpp
@@ -4,6 +4,7 @@
 
 #include <map>
 #include <sstream>
+#include <algorithm>
 
 #if __cplusplus >= 202000L
 #define LU8(x) (const char*)(u8##x)
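The only change in this file is the new `#include <algorithm>`: `llm_chat_builtin_templates` calls `std::min` over two `size_t` values, and the C++ standard declares `std::min` in `<algorithm>`, which suggests the code previously relied on a transitive include. A minimal standalone sketch of the same pattern (hypothetical example, not code from the package):

```cpp
// Illustration only: std::min over two size_t values, as used in
// llama_chat_builtin_templates, is declared in <algorithm>. Without the
// explicit include, this only compiles when another header pulls it in
// transitively.
#include <algorithm> // std::min
#include <cstddef>   // size_t
#include <cstdio>

int main() {
    size_t len       = 8;  // caller-provided output capacity
    size_t available = 33; // e.g. number of built-in chat templates
    size_t n = std::min(len, available); // copy at most 'len' entries
    std::printf("copying %zu template names\n", n);
    return 0;
}
```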