@fugood/llama.node 0.3.16 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/CMakeLists.txt +3 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +5 -0
  19. package/package.json +1 -1
  20. package/src/LlamaCompletionWorker.cpp +8 -0
  21. package/src/LlamaCompletionWorker.h +1 -0
  22. package/src/LlamaContext.cpp +3 -2
  23. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +124 -0
  24. package/src/llama.cpp/.github/workflows/build.yml +70 -27
  25. package/src/llama.cpp/.github/workflows/docker.yml +6 -6
  26. package/src/llama.cpp/.github/workflows/server.yml +7 -11
  27. package/src/llama.cpp/CMakeLists.txt +23 -1
  28. package/src/llama.cpp/common/CMakeLists.txt +6 -3
  29. package/src/llama.cpp/common/arg.cpp +809 -105
  30. package/src/llama.cpp/common/arg.h +9 -0
  31. package/src/llama.cpp/common/chat.cpp +1 -1
  32. package/src/llama.cpp/common/common.cpp +31 -521
  33. package/src/llama.cpp/common/common.h +17 -36
  34. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  35. package/src/llama.cpp/common/llguidance.cpp +30 -47
  36. package/src/llama.cpp/common/minja/chat-template.hpp +15 -7
  37. package/src/llama.cpp/common/minja/minja.hpp +119 -93
  38. package/src/llama.cpp/common/sampling.cpp +3 -0
  39. package/src/llama.cpp/docs/build.md +122 -7
  40. package/src/llama.cpp/examples/CMakeLists.txt +0 -9
  41. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
  43. package/src/llama.cpp/examples/embedding/embedding.cpp +7 -1
  44. package/src/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
  45. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -16
  46. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  47. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +210 -8
  48. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  49. package/src/llama.cpp/examples/llava/CMakeLists.txt +39 -24
  50. package/src/llama.cpp/examples/llava/clip-impl.h +345 -0
  51. package/src/llama.cpp/examples/llava/clip.cpp +2152 -1803
  52. package/src/llama.cpp/examples/llava/clip.h +39 -22
  53. package/src/llama.cpp/examples/llava/deprecation-warning.cpp +22 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +64 -52
  55. package/src/llama.cpp/examples/llava/mtmd-cli.cpp +344 -0
  56. package/src/llama.cpp/examples/llava/mtmd.cpp +708 -0
  57. package/src/llama.cpp/examples/llava/mtmd.h +168 -0
  58. package/src/llama.cpp/examples/llava/{qwen2vl-cli.cpp → qwen2vl-test.cpp} +83 -31
  59. package/src/llama.cpp/examples/main/main.cpp +16 -5
  60. package/src/llama.cpp/examples/parallel/parallel.cpp +3 -1
  61. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  62. package/src/llama.cpp/examples/perplexity/perplexity.cpp +17 -3
  63. package/src/llama.cpp/examples/quantize/quantize.cpp +115 -2
  64. package/src/llama.cpp/examples/rpc/CMakeLists.txt +4 -2
  65. package/src/llama.cpp/examples/rpc/rpc-server.cpp +163 -8
  66. package/src/llama.cpp/examples/run/CMakeLists.txt +12 -1
  67. package/src/llama.cpp/examples/run/run.cpp +14 -28
  68. package/src/llama.cpp/examples/server/httplib.h +313 -247
  69. package/src/llama.cpp/examples/server/server.cpp +238 -139
  70. package/src/llama.cpp/examples/server/utils.hpp +51 -2
  71. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  72. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  73. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  74. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  75. package/src/llama.cpp/examples/tts/tts.cpp +6 -9
  76. package/src/llama.cpp/ggml/CMakeLists.txt +8 -2
  77. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  78. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  79. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  80. package/src/llama.cpp/ggml/include/ggml.h +66 -99
  81. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  82. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  83. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  84. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  85. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  86. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  87. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  88. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  89. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  90. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +48 -22
  91. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  92. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  93. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  94. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -192
  96. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +754 -404
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1003 -13519
  99. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +2 -7
  101. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +0 -1
  102. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +3 -4
  103. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +533 -88
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8809 -0
  105. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  106. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  107. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  108. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  109. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +258 -0
  110. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  111. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  112. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  113. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  114. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  115. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +70 -3
  116. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  117. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -260
  118. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +293 -40
  119. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +96 -22
  120. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  121. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  123. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  124. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +2 -292
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +967 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  130. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +204 -280
  131. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  133. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  134. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  135. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  136. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  137. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  138. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +23 -0
  139. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +646 -114
  140. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +12 -0
  141. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +17 -8
  142. package/src/llama.cpp/ggml/src/ggml.c +141 -245
  143. package/src/llama.cpp/ggml/src/gguf.cpp +1 -0
  144. package/src/llama.cpp/include/llama.h +30 -11
  145. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  146. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  147. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  148. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  149. package/src/llama.cpp/requirements/requirements-all.txt +2 -0
  150. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  151. package/src/llama.cpp/src/CMakeLists.txt +3 -2
  152. package/src/llama.cpp/src/llama-adapter.cpp +37 -1
  153. package/src/llama.cpp/src/llama-arch.cpp +160 -17
  154. package/src/llama.cpp/src/llama-arch.h +16 -0
  155. package/src/llama.cpp/src/llama-chat.cpp +82 -17
  156. package/src/llama.cpp/src/llama-chat.h +6 -2
  157. package/src/llama.cpp/src/llama-context.cpp +108 -92
  158. package/src/llama.cpp/src/llama-context.h +1 -2
  159. package/src/llama.cpp/src/llama-graph.cpp +189 -119
  160. package/src/llama.cpp/src/llama-graph.h +26 -6
  161. package/src/llama.cpp/src/llama-hparams.h +13 -0
  162. package/src/llama.cpp/src/llama-kv-cache.cpp +70 -123
  163. package/src/llama.cpp/src/llama-kv-cache.h +41 -115
  164. package/src/llama.cpp/src/llama-memory.h +1 -1
  165. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  166. package/src/llama.cpp/src/llama-model-loader.cpp +10 -5
  167. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  168. package/src/llama.cpp/src/llama-model.cpp +1760 -534
  169. package/src/llama.cpp/src/llama-model.h +13 -1
  170. package/src/llama.cpp/src/llama-quant.cpp +29 -8
  171. package/src/llama.cpp/src/llama-sampling.cpp +7 -1
  172. package/src/llama.cpp/src/llama-vocab.cpp +44 -6
  173. package/src/llama.cpp/src/llama.cpp +1 -1
  174. package/src/llama.cpp/tests/CMakeLists.txt +43 -30
  175. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  176. package/src/llama.cpp/tests/test-backend-ops.cpp +82 -43
  177. package/src/llama.cpp/tests/test-chat-template.cpp +34 -13
  178. package/src/llama.cpp/tests/test-chat.cpp +12 -2
  179. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  180. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  181. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  182. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  183. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  184. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  185. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  186. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  187. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  188. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  189. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  190. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  191. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  192. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  193. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  194. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  195. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  196. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  197. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  198. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  199. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  200. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  201. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  202. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
package/src/llama.cpp/common/arg.h

@@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
 
 // function to be used by test-arg-parser
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
+bool common_has_curl();
+
+struct common_remote_params {
+    std::vector<std::string> headers;
+    long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
+    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
+};
+// get remote file content, returns <http_code, raw_response_body>
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
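
The common_remote_params / common_remote_get_content() pair added above gives common a small HTTP helper that returns the status code together with the raw response body. A minimal usage sketch, assuming these declarations are pulled in via arg.h and the build has libcurl enabled (the URL is only an illustration):

#include "arg.h"   // assumed to declare common_has_curl(), common_remote_params, common_remote_get_content()

#include <cstdio>
#include <string>

int main() {
    if (!common_has_curl()) {
        fprintf(stderr, "this build has no libcurl support\n");
        return 1;
    }

    common_remote_params params;
    params.headers.push_back("Accept: application/json");
    params.timeout  = 30;               // seconds; 0 would mean no timeout
    params.max_size = 10 * 1024 * 1024; // cap the response at 10 MB (0 = unlimited, hard max 2 GB)

    // returns <http_code, raw_response_body>
    auto res = common_remote_get_content("https://huggingface.co/api/models", params);
    if (res.first != 200) {
        fprintf(stderr, "request failed with HTTP %ld\n", res.first);
        return 1;
    }

    std::string body(res.second.begin(), res.second.end());
    printf("received %zu bytes\n", body.size());
    return 0;
}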
package/src/llama.cpp/common/chat.cpp

@@ -1622,7 +1622,7 @@ static common_chat_params common_chat_templates_apply_jinja(
     }
 
     // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
+    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
        return common_chat_params_init_hermes_2_pro(tmpl, params);
    }
 
package/src/llama.cpp/common/common.cpp

@@ -7,9 +7,6 @@
 
 #include "common.h"
 #include "log.h"
-// Change JSON_ASSERT from assert() to GGML_ASSERT:
-#define JSON_ASSERT GGML_ASSERT
-#include "json.hpp"
 #include "llama.h"
 
 #include <algorithm>
@@ -51,47 +48,11 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #endif
-#if defined(LLAMA_USE_CURL)
-#include <curl/curl.h>
-#include <curl/easy.h>
-#include <future>
-#endif
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-#if defined(LLAMA_USE_CURL)
-#ifdef __linux__
-#include <linux/limits.h>
-#elif defined(_WIN32)
-#   if !defined(PATH_MAX)
-#       define PATH_MAX MAX_PATH
-#   endif
-#else
-#include <sys/syslimits.h>
-#endif
-#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
-
-//
-// CURL utils
-//
-
-using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
-
-// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
-struct curl_slist_ptr {
-    struct curl_slist * ptr = nullptr;
-    ~curl_slist_ptr() {
-        if (ptr) {
-            curl_slist_free_all(ptr);
-        }
-    }
-};
-#endif // LLAMA_USE_CURL
-
-using json = nlohmann::ordered_json;
-
 //
 // CPU utils
 //
@@ -869,7 +830,7 @@ std::string fs_get_cache_directory() {
     if (getenv("LLAMA_CACHE")) {
         cache_directory = std::getenv("LLAMA_CACHE");
     } else {
-#ifdef __linux__
+#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
         if (std::getenv("XDG_CACHE_HOME")) {
             cache_directory = std::getenv("XDG_CACHE_HOME");
         } else {
@@ -879,7 +840,9 @@ std::string fs_get_cache_directory() {
         cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
 #elif defined(_WIN32)
         cache_directory = std::getenv("LOCALAPPDATA");
-#endif // __linux__
+#else
+#  error Unknown architecture
+#endif
         cache_directory = ensure_trailing_slash(cache_directory);
         cache_directory += "llama.cpp";
     }
@@ -900,22 +863,14 @@ std::string fs_get_cache_file(const std::string & filename) {
 //
 // Model utils
 //
+
 struct common_init_result common_init_from_params(common_params & params) {
     common_init_result iparams;
     auto mparams = common_model_params_to_llama(params);
 
-    llama_model * model = nullptr;
-
-    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-        model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
-    } else if (!params.model_url.empty()) {
-        model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
-    } else {
-        model = llama_model_load_from_file(params.model.c_str(), mparams);
-    }
-
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
     if (model == NULL) {
-        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
         return iparams;
     }
 
@@ -950,7 +905,7 @@ struct common_init_result common_init_from_params(common_params & params) {
 
     llama_context * lctx = llama_init_from_model(model, cparams);
     if (lctx == NULL) {
-        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
+        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
         llama_model_free(model);
         return iparams;
     }
@@ -1074,6 +1029,19 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }
 
+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the use of environment variable "HF_ENDPOINT" for backward compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    if (endpoint_env) {
+        model_endpoint = endpoint_env;
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
     llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
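
The get_model_endpoint() helper added in this hunk centralizes how the download base URL is chosen: MODEL_ENDPOINT wins over the legacy HF_ENDPOINT, the default is https://huggingface.co/, and a trailing slash is appended when missing. A small sketch of that behaviour, assuming the matching declaration is exposed through common.h (the mirror URLs are illustrative):

#include "common.h" // assumed to declare get_model_endpoint()

#include <cstdio>
#include <cstdlib>

int main() {
    // With neither variable set, the default "https://huggingface.co/" is returned.
    printf("%s\n", get_model_endpoint().c_str());

    // MODEL_ENDPOINT takes precedence over HF_ENDPOINT, and a missing trailing '/'
    // is appended so the result is always usable as a URL prefix.
    setenv("HF_ENDPOINT",    "https://hf-mirror.example", 1);
    setenv("MODEL_ENDPOINT", "https://models.internal.example", 1);
    printf("%s\n", get_model_endpoint().c_str()); // prints https://models.internal.example/
    return 0;
}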
@@ -1089,15 +1057,18 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     if (!params.devices.empty()) {
         mparams.devices = params.devices.data();
     }
+
     if (params.n_gpu_layers != -1) {
         mparams.n_gpu_layers = params.n_gpu_layers;
     }
+
     mparams.main_gpu = params.main_gpu;
     mparams.split_mode = params.split_mode;
     mparams.tensor_split = params.tensor_split;
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
     mparams.check_tensors = params.check_tensors;
+
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
@@ -1105,6 +1076,13 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
         mparams.kv_overrides = params.kv_overrides.data();
     }
 
+    if (params.tensor_buft_overrides.empty()) {
+        mparams.tensor_buft_overrides = NULL;
+    } else {
+        GGML_ASSERT(params.tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern");
+        mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
+    }
+
     return mparams;
 }
 
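
The new tensor_buft_overrides plumbing above only requires that the caller's override list end with an entry whose pattern is nullptr; that is what the GGML_ASSERT checks before the array is handed to llama_model_params. A rough sketch of how such a list could be built; the element type name llama_model_tensor_buft_override and the CPU buffer-type getter are assumptions about the llama.cpp/ggml API, and only the nullptr-terminated pattern convention comes from this diff:

#include "llama.h"
#include "ggml-backend.h"

#include <vector>

// Sketch: route tensors whose names match "ffn_.*" to host (CPU) buffers and
// leave everything else on the default backend.
static std::vector<llama_model_tensor_buft_override> make_overrides() {
    std::vector<llama_model_tensor_buft_override> overrides;
    overrides.push_back({ "ffn_.*", ggml_backend_cpu_buffer_type() });
    overrides.push_back({ nullptr, nullptr }); // terminator expected by common_model_params_to_llama()
    return overrides;
}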
@@ -1164,451 +1142,6 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
     return tpp;
 }
 
-#ifdef LLAMA_USE_CURL
-
-#define CURL_MAX_RETRY 3
-#define CURL_RETRY_DELAY_SECONDS 2
-
-static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
-    int remaining_attempts = max_attempts;
-
-    while (remaining_attempts > 0) {
-        LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
-
-        CURLcode res = curl_easy_perform(curl);
-        if (res == CURLE_OK) {
-            return true;
-        }
-
-        int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
-        LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
-
-        remaining_attempts--;
-        std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
-    }
-
-    LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
-
-    return false;
-}
-
-static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
-    // Initialize libcurl
-    curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
-    curl_slist_ptr http_headers;
-    if (!curl) {
-        LOG_ERR("%s: error initializing libcurl\n", __func__);
-        return false;
-    }
-
-    bool force_download = false;
-
-    // Set the URL, allow to follow http redirection
-    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
-    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
-
-    // Check if hf-token or bearer-token was specified
-    if (!hf_token.empty()) {
-        std::string auth_header = "Authorization: Bearer " + hf_token;
-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
-    }
-
-#if defined(_WIN32)
-    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
-    // operating system. Currently implemented under MS-Windows.
-    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
-#endif
-
-    // Check if the file already exists locally
-    auto file_exists = std::filesystem::exists(path);
-
-    // If the file exists, check its JSON metadata companion file.
-    std::string metadata_path = path + ".json";
-    nlohmann::json metadata;
-    std::string etag;
-    std::string last_modified;
-
-    if (file_exists) {
-        // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
-        std::ifstream metadata_in(metadata_path);
-        if (metadata_in.good()) {
-            try {
-                metadata_in >> metadata;
-                LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
-                if (metadata.contains("url") && metadata.at("url").is_string()) {
-                    auto previous_url = metadata.at("url").get<std::string>();
-                    if (previous_url != url) {
-                        LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
-                        return false;
-                    }
-                }
-                if (metadata.contains("etag") && metadata.at("etag").is_string()) {
-                    etag = metadata.at("etag");
-                }
-                if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
-                    last_modified = metadata.at("lastModified");
-                }
-            } catch (const nlohmann::json::exception & e) {
-                LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
-                return false;
-            }
-        }
-    } else {
-        LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
-    }
-
-    // Send a HEAD request to retrieve the etag and last-modified headers
-    struct common_load_model_from_url_headers {
-        std::string etag;
-        std::string last_modified;
-    };
-
-    common_load_model_from_url_headers headers;
-
-    {
-        typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
-        auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
-            common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
-
-            static std::regex header_regex("([^:]+): (.*)\r\n");
-            static std::regex etag_regex("ETag", std::regex_constants::icase);
-            static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
-
-            std::string header(buffer, n_items);
-            std::smatch match;
-            if (std::regex_match(header, match, header_regex)) {
-                const std::string & key = match[1];
-                const std::string & value = match[2];
-                if (std::regex_match(key, match, etag_regex)) {
-                    headers->etag = value;
-                } else if (std::regex_match(key, match, last_modified_regex)) {
-                    headers->last_modified = value;
-                }
-            }
-            return n_items;
-        };
-
-        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
-        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
-        curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
-        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
-
-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
-        if (!was_perform_successful) {
-            return false;
-        }
-
-        long http_code = 0;
-        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-        if (http_code != 200) {
-            // HEAD not supported, we don't know if the file has changed
-            // force trigger downloading
-            force_download = true;
-            LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
-        }
-    }
-
-    bool should_download = !file_exists || force_download;
-    if (!should_download) {
-        if (!etag.empty() && etag != headers.etag) {
-            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
-            should_download = true;
-        } else if (!last_modified.empty() && last_modified != headers.last_modified) {
-            LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
-            should_download = true;
-        }
-    }
-    if (should_download) {
-        std::string path_temporary = path + ".downloadInProgress";
-        if (file_exists) {
-            LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
-            if (remove(path.c_str()) != 0) {
-                LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
-                return false;
-            }
-        }
-
-        // Set the output file
-
-        struct FILE_deleter {
-            void operator()(FILE * f) const {
-                fclose(f);
-            }
-        };
-
-        std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
-        if (!outfile) {
-            LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
-            return false;
-        }
-
-        typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
-        auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
-            return fwrite(data, size, nmemb, (FILE *)fd);
-        };
-        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
-        curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
-        curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
-
-        // display download progress
-        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
-
-        // helper function to hide password in URL
-        auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
-            std::size_t protocol_pos = url.find("://");
-            if (protocol_pos == std::string::npos) {
-                return url; // Malformed URL
-            }
-
-            std::size_t at_pos = url.find('@', protocol_pos + 3);
-            if (at_pos == std::string::npos) {
-                return url; // No password in URL
-            }
-
-            return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
-        };
-
-        // start the download
-        LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
-            llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
-        if (!was_perform_successful) {
-            return false;
-        }
-
-        long http_code = 0;
-        curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-        if (http_code < 200 || http_code >= 400) {
-            LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
-            return false;
-        }
-
-        // Causes file to be closed explicitly here before we rename it.
-        outfile.reset();
-
-        // Write the updated JSON metadata file.
-        metadata.update({
-            {"url", url},
-            {"etag", headers.etag},
-            {"lastModified", headers.last_modified}
-        });
-        std::ofstream(metadata_path) << metadata.dump(4);
-        LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
-
-        if (rename(path_temporary.c_str(), path.c_str()) != 0) {
-            LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
-            return false;
-        }
-    }
-
-    return true;
-}
-
-struct llama_model * common_load_model_from_url(
-        const std::string & model_url,
-        const std::string & local_path,
-        const std::string & hf_token,
-        const struct llama_model_params & params) {
-    // Basic validation of the model_url
-    if (model_url.empty()) {
-        LOG_ERR("%s: invalid model_url\n", __func__);
-        return NULL;
-    }
-
-    if (!common_download_file(model_url, local_path, hf_token)) {
-        return NULL;
-    }
-
-    // check for additional GGUFs split to download
-    int n_split = 0;
-    {
-        struct gguf_init_params gguf_params = {
-            /*.no_alloc = */ true,
-            /*.ctx = */ NULL,
-        };
-        auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
-        if (!ctx_gguf) {
-            LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str());
-            return NULL;
-        }
-
-        auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
-        if (key_n_split >= 0) {
-            n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
-        }
-
-        gguf_free(ctx_gguf);
-    }
-
-    if (n_split > 1) {
-        char split_prefix[PATH_MAX] = {0};
-        char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
-
-        // Verify the first split file format
-        // and extract split URL and PATH prefixes
-        {
-            if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) {
-                LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split);
-                return NULL;
-            }
-
-            if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) {
-                LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split);
-                return NULL;
-            }
-        }
-
-        // Prepare download in parallel
-        std::vector<std::future<bool>> futures_download;
-        for (int idx = 1; idx < n_split; idx++) {
-            futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool {
-                char split_path[PATH_MAX] = {0};
-                llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
-
-                char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
-                llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
-
-                return common_download_file(split_url, split_path, hf_token);
-            }, idx));
-        }
-
-        // Wait for all downloads to complete
-        for (auto & f : futures_download) {
-            if (!f.get()) {
-                return NULL;
-            }
-        }
-    }
-
-    return llama_model_load_from_file(local_path.c_str(), params);
-}
-
-struct llama_model * common_load_model_from_hf(
-        const std::string & repo,
-        const std::string & remote_path,
-        const std::string & local_path,
-        const std::string & hf_token,
-        const struct llama_model_params & params) {
-    // construct hugging face model url:
-    //
-    // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
-    //   https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
-    //
-    // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
-    //   https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
-    //
-
-    std::string model_url = "https://huggingface.co/";
-    model_url += repo;
-    model_url += "/resolve/main/";
-    model_url += remote_path;
-
-    return common_load_model_from_url(model_url, local_path, hf_token, params);
-}
-
-/**
- * Allow getting the HF file from the HF repo with tag (like ollama), for example:
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
- * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
- *
- * Return pair of <repo, file> (with "repo" already having tag removed)
- *
- * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
- */
-std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) {
-    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "latest";
-    std::string hf_repo = parts[0];
-    if (string_split<std::string>(hf_repo, '/').size() != 2) {
-        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
-    }
-
-    // fetch model info from Hugging Face Hub API
-    json model_info;
-    curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
-    curl_slist_ptr http_headers;
-    std::string res_str;
-    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
-    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
-    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
-    typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
-    auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
-        static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
-        return size * nmemb;
-    };
-    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
-    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
-#if defined(_WIN32)
-    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
-#endif
-    if (!hf_token.empty()) {
-        std::string auth_header = "Authorization: Bearer " + hf_token;
-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-    }
-    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
-    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
-    http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
-    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
-
-    CURLcode res = curl_easy_perform(curl.get());
-
-    if (res != CURLE_OK) {
-        throw std::runtime_error("error: cannot make GET request to HF API");
-    }
-
-    long res_code;
-    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
-    if (res_code == 200) {
-        model_info = json::parse(res_str);
-    } else if (res_code == 401) {
-        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
-    } else {
-        throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
-    }
-
-    // check response
-    if (!model_info.contains("ggufFile")) {
-        throw std::runtime_error("error: model does not have ggufFile");
-    }
-    json & gguf_file = model_info.at("ggufFile");
-    if (!gguf_file.contains("rfilename")) {
-        throw std::runtime_error("error: ggufFile does not have rfilename");
-    }
-
-    return std::make_pair(hf_repo, gguf_file.at("rfilename"));
-}
-
-#else
-
-struct llama_model * common_load_model_from_url(
-        const std::string & /*model_url*/,
-        const std::string & /*local_path*/,
-        const std::string & /*hf_token*/,
-        const struct llama_model_params & /*params*/) {
-    LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
-    return nullptr;
-}
-
-struct llama_model * common_load_model_from_hf(
-        const std::string & /*repo*/,
-        const std::string & /*remote_path*/,
-        const std::string & /*local_path*/,
-        const std::string & /*hf_token*/,
-        const struct llama_model_params & /*params*/) {
-    LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
-    return nullptr;
-}
-
-std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) {
-    LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
-    return std::make_pair("", "");
-}
-
-#endif // LLAMA_USE_CURL
-
 //
 // Batch utils
 //
@@ -2032,26 +1565,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 
     return result;
 }
-
-template <>
-json common_grammar_trigger::to_json() const {
-    json out {
-        {"type", (int) type},
-        {"value", value},
-    };
-    if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
-        out["token"] = (int) token;
-    }
-    return out;
-}
-
-template <>
-common_grammar_trigger common_grammar_trigger::from_json(const json & in) {
-    common_grammar_trigger out;
-    out.type = (common_grammar_trigger_type) in.at("type").get<int>();
-    out.value = in.at("value").get<std::string>();
-    if (out.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
-        out.token = (llama_token) in.at("token").get<int>();
-    }
-    return out;
-}