@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
@@ -32,13 +32,10 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
32
32
  static bool match_string(const std::string & input, llama_grammar * grammar) {
33
33
  const auto cpts = unicode_cpts_from_utf8(input);
34
34
 
35
- const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
36
- llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
35
+ auto & stacks_cur = llama_grammar_get_stacks(grammar);
37
36
 
38
37
  for (const auto & cpt : cpts) {
39
- const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
40
-
41
- llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
38
+ llama_grammar_accept(grammar, cpt);
42
39
 
43
40
  if (stacks_cur.empty()) {
44
41
  // no stacks means that the grammar failed to match at this point
@@ -63,7 +60,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
63
60
  auto * grammar = build_grammar(grammar_str);
64
61
 
65
62
  // Save the original grammar stacks so that we can reset after every new string we want to test
66
- const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
63
+ const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
67
64
 
68
65
  llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
69
66
 
@@ -113,12 +113,10 @@ int main()
113
113
  }
114
114
  }
115
115
 
116
- llama_grammar * grammar = NULL;
117
116
  std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
118
117
 
119
- grammar = llama_grammar_init_impl(nullptr, grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
120
- if (grammar == nullptr)
121
- {
118
+ llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
119
+ if (grammar == nullptr) {
122
120
  throw std::runtime_error("Failed to initialize llama_grammar");
123
121
  }
124
122
 
@@ -79,9 +79,9 @@ static float dot_product(const float * a1, const float * a2, size_t test_size) {
79
79
  }
80
80
 
81
81
  // Total dot product error
82
- static float dot_product_error(
83
- const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float *test_data2
84
- ) {
82
+ static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2) {
83
+ GGML_UNUSED(qfns);
84
+
85
85
  std::vector<uint8_t> tmp_q1(2*test_size);
86
86
  std::vector<uint8_t> tmp_q2(2*test_size);
87
87
 
@@ -138,7 +138,7 @@ int main(int /*argc*/, const char ** /*argv*/) {
138
138
  struct ggml_tensor * x;
139
139
 
140
140
  // rope f32
141
- for (int m = 0; m < 3; ++m) {
141
+ for (int m = 0; m < 5; ++m) {
142
142
  const int ndims = 4;
143
143
 
144
144
  const int64_t n_rot = 128;
@@ -147,28 +147,69 @@ int main(int /*argc*/, const char ** /*argv*/) {
147
147
  const int n_past_0 = 100;
148
148
  const int n_past_2 = 33;
149
149
 
150
- struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
151
- struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
152
- struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
153
-
154
- for (int i = 0; i < ne[2]; ++i) {
155
- ((int32_t *) p0->data)[i] = n_past_0 + i;
156
- ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
157
- ((int32_t *) p2->data)[i] = n_past_2 + i;
158
- }
159
-
160
- // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
161
- const int mode = m == 0 ? 0 : m == 1 ? 2 : 4;
162
-
150
+ struct ggml_tensor * r0;
151
+ struct ggml_tensor * r1;
152
+ struct ggml_tensor * r2;
163
153
  x = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
154
+ int mode = -1;
164
155
 
165
- // 100, 101, 102, ..., 172
166
- struct ggml_tensor * r0 = ggml_rope(ctx0, x, p0, n_rot, mode);
167
- // -67, -67, -67, ..., -67
168
- struct ggml_tensor * r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
156
+ if (m < 3) {
157
+ struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
158
+ struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
159
+ struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
169
160
 
170
- // 33, 34, 35, ..., 105
171
- struct ggml_tensor * r2 = ggml_rope(ctx0, x, p2, n_rot, mode);
161
+ for (int i = 0; i < ne[2]; ++i) {
162
+ ((int32_t *) p0->data)[i] = n_past_0 + i;
163
+ ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
164
+ ((int32_t *) p2->data)[i] = n_past_2 + i;
165
+ }
166
+ // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
167
+ mode = m == 0 ? 0 : m == 1 ? 2 : 4;
168
+
169
+ // 100, 101, 102, ..., 172
170
+ r0 = ggml_rope(ctx0, x, p0, n_rot, mode);
171
+ // -67, -67, -67, ..., -67
172
+ r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
173
+
174
+ // 33, 34, 35, ..., 105
175
+ r2 = ggml_rope(ctx0, x, p2, n_rot, mode);
176
+ } else {
177
+ // testing multi-dimension rope position embedding mode
178
+ struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
179
+ struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
180
+ struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
181
+
182
+ int sections[4] = {16, 24, 24, 0};
183
+ mode = (m == 3) ? GGML_ROPE_TYPE_MROPE : GGML_ROPE_TYPE_VISION;
184
+
185
+ for (int i = 0; i < ne[2]; ++i) {
186
+ for (int j = 0; j < 4; ++j) {
187
+ ((int32_t *) p0->data)[i + ne[2] * j] = n_past_0 + i + j;
188
+ ((int32_t *) p1->data)[i + ne[2] * j] = n_past_2 - n_past_0;
189
+ ((int32_t *) p2->data)[i + ne[2] * j] = n_past_2 + i + j;
190
+ }
191
+ }
192
+
193
+ // [[100, 101, 102, ..., 172],
194
+ // [101, 102, 103, ..., 173],
195
+ // [102, 103, 104, ..., 174]]
196
+ r0 = ggml_rope_multi(
197
+ ctx0, x, p0, nullptr,
198
+ n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
199
+ // [[-67, -67, -67, ..., -67]
200
+ // [-67, -67, -67, ..., -67]
201
+ // [-67, -67, -67, ..., -67]]
202
+ r1 = ggml_rope_multi(
203
+ ctx0, r0, p1, nullptr,
204
+ n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
205
+
206
+ // [[33, 34, 35, ..., 105]
207
+ // [34, 35, 36, ..., 106]
208
+ // [35, 36, 37, ..., 107]]
209
+ r2 = ggml_rope_multi(
210
+ ctx0, x, p2, nullptr,
211
+ n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
212
+ }
172
213
 
173
214
  ggml_cgraph * gf = ggml_new_graph(ctx0);
174
215
 
@@ -145,7 +145,7 @@ static void test_penalties(
145
145
  sampler_tester tester(probs, probs_expected);
146
146
 
147
147
  const size_t n_vocab = probs.size();
148
- auto * sampler = llama_sampler_init_penalties(n_vocab, LLAMA_TOKEN_NULL, LLAMA_TOKEN_NULL, last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence, false, false);
148
+ auto * sampler = llama_sampler_init_penalties(last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence);
149
149
 
150
150
  for (size_t i = 0; i < last_tokens.size(); i++) {
151
151
  llama_sampler_accept(sampler, last_tokens[i]);
@@ -284,7 +284,7 @@ static void test_perf() {
284
284
 
285
285
  data.reserve(n_vocab);
286
286
  for (int i = 0; i < n_vocab; i++) {
287
- const float logit = 2.0f*((float)(rand())/RAND_MAX - 0.5f);
287
+ const float logit = 2.0f*((double)(rand())/RAND_MAX - 0.5);
288
288
  data.emplace_back(llama_token_data{i, logit, 0.0f});
289
289
  }
290
290
 
@@ -1,72 +0,0 @@
1
- name: Nix aarch64 builds
2
-
3
- on:
4
- workflow_dispatch: # allows manual triggering
5
- schedule:
6
- # Rebuild daily rather than on every push because QEMU is expensive (e.g.
7
- # 1.5h instead of minutes with the cold cache).
8
- #
9
- # randint(0, 59), randint(0, 23)
10
- - cron: '26 12 * * *'
11
- # But also rebuild if we touched any of the Nix expressions:
12
- push:
13
- branches:
14
- - master
15
- paths: ['**/*.nix', 'flake.lock']
16
- pull_request:
17
- types: [opened, synchronize, reopened]
18
- paths: ['**/*.nix', 'flake.lock']
19
-
20
- concurrency:
21
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
22
- cancel-in-progress: true
23
-
24
- # Fine-grant permission
25
- # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
26
- permissions:
27
- # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
28
- id-token: write
29
- contents: read
30
-
31
- jobs:
32
- nix-build-aarch64:
33
- runs-on: ubuntu-latest
34
- steps:
35
- - name: Checkout repository
36
- uses: actions/checkout@v4
37
- - name: Install QEMU
38
- # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
39
- run: |
40
- sudo apt-get update
41
- sudo apt-get install -y qemu-user-static qemu-system-aarch64
42
- sudo usermod -a -G kvm $USER
43
- - name: Install Nix
44
- uses: DeterminateSystems/nix-installer-action@v9
45
- with:
46
- github-token: ${{ secrets.GITHUB_TOKEN }}
47
- extra-conf: |
48
- extra-platforms = aarch64-linux
49
- extra-system-features = nixos-test kvm
50
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
51
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
52
- - uses: DeterminateSystems/magic-nix-cache-action@v2
53
- with:
54
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
55
- - name: Set-up cachix to push the results to
56
- uses: cachix/cachix-action@v13
57
- with:
58
- authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
59
- name: llama-cpp
60
- - name: Show all output paths
61
- run: >
62
- nix run github:nix-community/nix-eval-jobs
63
- -- --gc-roots-dir gcroot
64
- --flake
65
- ".#packages.aarch64-linux"
66
- - name: Build
67
- run: >
68
- nix run github:Mic92/nix-fast-build
69
- -- --skip-cached --no-nom
70
- --systems aarch64-linux
71
- --flake
72
- ".#checks.aarch64-linux"
@@ -1,79 +0,0 @@
1
- name: Nix CI
2
-
3
- on:
4
- workflow_dispatch: # allows manual triggering
5
- push:
6
- branches:
7
- - master
8
- pull_request:
9
- types: [opened, synchronize, reopened]
10
-
11
- concurrency:
12
- group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
13
- cancel-in-progress: true
14
-
15
- # Fine-grant permission
16
- # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
17
- permissions:
18
- # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
19
- id-token: write
20
- contents: read
21
-
22
- jobs:
23
- nix-eval:
24
- strategy:
25
- fail-fast: false
26
- matrix:
27
- os: [ ubuntu-latest, macos-latest ]
28
- runs-on: ${{ matrix.os }}
29
- steps:
30
- - name: Checkout repository
31
- uses: actions/checkout@v4
32
- - name: Install Nix
33
- uses: DeterminateSystems/nix-installer-action@v9
34
- with:
35
- github-token: ${{ secrets.GITHUB_TOKEN }}
36
- extra-conf: |
37
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
38
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
39
- - uses: DeterminateSystems/magic-nix-cache-action@v2
40
- with:
41
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
42
- - name: List all flake outputs
43
- run: nix flake show --all-systems
44
- - name: Show all output paths
45
- run: >
46
- nix run github:nix-community/nix-eval-jobs
47
- -- --gc-roots-dir gcroot
48
- --flake
49
- ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
50
- nix-build:
51
- strategy:
52
- fail-fast: false
53
- matrix:
54
- os: [ ubuntu-latest, macos-latest ]
55
- runs-on: ${{ matrix.os }}
56
- steps:
57
- - name: Checkout repository
58
- uses: actions/checkout@v4
59
- - name: Install Nix
60
- uses: DeterminateSystems/nix-installer-action@v9
61
- with:
62
- github-token: ${{ secrets.GITHUB_TOKEN }}
63
- extra-conf: |
64
- extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
65
- extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
66
- - uses: DeterminateSystems/magic-nix-cache-action@v2
67
- with:
68
- upstream-cache: https://${{ matrix.cachixName }}.cachix.org
69
- - name: Set-up cachix to push the results to
70
- uses: cachix/cachix-action@v13
71
- with:
72
- authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
73
- name: llama-cpp
74
- - name: Build
75
- run: >
76
- nix run github:Mic92/nix-fast-build
77
- -- --skip-cached --no-nom
78
- --flake
79
- ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
@@ -1,22 +0,0 @@
1
- name: update-flake-lock
2
- on:
3
- workflow_dispatch:
4
- schedule:
5
- - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
6
-
7
- jobs:
8
- lockfile:
9
- runs-on: ubuntu-latest
10
- steps:
11
- - name: Checkout repository
12
- uses: actions/checkout@v4
13
- - name: Install Nix
14
- uses: DeterminateSystems/nix-installer-action@main
15
- - name: Update flake.lock
16
- uses: DeterminateSystems/update-flake-lock@main
17
- with:
18
- pr-title: "nix: update flake.lock"
19
- pr-labels: |
20
- nix
21
- pr-reviewers: philiptaron,SomeoneSerge
22
- token: ${{ secrets.FLAKE_TOKEN }}
@@ -1,36 +0,0 @@
1
- # Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
2
- name: "Publish a flake to flakestry & flakehub"
3
- on:
4
- push:
5
- tags:
6
- - "*"
7
- workflow_dispatch:
8
- inputs:
9
- tag:
10
- description: "The existing tag to publish"
11
- type: "string"
12
- required: true
13
- jobs:
14
- flakestry-publish:
15
- runs-on: ubuntu-latest
16
- permissions:
17
- id-token: "write"
18
- contents: "read"
19
- steps:
20
- - uses: flakestry/flakestry-publish@main
21
- with:
22
- version: "${{ inputs.tag || github.ref_name }}"
23
- flakehub-publish:
24
- runs-on: "ubuntu-latest"
25
- permissions:
26
- id-token: "write"
27
- contents: "read"
28
- steps:
29
- - uses: "actions/checkout@v4"
30
- with:
31
- ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
32
- - uses: "DeterminateSystems/nix-installer-action@main"
33
- - uses: "DeterminateSystems/flakehub-push@main"
34
- with:
35
- visibility: "public"
36
- tag: "${{ inputs.tag }}"
@@ -1,25 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
- #include "ggml-backend.h"
5
-
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- // buffer_type API
12
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
13
-
14
- GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);
15
-
16
- // backend API
17
- GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);
18
-
19
- GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
20
-
21
- GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);
22
-
23
- #ifdef __cplusplus
24
- }
25
- #endif
@@ -1,129 +0,0 @@
1
- #define GGML_COMMON_DECL_C
2
- #include "ggml-common.h"
3
-
4
- #include "ggml-aarch64.h"
5
- #include "ggml-impl.h"
6
- #include "ggml-quants.h"
7
- #include <assert.h>
8
-
9
- #define UNUSED GGML_UNUSED
10
-
11
- static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
12
- block_q4_0x4 out;
13
-
14
- for (int i = 0; i < 4; i++) {
15
- out.d[i] = in[i].d;
16
- }
17
-
18
- const int end = QK4_0 * 2 / blck_size_interleave;
19
-
20
- if (blck_size_interleave == 8) {
21
- const uint64_t xor_mask = 0x8888888888888888ULL;
22
- for (int i = 0; i < end; ++i) {
23
- int src_id = i % 4;
24
- int src_offset = (i / 4) * blck_size_interleave;
25
- int dst_offset = i * blck_size_interleave;
26
-
27
- uint64_t elems;
28
- // Using memcpy to avoid unaligned memory accesses
29
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
30
- elems ^= xor_mask;
31
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
32
- }
33
- } else if (blck_size_interleave == 4) {
34
- const uint32_t xor_mask = 0x88888888;
35
- for (int i = 0; i < end; ++i) {
36
- int src_id = i % 4;
37
- int src_offset = (i / 4) * blck_size_interleave;
38
- int dst_offset = i * blck_size_interleave;
39
-
40
- uint32_t elems;
41
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
42
- elems ^= xor_mask;
43
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
44
- }
45
- } else {
46
- GGML_ASSERT(false);
47
- }
48
-
49
- return out;
50
- }
51
-
52
- // interleave 8 block_q4_0s in blocks of blck_size_interleave
53
- // returns an interleaved block_q4_0x8
54
- // in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
55
- // first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
56
- static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
57
- block_q4_0x8 out;
58
-
59
- for (int i = 0; i < 8; i++) {
60
- out.d[i] = in[i].d;
61
- }
62
-
63
- const int end = QK4_0 * 4 / blck_size_interleave;
64
- const uint64_t xor_mask = 0x8888888888888888ULL;
65
-
66
- for (int i = 0; i < end; ++i) {
67
- int src_id = i % 8;
68
- int src_offset = (i / 8) * blck_size_interleave;
69
- int dst_offset = i * blck_size_interleave;
70
-
71
- uint64_t elems;
72
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
73
- elems ^= xor_mask;
74
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
75
- }
76
-
77
- return out;
78
- }
79
-
80
- static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
81
- assert(n_per_row % QK4_0 == 0);
82
- const int nb = n_per_row / QK4_0;
83
-
84
- void * out_ptr = NULL;
85
- if (nrows_interleaved == 8) {
86
- out_ptr = (block_q4_0x8 *) dst;
87
- }
88
- else if (nrows_interleaved == 4) {
89
- out_ptr = (block_q4_0x4 *) dst;
90
- }
91
- assert(nrows_interleaved <= 8);
92
- block_q4_0 dst_tmp[8];
93
-
94
- for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
95
-
96
- for (int64_t x = 0; x < nb; x++) {
97
-
98
- for (int i = 0; i < nrows_interleaved; i++ ) {
99
- quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
100
- }
101
-
102
- if (nrows_interleaved == 8) {
103
- *(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
104
- out_ptr = (block_q4_0x8 *) out_ptr + 1;
105
- }
106
- else if (nrows_interleaved == 4) {
107
- *(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
108
- out_ptr = (block_q4_0x4 *) out_ptr + 1;
109
- }
110
- }
111
- }
112
-
113
- return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
114
- }
115
-
116
- size_t quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
117
- UNUSED(quant_weights);
118
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 4);
119
- }
120
-
121
- size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
122
- UNUSED(quant_weights);
123
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 8);
124
- }
125
-
126
- size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
127
- UNUSED(quant_weights);
128
- return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
129
- }
@@ -1,19 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
-
5
- // GGML internal header
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- // Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
12
- size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
13
- size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
14
- size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
15
-
16
- #ifdef __cplusplus
17
- }
18
- #endif
19
-
@@ -1,107 +0,0 @@
1
- if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
2
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
3
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$") AND
4
- CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
5
- message(STATUS "Using AMX")
6
-
7
- file(GLOB GGML_HEADERS_AMX "*.h")
8
- list(APPEND GGML_HEADERS_AMX "../../include/ggml-amx.h")
9
-
10
- file(GLOB GGML_SOURCES_AMX "*.cpp")
11
-
12
- add_library(ggml-amx
13
- ${GGML_HEADERS_AMX}
14
- ${GGML_SOURCES_AMX})
15
-
16
- target_link_libraries(ggml-amx PRIVATE ggml-base)
17
- target_include_directories(ggml-amx PRIVATE . ..)
18
-
19
- # this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
20
- # TODO: integrate AMX backend into the CPU backend
21
- if (MSVC)
22
- # instruction set detection for MSVC only
23
- if (GGML_NATIVE)
24
- # TODO: improve, should not reference files from the parent folder
25
- include(../ggml-cpu/cmake/FindSIMD.cmake)
26
- endif ()
27
- if (GGML_AVX512)
28
- list(APPEND ARCH_FLAGS /arch:AVX512)
29
- # MSVC has no compile-time flags enabling specific
30
- # AVX512 extensions, neither it defines the
31
- # macros corresponding to the extensions.
32
- # Do it manually.
33
- if (GGML_AVX512_VBMI)
34
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
35
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
36
- endif()
37
- if (GGML_AVX512_VNNI)
38
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
39
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
40
- endif()
41
- if (GGML_AVX512_BF16)
42
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
43
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
44
- endif()
45
- if (GGML_AMX_TILE)
46
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
47
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
48
- endif()
49
- if (GGML_AMX_INT8)
50
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
51
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
52
- endif()
53
- if (GGML_AMX_BF16)
54
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
55
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
56
- endif()
57
- elseif (GGML_AVX2)
58
- list(APPEND ARCH_FLAGS /arch:AVX2)
59
- elseif (GGML_AVX)
60
- list(APPEND ARCH_FLAGS /arch:AVX)
61
- endif()
62
- else()
63
- if (GGML_NATIVE)
64
- list(APPEND ARCH_FLAGS -march=native)
65
- endif()
66
- if (GGML_F16C)
67
- list(APPEND ARCH_FLAGS -mf16c)
68
- endif()
69
- if (GGML_FMA)
70
- list(APPEND ARCH_FLAGS -mfma)
71
- endif()
72
- if (GGML_AVX)
73
- list(APPEND ARCH_FLAGS -mavx)
74
- endif()
75
- if (GGML_AVX2)
76
- list(APPEND ARCH_FLAGS -mavx2)
77
- endif()
78
- if (GGML_AVX512)
79
- list(APPEND ARCH_FLAGS -mavx512f)
80
- list(APPEND ARCH_FLAGS -mavx512dq)
81
- list(APPEND ARCH_FLAGS -mavx512bw)
82
- endif()
83
- if (GGML_AVX512_VBMI)
84
- list(APPEND ARCH_FLAGS -mavx512vbmi)
85
- endif()
86
- if (GGML_AVX512_VNNI)
87
- list(APPEND ARCH_FLAGS -mavx512vnni)
88
- endif()
89
- if (GGML_AVX512_BF16)
90
- list(APPEND ARCH_FLAGS -mavx512bf16)
91
- endif()
92
- if (GGML_AMX_TILE)
93
- list(APPEND ARCH_FLAGS -mamx-tile)
94
- endif()
95
- if (GGML_AMX_INT8)
96
- list(APPEND ARCH_FLAGS -mamx-int8)
97
- endif()
98
- if (GGML_AMX_BF16)
99
- list(APPEND ARCH_FLAGS -mamx-bf16)
100
- endif()
101
- endif()
102
-
103
- target_compile_options(ggml-amx PRIVATE ${ARCH_FLAGS})
104
- else()
105
- set(GGML_AMX OFF PARENT_SCOPE)
106
- message(WARNING "AMX requires x86 and gcc version > 11.0. Turning off GGML_AMX.")
107
- endif()