@fugood/llama.node 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +29 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +17 -1
  21. package/src/LlamaContext.cpp +86 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
@@ -2,11 +2,18 @@
2
2
  #include "ggml-backend-impl.h"
3
3
  #include "ggml-cpu.h"
4
4
  #include "ggml-cpu-aarch64.h"
5
+ #include "ggml-cpu-traits.h"
5
6
  #include "ggml-impl.h"
7
+ #include "amx/amx.h"
8
+
6
9
  #include <cctype>
7
10
  #include <string>
8
11
  #include <vector>
9
12
 
13
+ #ifdef GGML_USE_CPU_HBM
14
+ #include "ggml-cpu-hbm.h"
15
+ #endif
16
+
10
17
  #if defined(__APPLE__)
11
18
  #include <sys/types.h>
12
19
  #include <sys/sysctl.h>
@@ -22,124 +29,20 @@
22
29
 
23
30
  // ggml-backend interface
24
31
 
25
- #ifdef GGML_USE_CPU_HBM
26
-
27
- // buffer type HBM
28
-
29
- #include <hbwmalloc.h>
30
-
31
- static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
32
- return "CPU_HBM";
33
-
34
- GGML_UNUSED(buft);
35
- }
36
-
37
- static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
38
- hbw_free(buffer->context);
39
- }
40
-
41
- static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
42
- void * ptr;
43
- int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
44
- if (result != 0) {
45
- GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
46
- return NULL;
47
- }
48
-
49
- ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
50
- buffer->buft = buft;
51
- buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
52
-
53
- return buffer;
54
- }
55
-
56
- ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
57
- static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
58
- /* .iface = */ {
59
- /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
60
- /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
61
- /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
62
- /* .get_max_size = */ NULL, // defaults to SIZE_MAX
63
- /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
64
- /* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
65
- },
66
- /* .context = */ NULL,
67
- };
68
-
69
- return &ggml_backend_cpu_buffer_type_hbm;
70
- }
71
- #endif
72
-
73
- // buffer type AARCH64
74
-
75
- static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
76
- tensor->extra = (void *)ggml_aarch64_get_optimal_repack_type(tensor); // NOLINT
77
-
78
- GGML_UNUSED(buffer);
79
- }
80
-
81
- static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
82
- GGML_ASSERT(offset == 0);
83
- GGML_ASSERT(size == ggml_nbytes(tensor));
84
-
85
- enum ggml_type repack_type = (enum ggml_type)(intptr_t)tensor->extra;
86
-
87
- ggml_aarch64_repack_tensor(tensor, repack_type, data, size);
88
-
89
- GGML_UNUSED(buffer);
90
- }
91
-
92
- static const char * ggml_backend_cpu_aarch64_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
93
- return "CPU_AARCH64";
94
-
95
- GGML_UNUSED(buft);
96
- }
97
-
98
- static ggml_backend_buffer_t ggml_backend_cpu_aarch64_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
99
- auto * buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
100
-
101
- if (buffer == NULL) {
102
- return NULL;
103
- }
104
-
105
- buffer->buft = buft;
106
- buffer->iface.init_tensor = ggml_backend_cpu_aarch64_buffer_init_tensor;
107
- buffer->iface.set_tensor = ggml_backend_cpu_aarch64_buffer_set_tensor;
108
-
109
- return buffer;
110
- }
111
-
112
- ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
113
- static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_aarch64 = {
114
- /* .iface = */ {
115
- /* .get_name = */ ggml_backend_cpu_aarch64_buffer_type_get_name,
116
- /* .alloc_buffer = */ ggml_backend_cpu_aarch64_buffer_type_alloc_buffer,
117
- /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
118
- /* .get_max_size = */ NULL, // defaults to SIZE_MAX
119
- /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
120
- /* .is_host = */ NULL,
121
- },
122
- /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
123
- /* .context = */ NULL,
124
- };
125
-
126
- return &ggml_backend_cpu_buffer_type_aarch64;
127
- }
128
-
129
- bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft) {
130
- return buft == ggml_backend_cpu_aarch64_buffer_type();
131
- }
132
-
133
- static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backend_dev_t device) {
32
+ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
134
33
  static std::vector<ggml_backend_buffer_type_t> bufts = []() {
135
34
  std::vector<ggml_backend_buffer_type_t> bufts;
136
35
 
137
- #ifdef GGML_USE_CPU_HBM
138
- bufts.push_back(ggml_backend_cpu_hbm_buffer_type());
36
+ #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
37
+ if (ggml_backend_amx_buffer_type()) {
38
+ bufts.push_back(ggml_backend_amx_buffer_type());
39
+ }
139
40
  #endif
140
41
 
141
42
  #ifdef GGML_USE_CPU_AARCH64
142
- bufts.push_back(ggml_backend_cpu_aarch64_buffer_type());
43
+ if (ggml_backend_cpu_aarch64_buffer_type()) {
44
+ bufts.push_back(ggml_backend_cpu_aarch64_buffer_type());
45
+ }
143
46
  #endif
144
47
 
145
48
  bufts.push_back(NULL);
@@ -147,11 +50,22 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backen
147
50
  return bufts;
148
51
  }();
149
52
 
150
- return bufts.data();
53
+ return bufts;
54
+ }
55
+
56
+ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
57
+ return ggml_backend_cpu_get_extra_buffers_type().data();
151
58
 
152
59
  GGML_UNUSED(device);
153
60
  }
154
61
 
62
+ static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
63
+ for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
64
+ if (extra && extra == buft) return true;
65
+ }
66
+ return false;
67
+ }
68
+
155
69
  // CPU backend - backend (stream)
156
70
 
157
71
  struct ggml_backend_cpu_context {
@@ -456,14 +370,23 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
456
370
  const struct ggml_tensor * src0 = op->src[0];
457
371
  const struct ggml_tensor * src1 = op->src[1];
458
372
 
459
- if (src0 && src0->buffer && ggml_backend_cpu_buft_is_aarch64(src0->buffer->buft)) {
460
- if (op->op != GGML_OP_MUL_MAT || src0->type != GGML_TYPE_Q4_0 || ggml_aarch64_get_optimal_repack_type(src0) == GGML_TYPE_Q4_0) {
461
- return false;
373
+ if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
374
+ return true;
375
+ }
376
+
377
+ // extra_buffer_op?
378
+ for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
379
+ if (extra) {
380
+ auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
381
+ if (buf_extra && buf_extra->supports_op(dev, op)) {
382
+ return true;
383
+ }
462
384
  }
463
385
  }
464
386
 
465
- for (int i = 1; i < GGML_MAX_SRC; i++) {
466
- if (op->src[i] && op->src[i]->buffer && ggml_backend_cpu_buft_is_aarch64(op->src[i]->buffer->buft)) {
387
+ // the other case need host buffer.
388
+ for (int i = 0; i < GGML_MAX_SRC; i++) {
389
+ if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
467
390
  return false;
468
391
  }
469
392
  }
@@ -471,8 +394,11 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
471
394
  switch (op->op) {
472
395
  case GGML_OP_CPY:
473
396
  return
397
+ op->type != GGML_TYPE_IQ3_XXS &&
398
+ op->type != GGML_TYPE_IQ3_S &&
474
399
  op->type != GGML_TYPE_IQ2_XXS &&
475
400
  op->type != GGML_TYPE_IQ2_XS &&
401
+ op->type != GGML_TYPE_IQ2_S &&
476
402
  op->type != GGML_TYPE_IQ1_S &&
477
403
  op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
478
404
  case GGML_OP_MUL_MAT:
@@ -486,13 +412,10 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
486
412
  default:
487
413
  return true;
488
414
  }
489
-
490
- GGML_UNUSED(dev);
491
415
  }
492
416
 
493
417
  static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
494
- return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_buft_is_aarch64(buft);
495
-
418
+ return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
496
419
  GGML_UNUSED(dev);
497
420
  }
498
421
 
@@ -541,16 +464,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
541
464
  return &ggml_backend_cpu_device;
542
465
  }
543
466
 
544
- struct ggml_backend_feature {
545
- const char * name;
546
- const char * value;
547
- };
548
-
549
- // Not used yet
550
467
  // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
551
- // and additionally to allow other backends to expose their own list of features that applications can query using the same API.
468
+ // and additionally to allow other backends to expose their own list of features that applications can query using the same API
552
469
  static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
553
470
  static std::vector<ggml_backend_feature> features = []() {
471
+ ggml_cpu_init();
472
+
554
473
  std::vector<ggml_backend_feature> features;
555
474
  if (ggml_cpu_has_sse3()) {
556
475
  features.push_back({ "SSE3", "1" });
@@ -561,6 +480,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
561
480
  if (ggml_cpu_has_avx()) {
562
481
  features.push_back({ "AVX", "1" });
563
482
  }
483
+ if (ggml_cpu_has_avx_vnni()) {
484
+ features.push_back({ "AVX_VNNI", "1" });
485
+ }
564
486
  if (ggml_cpu_has_avx2()) {
565
487
  features.push_back({ "AVX2", "1" });
566
488
  }
@@ -570,9 +492,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
570
492
  if (ggml_cpu_has_fma()) {
571
493
  features.push_back({ "FMA", "1" });
572
494
  }
573
- if (ggml_cpu_has_avx_vnni()) {
574
- features.push_back({ "AVX_VNNI", "1" });
575
- }
576
495
  if (ggml_cpu_has_avx512()) {
577
496
  features.push_back({ "AVX512", "1" });
578
497
  }
@@ -603,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
603
522
  if (ggml_cpu_has_sve()) {
604
523
  features.push_back({ "SVE", "1" });
605
524
  }
525
+ if (ggml_cpu_has_dotprod()) {
526
+ features.push_back({ "DOTPROD", "1" });
527
+ }
528
+ if (ggml_cpu_has_matmul_int8()) {
529
+ features.push_back({ "MATMUL_INT8", "1" });
530
+ }
606
531
  if (ggml_cpu_get_sve_cnt() > 0) {
607
532
  static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
608
533
  features.push_back({ "SVE_CNT", sve_cnt.c_str() });
@@ -619,6 +544,18 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
619
544
  if (ggml_cpu_has_llamafile()) {
620
545
  features.push_back({ "LLAMAFILE", "1" });
621
546
  }
547
+ #ifdef GGML_USE_ACCELERATE
548
+ features.push_back({ "ACCELERATE", "1" });
549
+ #endif
550
+ #ifdef GGML_USE_CPU_HBM
551
+ features.push_back({ "CPU_HBM", "1" });
552
+ #endif
553
+ #ifdef GGML_USE_OPENMP
554
+ features.push_back({ "OPENMP", "1" });
555
+ #endif
556
+ #ifdef GGML_USE_CPU_AARCH64
557
+ features.push_back({ "AARCH64_REPACK", "1" });
558
+ #endif
622
559
 
623
560
  features.push_back({ nullptr, nullptr });
624
561
 
@@ -632,10 +569,35 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
632
569
 
633
570
  static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
634
571
  if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
635
- return (void *)ggml_backend_cpu_set_n_threads;
572
+ ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
573
+ return (void *)fct;
636
574
  }
637
575
  if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
638
- return (void *)ggml_backend_cpu_get_extra_bufts;
576
+ ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
577
+ return (void *)fct;
578
+ }
579
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
580
+ return (void *)ggml_backend_cpu_get_features;
581
+ }
582
+ if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
583
+ return (void *)ggml_backend_cpu_set_abort_callback;
584
+ }
585
+ if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
586
+ return (void *)ggml_numa_init;
587
+ }
588
+ if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
589
+ return (void *)ggml_is_numa;
590
+ }
591
+
592
+ // threadpool - TODO: move to ggml-base
593
+ if (strcmp(name, "ggml_threadpool_new") == 0) {
594
+ return (void *)ggml_threadpool_new;
595
+ }
596
+ if (strcmp(name, "ggml_threadpool_free") == 0) {
597
+ return (void *)ggml_threadpool_free;
598
+ }
599
+ if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
600
+ return (void *)ggml_backend_cpu_set_threadpool;
639
601
  }
640
602
 
641
603
  return NULL;
@@ -655,9 +617,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
655
617
  ggml_cpu_init();
656
618
 
657
619
  static struct ggml_backend_reg ggml_backend_cpu_reg = {
658
- /* .iface = */ ggml_backend_cpu_reg_i,
659
- /* .context = */ NULL,
620
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
621
+ /* .iface = */ ggml_backend_cpu_reg_i,
622
+ /* .context = */ NULL,
660
623
  };
661
624
 
662
625
  return &ggml_backend_cpu_reg;
663
626
  }
627
+
628
+ GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
@@ -50,8 +50,7 @@
50
50
 
51
51
  #include "sgemm.h"
52
52
  #include "ggml-impl.h"
53
- // hack until moved into the CPU backend
54
- #include "../ggml-cpu-impl.h"
53
+ #include "ggml-cpu-impl.h"
55
54
  #include "ggml-quants.h"
56
55
 
57
56
  #ifdef _MSC_VER
@@ -205,6 +204,7 @@ template <> inline float32x4_t load(const float *p) {
205
204
  return vld1q_f32(p);
206
205
  }
207
206
  #if !defined(_MSC_VER)
207
+ // FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
208
208
  template <> inline float16x8_t load(const ggml_fp16_t *p) {
209
209
  return vld1q_f16((const float16_t *)p);
210
210
  }
@@ -12,7 +12,7 @@ if (CUDAToolkit_FOUND)
12
12
  # 61 == Pascal, __dp4a instruction (per-byte integer dot product)
13
13
  # 70 == V100, FP16 tensor cores
14
14
  # 75 == Turing, int8 tensor cores
15
- if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6")
15
+ if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
16
16
  set(CMAKE_CUDA_ARCHITECTURES "native")
17
17
  elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
18
18
  set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
@@ -46,13 +46,10 @@ if (CUDAToolkit_FOUND)
46
46
  list(APPEND GGML_SOURCES_CUDA ${SRCS})
47
47
  endif()
48
48
 
49
- add_library(ggml-cuda
50
- ${GGML_HEADERS_CUDA}
51
- ${GGML_SOURCES_CUDA}
52
- )
53
-
54
- target_link_libraries(ggml-cuda PRIVATE ggml-base)
55
- target_include_directories(ggml-cuda PRIVATE . ..)
49
+ ggml_add_backend_library(ggml-cuda
50
+ ${GGML_HEADERS_CUDA}
51
+ ${GGML_SOURCES_CUDA}
52
+ )
56
53
 
57
54
  add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
58
55
 
@@ -135,7 +132,7 @@ if (CUDAToolkit_FOUND)
135
132
 
136
133
  message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
137
134
 
138
- get_flags(${CUDA_CCID} ${CUDA_CCVER})
135
+ ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
139
136
  list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
140
137
  endif()
141
138
 
@@ -149,7 +146,7 @@ if (CUDAToolkit_FOUND)
149
146
  list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
150
147
  endif()
151
148
 
152
- add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
149
+ target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
153
150
  else()
154
151
  message(FATAL_ERROR "CUDA Toolkit not found")
155
152
  endif()
@@ -95,6 +95,14 @@
95
95
 
96
96
  #define __CUDA_ARCH__ 1300
97
97
 
98
+ #if defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__)
99
+ #define GCN
100
+ #endif
101
+
102
+ #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__)
103
+ #define CDNA
104
+ #endif
105
+
98
106
  #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \
99
107
  defined(__gfx1150__) || defined(__gfx1151__)
100
108
  #define RDNA3
@@ -64,12 +64,10 @@ else()
64
64
  list(APPEND GGML_SOURCES_ROCM ${SRCS})
65
65
  endif()
66
66
 
67
- add_library(ggml-hip
68
- ${GGML_HEADERS_ROCM}
69
- ${GGML_SOURCES_ROCM})
70
-
71
- target_link_libraries(ggml-hip PRIVATE ggml-base)
72
- target_include_directories(ggml-hip PRIVATE . ..)
67
+ ggml_add_backend_library(ggml-hip
68
+ ${GGML_HEADERS_ROCM}
69
+ ${GGML_SOURCES_ROCM}
70
+ )
73
71
 
74
72
  # TODO: do not use CUDA definitions for HIP
75
73
  target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
@@ -14,7 +14,7 @@
14
14
  #include <arm_sve.h>
15
15
  #endif // __ARM_FEATURE_SVE
16
16
 
17
- #if defined(__ARM_NEON)
17
+ #if defined(__ARM_NEON) && !defined(__CUDACC__)
18
18
  // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
19
19
  //
20
20
  // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
@@ -30,11 +30,13 @@
30
30
  extern "C" {
31
31
  #endif
32
32
 
33
- #undef MIN
34
- #undef MAX
33
+ #ifndef MIN
34
+ # define MIN(a, b) ((a) < (b) ? (a) : (b))
35
+ #endif
35
36
 
36
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
37
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
37
+ #ifndef MAX
38
+ # define MAX(a, b) ((a) > (b) ? (a) : (b))
39
+ #endif
38
40
 
39
41
  // required for mmap as gguf only guarantees 32-byte alignment
40
42
  #define TENSOR_ALIGNMENT 32
@@ -72,8 +74,8 @@ static inline int ggml_up(int n, int m) {
72
74
  //
73
75
 
74
76
  GGML_ATTRIBUTE_FORMAT(2, 3)
75
- void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
76
- void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
77
+ GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
78
+ GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
77
79
 
78
80
  #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
79
81
  #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
@@ -295,24 +297,27 @@ struct ggml_cgraph {
295
297
  enum ggml_cgraph_eval_order order;
296
298
  };
297
299
 
300
+ // returns a slice of cgraph with nodes [i0, i1)
301
+ // the slice does not have leafs or gradients
302
+ // if you need the gradients, get them from the original graph
298
303
  struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
299
304
 
300
305
  // Memory allocation
301
306
 
302
- void * ggml_aligned_malloc(size_t size);
303
- void ggml_aligned_free(void * ptr, size_t size);
307
+ GGML_API void * ggml_aligned_malloc(size_t size);
308
+ GGML_API void ggml_aligned_free(void * ptr, size_t size);
304
309
 
305
310
  // FP16 to FP32 conversion
306
311
 
307
312
  #if defined(__ARM_NEON)
308
- #ifdef _MSC_VER
313
+ #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
309
314
  typedef uint16_t ggml_fp16_internal_t;
310
315
  #else
311
316
  typedef __fp16 ggml_fp16_internal_t;
312
317
  #endif
313
318
  #endif
314
319
 
315
- #if defined(__ARM_NEON) && !defined(_MSC_VER)
320
+ #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
316
321
  #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
317
322
  #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
318
323
 
@@ -546,6 +551,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
546
551
  #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
547
552
  #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
548
553
 
554
+ // expose GGUF internals for test code
555
+
556
+ GGML_API size_t gguf_type_size(enum gguf_type type);
557
+
558
+ GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
559
+
560
+ struct gguf_buf {
561
+ void * data;
562
+ size_t size;
563
+ size_t offset;
564
+ };
565
+ GGML_API struct gguf_buf gguf_buf_init(size_t size);
566
+ GGML_API void gguf_buf_free(struct gguf_buf buf);
567
+
568
+ GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
569
+
549
570
  #ifdef __cplusplus
550
571
  }
551
572
  #endif
@@ -6,13 +6,13 @@ if (NOT glslc_executable)
6
6
  message(FATAL_ERROR "glslc not found")
7
7
  endif()
8
8
 
9
- add_library(ggml-kompute
10
- ggml-kompute.cpp
11
- ../../include/ggml-kompute.h
12
- )
9
+ ggml_add_backend_library(ggml-kompute
10
+ ggml-kompute.cpp
11
+ ../../include/ggml-kompute.h
12
+ )
13
13
 
14
14
  target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
15
- target_include_directories(ggml-kompute PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
15
+ target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
16
16
 
17
17
  add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
18
18
 
@@ -105,8 +105,10 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
105
105
  kompute-shaders/op_getrows_q4_0.comp
106
106
  kompute-shaders/op_getrows_q4_1.comp
107
107
  kompute-shaders/op_getrows_q6_k.comp
108
- kompute-shaders/op_rope_f16.comp
109
- kompute-shaders/op_rope_f32.comp
108
+ kompute-shaders/op_rope_norm_f16.comp
109
+ kompute-shaders/op_rope_norm_f32.comp
110
+ kompute-shaders/op_rope_neox_f16.comp
111
+ kompute-shaders/op_rope_neox_f32.comp
110
112
  kompute-shaders/op_cpy_f16_f16.comp
111
113
  kompute-shaders/op_cpy_f16_f32.comp
112
114
  kompute-shaders/op_cpy_f32_f16.comp
@@ -139,8 +141,10 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
139
141
  shaderop_getrows_q4_0.h
140
142
  shaderop_getrows_q4_1.h
141
143
  shaderop_getrows_q6_k.h
142
- shaderop_rope_f16.h
143
- shaderop_rope_f32.h
144
+ shaderop_rope_norm_f16.h
145
+ shaderop_rope_norm_f32.h
146
+ shaderop_rope_neox_f16.h
147
+ shaderop_rope_neox_f32.h
144
148
  shaderop_cpy_f16_f16.h
145
149
  shaderop_cpy_f16_f32.h
146
150
  shaderop_cpy_f32_f16.h