@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -1,7 +1,3 @@
1
- if (NOT SOC_TYPE)
2
- set (SOC_TYPE "Ascend910B3")
3
- endif()
4
-
5
1
  file(GLOB SRC_FILES
6
2
  get_row_f32.cpp
7
3
  get_row_f16.cpp
@@ -13,7 +9,6 @@ file(GLOB SRC_FILES
13
9
  dup.cpp
14
10
  )
15
11
 
16
- string(TOLOWER ${SOC_TYPE} SOC_VERSION)
17
12
  set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
18
13
  set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")
19
14
 
@@ -30,4 +25,6 @@ ascendc_library(ascendc_kernels STATIC
30
25
  ${SRC_FILES}
31
26
  )
32
27
 
28
+ message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
29
+ ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
33
30
  # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
@@ -5,6 +5,7 @@
5
5
  using namespace AscendC;
6
6
 
7
7
  #define BUFFER_NUM 2
8
+ const int64_t SUPPORTED_MAX_DIM = 65535; // currently the max block dim supported by the dup kernel is 65535
8
9
 
9
10
  template <typename SRC_T, typename DST_T>
10
11
  class DupByRows {
@@ -51,24 +52,36 @@ class DupByRows {
51
52
 
52
53
  __aicore__ inline void copy_in() {
53
54
  LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
54
-
55
- DataCopyExtParams dataCopyParams;
56
- dataCopyParams.blockCount = 1;
57
- dataCopyParams.blockLen = num_elem * sizeof(SRC_T);
58
- DataCopyPadExtParams<SRC_T> padParams;
59
- DataCopyPad(src_local, src_gm, dataCopyParams, padParams);
60
-
55
+ const size_t elem_per_block = 32 / sizeof(SRC_T);
56
+ size_t tail = num_elem % elem_per_block;
57
+ size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
58
+ DataCopy(src_local, src_gm, cpy_elements_len);
61
59
  src_queue.EnQue(src_local);
62
60
  }
63
61
 
64
62
  __aicore__ inline void copy_out() {
65
63
  LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
66
-
64
+ #ifdef ASCEND_310P
65
+ const size_t elem_per_block = 32 / sizeof(DST_T);
66
+ size_t tail = num_elem % elem_per_block;
67
+ size_t len = num_elem & ~(elem_per_block - 1);
68
+ if (len > 0) {
69
+ DataCopy(dst_gm, dst_local, len);
70
+ }
71
+ if(tail != 0) {
72
+ for (size_t i = tail; i < elem_per_block; i++) {
73
+ dst_local[len + i].SetValue(0, 0);
74
+ }
75
+ SetAtomicAdd<float>();
76
+ DataCopy(dst_gm[len], dst_local[len], elem_per_block);
77
+ SetAtomicNone();
78
+ }
79
+ #else
67
80
  DataCopyExtParams dataCopyParams;
68
81
  dataCopyParams.blockCount = 1;
69
82
  dataCopyParams.blockLen = num_elem * sizeof(DST_T);
70
83
  DataCopyPad(dst_gm, dst_local, dataCopyParams);
71
-
84
+ #endif
72
85
  dst_queue.FreeTensor(dst_local);
73
86
  }
74
87
 
@@ -14,7 +14,7 @@ class GET_ROW_F16 {
14
14
  int64_t *output_ne_ub, size_t *output_nb_ub) {
15
15
  // TODO, use template for F16/f32
16
16
  int64_t op_block_num = GetBlockNum();
17
- int64_t op_block_idx = GetBlockIdx();
17
+ op_block_idx = GetBlockIdx();
18
18
 
19
19
  for (int i = 0; i < 4; i++) {
20
20
  input_ne[i] = input_ne_ub[i];
@@ -59,32 +59,42 @@ class GET_ROW_F16 {
59
59
  }
60
60
 
61
61
  __aicore__ inline void copy_in(uint32_t offset, size_t len) {
62
+ size_t origin_len = len;
62
63
  LocalTensor<half> input_local = input_queue.AllocTensor<half>();
63
- size_t tail = len % 32;
64
- len = len & ~31;
65
- DataCopy(input_local, input_gm[offset], len);
64
+ const size_t elem_per_block = 32 / sizeof(half);
65
+ size_t tail = len % elem_per_block;
66
+ len = len & ~(elem_per_block - 1);
66
67
  if(tail != 0) {
67
- DataCopyExtParams dataCopyParams;
68
- dataCopyParams.blockCount = 1;
69
- dataCopyParams.blockLen = tail * sizeof(half);
70
- DataCopyPadExtParams<half> padParams;
71
- DataCopyPad(input_local[len], input_gm[offset + len],
72
- dataCopyParams, padParams);
68
+ len += elem_per_block;
73
69
  }
70
+ DataCopy(input_local, input_gm[offset], len);
74
71
  input_queue.EnQue(input_local);
75
72
  }
76
73
 
77
74
  __aicore__ inline void copy_out(uint32_t offset, size_t len) {
78
75
  LocalTensor<float> output_local = output_queue.DeQue<float>();
79
- size_t tail = len % 32;
80
- len = len & ~31;
81
- DataCopy(output_gm[offset], output_local, len);
76
+ const size_t elem_per_block = 32 / sizeof(float);
77
+ size_t tail = len % elem_per_block;
78
+ len = len & ~(elem_per_block - 1);
79
+ if (len > 0) {
80
+ DataCopy(output_gm[offset], output_local, len);
81
+ }
82
+
82
83
  if(tail != 0) {
84
+ #ifdef ASCEND_310P
85
+ for (size_t i = tail; i < elem_per_block; i++) {
86
+ output_local[len + i].SetValue(0, 0);
87
+ }
88
+ SetAtomicAdd<float>();
89
+ DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
90
+ SetAtomicNone();
91
+ #else
83
92
  DataCopyExtParams dataCopyParams;
84
93
  dataCopyParams.blockCount = 1;
85
94
  dataCopyParams.blockLen = tail * sizeof(float);
86
95
  DataCopyPad(output_gm[offset + len], output_local[len],
87
96
  dataCopyParams);
97
+ #endif
88
98
  }
89
99
  output_queue.FreeTensor(output_local);
90
100
  }
@@ -150,6 +160,7 @@ class GET_ROW_F16 {
150
160
  GlobalTensor<float> output_gm;
151
161
  TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
152
162
  TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
163
+ int64_t op_block_idx;
153
164
  };
154
165
 
155
166
  template <typename T>
@@ -13,7 +13,7 @@ class GET_ROW_F32 {
13
13
  int64_t *indices_ne_ub, size_t *indices_nb_ub,
14
14
  int64_t *output_ne_ub, size_t *output_nb_ub) {
15
15
  int64_t op_block_num = GetBlockNum();
16
- int64_t op_block_idx = GetBlockIdx();
16
+ op_block_idx = GetBlockIdx();
17
17
 
18
18
  for (int i = 0; i < 4; i++) {
19
19
  input_ne[i] = input_ne_ub[i];
@@ -55,31 +55,40 @@ class GET_ROW_F32 {
55
55
 
56
56
  __aicore__ inline void copy_in(uint32_t offset, size_t len) {
57
57
  LocalTensor<float> input_local = input_queue.AllocTensor<float>();
58
- size_t tail = len % 32;
59
- len = len & ~31;
60
- DataCopy(input_local, input_gm[offset], len);
58
+ const size_t elem_per_block = 32 / sizeof(float);
59
+ size_t tail = len % elem_per_block;
60
+ len = len & ~(elem_per_block - 1);
61
61
  if(tail != 0) {
62
- DataCopyExtParams dataCopyParams;
63
- dataCopyParams.blockCount = 1;
64
- dataCopyParams.blockLen = tail * sizeof(float);
65
- DataCopyPadExtParams<float> padParams;
66
- DataCopyPad(input_local[len], input_gm[offset + len],
67
- dataCopyParams, padParams);
62
+ len += elem_per_block;
68
63
  }
64
+ DataCopy(input_local, input_gm[offset], len);
69
65
  input_queue.EnQue(input_local);
70
66
  }
71
67
 
72
68
  __aicore__ inline void copy_out(uint32_t offset, size_t len) {
73
69
  LocalTensor<float> output_local = output_queue.DeQue<float>();
74
- size_t tail = len % 32;
75
- len = len & ~31;
76
- DataCopy(output_gm[offset], output_local, len);
70
+ const size_t elem_per_block = 32 / sizeof(float);
71
+ size_t tail = len % elem_per_block;
72
+ len = len & ~(elem_per_block - 1);
73
+ if (len > 0) {
74
+ DataCopy(output_gm[offset], output_local, len);
75
+ }
76
+
77
77
  if(tail != 0) {
78
+ #ifdef ASCEND_310P
79
+ for (size_t i = tail; i < elem_per_block; i++) {
80
+ output_local[len + i].SetValue(0, 0);
81
+ }
82
+ SetAtomicAdd<float>();
83
+ DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
84
+ SetAtomicNone();
85
+ #else
78
86
  DataCopyExtParams dataCopyParams;
79
87
  dataCopyParams.blockCount = 1;
80
88
  dataCopyParams.blockLen = tail * sizeof(float);
81
89
  DataCopyPad(output_gm[offset + len], output_local[len],
82
90
  dataCopyParams);
91
+ #endif
83
92
  }
84
93
  output_queue.FreeTensor(output_local);
85
94
  }
@@ -144,6 +153,7 @@ class GET_ROW_F32 {
144
153
  GlobalTensor<float> output_gm;
145
154
  TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
146
155
  TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
156
+ int64_t op_block_idx;
147
157
  };
148
158
 
149
159
  template <typename T>
@@ -2,6 +2,15 @@
2
2
 
3
3
  // optimize me. Use template to avoid copy code.
4
4
  using namespace AscendC;
5
+ #ifdef ASCEND_310P // 310P not support 4bit get row
6
+ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
7
+ GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
8
+ GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
9
+ GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
10
+ // Only print an error here so that subsequent test cases can keep running. Of course, any test case that calls this operator will still fail.
11
+ printf("Ascend310P not support 4bit get row.\n");
12
+ }
13
+ #else
5
14
 
6
15
  #define BUFFER_NUM 2
7
16
 
@@ -191,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
191
200
  indices_nb_ub, output_ne_ub, output_nb_ub);
192
201
  op.calculate();
193
202
  }
203
+
204
+ #endif // #ifdef ASCEND_310P
@@ -1,6 +1,14 @@
1
1
  #include "kernel_operator.h"
2
2
 
3
3
  using namespace AscendC;
4
+ #ifdef ASCEND_310P
5
+ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
6
+ GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7
+ GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8
+ // Only print an error here so that subsequent test cases can keep running. Of course, any test case that calls this operator will still fail.
9
+ printf("Ascend310P not support f16->8bit quantization.\n");
10
+ }
11
+ #else
4
12
 
5
13
  #define BUFFER_NUM 2
6
14
  #define QK8_0 32
@@ -206,3 +214,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
206
214
  op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
207
215
  op.calculate();
208
216
  }
217
+
218
+ #endif // #ifdef ASCEND_310P
@@ -1,6 +1,14 @@
1
1
  #include "kernel_operator.h"
2
2
 
3
3
  using namespace AscendC;
4
+ #ifdef ASCEND_310P // 310P not support f32->8bit quantization
5
+ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
6
+ GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7
+ GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8
+ // Only print an error here so that subsequent test cases can keep running. Of course, any test case that calls this operator will still fail.
9
+ printf("Ascend310P not support f32->8bit quantization.\n");
10
+ }
11
+ #else
4
12
 
5
13
  #define BUFFER_NUM 2
6
14
  #define QK8_0 32
@@ -204,3 +212,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
204
212
  op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
205
213
  op.calculate();
206
214
  }
215
+
216
+ #endif // #ifdef ASCEND_310P
@@ -1,6 +1,21 @@
1
1
  #include "kernel_operator.h"
2
2
 
3
3
  using namespace AscendC;
4
+ #ifdef ASCEND_310P // 310P not support float->4bit quantization
5
+ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
6
+ GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
7
+ GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
8
+ // Only print an error here so that subsequent test cases can keep running. Of course, any test case that calls this operator will still fail.
9
+ printf("Ascend310P not support f32->4bit quantization.\n");
10
+ }
11
+
12
+ extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
13
+ GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
14
+ GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
15
+ // Only print an error here so that subsequent test cases can keep running. Of course, any test case that calls this operator will still fail.
16
+ printf("Ascend310P not support f16->4bit quantization.\n");
17
+ }
18
+ #else
4
19
 
5
20
  #define BUFFER_NUM 2
6
21
  #define Group_Size 32
@@ -276,3 +291,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
276
291
  op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
277
292
  op.calculate();
278
293
  }
294
+
295
+ #endif // #ifdef ASCEND_310P
@@ -6,7 +6,20 @@
6
6
  typedef uint16_t ggml_half;
7
7
  typedef uint32_t ggml_half2;
8
8
 
9
- #define GGML_COMMON_AGGR
9
+ #define GGML_COMMON_AGGR_U
10
+ #define GGML_COMMON_AGGR_S
11
+
12
+ #define GGML_COMMON_DECL
13
+ #elif defined(GGML_COMMON_DECL_CPP)
14
+ #include <cstdint>
15
+
16
+ typedef uint16_t ggml_half;
17
+ typedef uint32_t ggml_half2;
18
+
19
+ // Standard C++ allows anonymous unions, but some compilers warn about them.
20
+ #define GGML_COMMON_AGGR_U data
21
+ // Standard C++ does not allow anonymous structs.
22
+ #define GGML_COMMON_AGGR_S data
10
23
 
11
24
  #define GGML_COMMON_DECL
12
25
  #elif defined(GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t ggml_half2;
15
28
  typedef half ggml_half;
16
29
  typedef half2 ggml_half2;
17
30
 
18
- #define GGML_COMMON_AGGR
31
+ #define GGML_COMMON_AGGR_U
32
+ #define GGML_COMMON_AGGR_S
19
33
 
20
34
  #define GGML_COMMON_DECL
21
35
  #elif defined(GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 ggml_half2;
29
43
  typedef half ggml_half;
30
44
  typedef half2 ggml_half2;
31
45
 
32
- #define GGML_COMMON_AGGR data
46
+ #define GGML_COMMON_AGGR_U
47
+ #define GGML_COMMON_AGGR_S data
33
48
 
34
49
  #define GGML_COMMON_DECL
35
50
  #elif defined(GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 ggml_half2;
39
54
  typedef half ggml_half;
40
55
  typedef half2 ggml_half2;
41
56
 
42
- #define GGML_COMMON_AGGR data
57
+ #define GGML_COMMON_AGGR_U
58
+ #define GGML_COMMON_AGGR_S data
43
59
 
44
60
  #define GGML_COMMON_DECL
45
61
  #elif defined(GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 ggml_half2;
49
65
  typedef sycl::half ggml_half;
50
66
  typedef sycl::half2 ggml_half2;
51
67
 
52
- #define GGML_COMMON_AGGR data
68
+ #define GGML_COMMON_AGGR_U
69
+ #define GGML_COMMON_AGGR_S data
53
70
 
54
71
  #define GGML_COMMON_DECL
55
72
  #endif
@@ -154,9 +171,9 @@ typedef struct {
154
171
  struct {
155
172
  ggml_half d; // delta
156
173
  ggml_half m; // min
157
- } GGML_COMMON_AGGR;
174
+ } GGML_COMMON_AGGR_S;
158
175
  ggml_half2 dm;
159
- };
176
+ } GGML_COMMON_AGGR_U;
160
177
  uint8_t qs[QK4_1 / 2]; // nibbles / quants
161
178
  } block_q4_1;
162
179
  static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175
192
  struct {
176
193
  ggml_half d; // delta
177
194
  ggml_half m; // min
178
- } GGML_COMMON_AGGR;
195
+ } GGML_COMMON_AGGR_S;
179
196
  ggml_half2 dm;
180
- };
197
+ } GGML_COMMON_AGGR_U;
181
198
  uint8_t qh[4]; // 5-th bit of quants
182
199
  uint8_t qs[QK5_1 / 2]; // nibbles / quants
183
200
  } block_q5_1;
@@ -196,37 +213,13 @@ typedef struct {
196
213
  struct {
197
214
  ggml_half d; // delta
198
215
  ggml_half s; // d * sum(qs[i])
199
- } GGML_COMMON_AGGR;
216
+ } GGML_COMMON_AGGR_S;
200
217
  ggml_half2 ds;
201
- };
218
+ } GGML_COMMON_AGGR_U;
202
219
  int8_t qs[QK8_1]; // quants
203
220
  } block_q8_1;
204
221
  static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205
222
 
206
- typedef struct {
207
- ggml_half d[4]; // deltas for 4 q4_0 blocks
208
- uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209
- } block_q4_0x4;
210
- static_assert(sizeof(block_q4_0x4) == 4 * sizeof(ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211
-
212
- typedef struct {
213
- ggml_half d[8]; // deltas for 8 q4_0 blocks
214
- uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215
- } block_q4_0x8;
216
- static_assert(sizeof(block_q4_0x8) == 8 * sizeof(ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217
-
218
- typedef struct {
219
- ggml_half d[4]; // deltas for 4 q8_0 blocks
220
- int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221
- } block_q8_0x4;
222
- static_assert(sizeof(block_q8_0x4) == 4 * sizeof(ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223
-
224
- typedef struct {
225
- ggml_half d[8]; // deltas for 8 q8_0 blocks
226
- int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227
- } block_q8_0x8;
228
- static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229
-
230
223
  //
231
224
  // Ternary quantization
232
225
  //
@@ -261,9 +254,9 @@ typedef struct {
261
254
  struct {
262
255
  ggml_half d; // super-block scale for quantized scales
263
256
  ggml_half dmin; // super-block scale for quantized mins
264
- } GGML_COMMON_AGGR;
257
+ } GGML_COMMON_AGGR_S;
265
258
  ggml_half2 dm;
266
- };
259
+ } GGML_COMMON_AGGR_U;
267
260
  } block_q2_K;
268
261
  static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
262
 
@@ -288,9 +281,9 @@ typedef struct {
288
281
  struct {
289
282
  ggml_half d; // super-block scale for quantized scales
290
283
  ggml_half dmin; // super-block scale for quantized mins
291
- } GGML_COMMON_AGGR;
284
+ } GGML_COMMON_AGGR_S;
292
285
  ggml_half2 dm;
293
- };
286
+ } GGML_COMMON_AGGR_U;
294
287
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
288
  uint8_t qs[QK_K/2]; // 4-bit quants
296
289
  } block_q4_K;
@@ -305,9 +298,9 @@ typedef struct {
305
298
  struct {
306
299
  ggml_half d; // super-block scale for quantized scales
307
300
  ggml_half dmin; // super-block scale for quantized mins
308
- } GGML_COMMON_AGGR;
301
+ } GGML_COMMON_AGGR_S;
309
302
  ggml_half2 dm;
310
- };
303
+ } GGML_COMMON_AGGR_U;
311
304
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
305
  uint8_t qh[QK_K/8]; // quants, high bit
313
306
  uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -431,6 +424,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
431
424
  #define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
425
  #define GGML_TABLE_END() };
433
426
 
427
+ #define GGML_COMMON_IMPL
428
+ #elif defined(GGML_COMMON_IMPL_CPP)
429
+ #include <cstdint>
430
+
431
+ #define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
+ #define GGML_TABLE_END() };
433
+
434
434
  #define GGML_COMMON_IMPL
435
435
  #elif defined(GGML_COMMON_IMPL_METAL)
436
436
  #include <metal_stdlib>
@@ -473,7 +473,7 @@ GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
473
473
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
474
474
  GGML_TABLE_END()
475
475
 
476
- //#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
476
+ //#if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
477
477
  GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
478
478
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
479
479
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,