toy 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2107)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +1124 -0
  3. data/LICENSE +21 -0
  4. data/Makefile +2022 -0
  5. data/README.md +154 -0
  6. data/bin/toy +10 -0
  7. data/lib/toy/compute.rb +135 -0
  8. data/lib/toy/compute_cuda.rb +104 -0
  9. data/lib/toy/compute_metal.rb +97 -0
  10. data/lib/toy/core/cli/describe.rb +188 -0
  11. data/lib/toy/core/cli/eval.rb +385 -0
  12. data/lib/toy/core/cli/exit_codes.rb +15 -0
  13. data/lib/toy/core/cli/fetch.rb +238 -0
  14. data/lib/toy/core/cli/infer.rb +268 -0
  15. data/lib/toy/core/cli/install.rb +228 -0
  16. data/lib/toy/core/cli/list.rb +86 -0
  17. data/lib/toy/core/cli/manifest.rb +49 -0
  18. data/lib/toy/core/cli/new.rb +594 -0
  19. data/lib/toy/core/cli/serve.rb +237 -0
  20. data/lib/toy/core/cli/train.rb +471 -0
  21. data/lib/toy/core/cli.rb +165 -0
  22. data/lib/toy/core/config.rb +64 -0
  23. data/lib/toy/core/gguf_meta.rb +161 -0
  24. data/lib/toy/core/model_scan.rb +221 -0
  25. data/lib/toy/core/run_log.rb +94 -0
  26. data/lib/toy/core/toy_root.rb +95 -0
  27. data/lib/toy/dev/toy_card.rb +299 -0
  28. data/lib/toy/dev/toy_describe_flow.rb +412 -0
  29. data/lib/toy/dev/toy_logprobs.rb +86 -0
  30. data/lib/toy/dev/toy_tap.rb +183 -0
  31. data/lib/toy/dev/toy_token_drift.rb +121 -0
  32. data/lib/toy/ffi/tinynn.rb +1491 -0
  33. data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
  34. data/lib/toy/ffi/tinynn_metal.rb +359 -0
  35. data/lib/toy/ffi_manifest.rb +84 -0
  36. data/lib/toy/io/bpe.rb +325 -0
  37. data/lib/toy/io/gguf_kv.rb +35 -0
  38. data/lib/toy/io/gguf_load.rb +331 -0
  39. data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
  40. data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
  41. data/lib/toy/io/model_index.rb +206 -0
  42. data/lib/toy/io/run_bundle.rb +280 -0
  43. data/lib/toy/io/tokenizer.rb +613 -0
  44. data/lib/toy/io/toy_corpus_loader.rb +52 -0
  45. data/lib/toy/io/toy_events.rb +56 -0
  46. data/lib/toy/io/toy_image_loader.rb +48 -0
  47. data/lib/toy/llm/adamw.rb +169 -0
  48. data/lib/toy/llm/archs/llama_arch.rb +233 -0
  49. data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
  50. data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
  51. data/lib/toy/llm/blocks/transformer_block.rb +876 -0
  52. data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
  53. data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
  54. data/lib/toy/llm/classify_batch.rb +88 -0
  55. data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
  56. data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
  57. data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
  58. data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
  59. data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
  60. data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
  61. data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
  62. data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
  63. data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
  64. data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
  65. data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
  66. data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
  67. data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
  68. data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
  69. data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
  70. data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
  71. data/lib/toy/llm/labels.rb +142 -0
  72. data/lib/toy/llm/primitives/gqa.rb +62 -0
  73. data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
  74. data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
  75. data/lib/toy/llm/primitives/rms_norm.rb +39 -0
  76. data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
  77. data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
  78. data/lib/toy/llm/primitives/rope.rb +68 -0
  79. data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
  80. data/lib/toy/llm/primitives/rope_metal.rb +72 -0
  81. data/lib/toy/llm/primitives/swiglu.rb +41 -0
  82. data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
  83. data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
  84. data/lib/toy/llm/recipe_options.rb +71 -0
  85. data/lib/toy/llm/recipes/from_scratch.rb +105 -0
  86. data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
  87. data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
  88. data/lib/toy/llm/recipes/lora.rb +110 -0
  89. data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
  90. data/lib/toy/llm/recipes/lora_metal.rb +114 -0
  91. data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
  92. data/lib/toy/llm/recipes/warm_start.rb +235 -0
  93. data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
  94. data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
  95. data/lib/toy/llm/training_batch.rb +133 -0
  96. data/lib/toy/models/arch.rb +253 -0
  97. data/lib/toy/models/gpt2.rb +311 -0
  98. data/lib/toy/models/toy_gpt2.rb +177 -0
  99. data/lib/toy/models/toy_smollm2.rb +393 -0
  100. data/lib/toy/models/toy_vit.rb +83 -0
  101. data/lib/toy/models/transformer.rb +1494 -0
  102. data/lib/toy/models/transformer_lm.rb +298 -0
  103. data/lib/toy/models/transformer_lm_cuda.rb +159 -0
  104. data/lib/toy/models/transformer_lm_metal.rb +142 -0
  105. data/lib/toy/mri.rb +300 -0
  106. data/lib/toy/run/eval.rb +76 -0
  107. data/lib/toy/run/eval_cuda.rb +66 -0
  108. data/lib/toy/run/eval_lmc.rb +334 -0
  109. data/lib/toy/run/eval_metal.rb +67 -0
  110. data/lib/toy/run/infer.rb +130 -0
  111. data/lib/toy/run/infer_cuda.rb +118 -0
  112. data/lib/toy/run/infer_metal.rb +119 -0
  113. data/lib/toy/run/infer_trace.rb +37 -0
  114. data/lib/toy/run/serve.rb +144 -0
  115. data/lib/toy/run/train.rb +404 -0
  116. data/lib/toy/run/train_cuda.rb +397 -0
  117. data/lib/toy/run/train_gpt2.rb +103 -0
  118. data/lib/toy/run/train_gpt2_cuda.rb +85 -0
  119. data/lib/toy/run/train_gpt2_metal.rb +85 -0
  120. data/lib/toy/run/train_lora.rb +207 -0
  121. data/lib/toy/run/train_lora_cuda.rb +219 -0
  122. data/lib/toy/run/train_metal.rb +227 -0
  123. data/lib/toy/run/train_vit.rb +251 -0
  124. data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
  125. data/lib/toy/serve/openai/handlers.rb +143 -0
  126. data/lib/toy/serve/openai/server.rb +159 -0
  127. data/lib/toy/train/sampler.rb +314 -0
  128. data/lib/toy/train/toy_chat_template.rb +179 -0
  129. data/lib/toy/train/toy_drift_grad.rb +176 -0
  130. data/lib/toy/train/toy_gguf_fuse.rb +428 -0
  131. data/lib/toy/train/toy_gguf_writer.rb +100 -0
  132. data/lib/toy/train/toy_lr_schedule.rb +39 -0
  133. data/lib/toy/train/toy_sample.rb +125 -0
  134. data/lib/toy/train/toy_trainer.rb +86 -0
  135. data/lib/toy/train/training.rb +160 -0
  136. data/lib/toy/version.rb +11 -0
  137. data/lib/toy.rb +902 -0
  138. data/prep/progress +118 -0
  139. data/prep/quietly +64 -0
  140. data/sig/toy.rbs +397 -0
  141. data/sig/toy_compute.rbs +450 -0
  142. data/spinel-ext.json +122 -0
  143. data/tinynn/Makefile +71 -0
  144. data/tinynn/tinynn_backend_cuda.c +99 -0
  145. data/tinynn/tinynn_backend_metal.m +75 -0
  146. data/tinynn/tinynn_events.c +122 -0
  147. data/tinynn/tinynn_events.h +83 -0
  148. data/tinynn/tinynn_ggml.c +2460 -0
  149. data/tinynn/tinynn_ggml.h +545 -0
  150. data/tinynn/tinynn_gguf.c +783 -0
  151. data/tinynn/tinynn_gguf.h +167 -0
  152. data/tinynn/tinynn_trace.c +180 -0
  153. data/tinynn/tinynn_trace.h +85 -0
  154. data/vendor/ggml/AUTHORS +335 -0
  155. data/vendor/ggml/CMakeLists.txt +505 -0
  156. data/vendor/ggml/CONTRIBUTING.md +3 -0
  157. data/vendor/ggml/LICENSE +21 -0
  158. data/vendor/ggml/README.md +50 -0
  159. data/vendor/ggml/ci/run.sh +395 -0
  160. data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
  161. data/vendor/ggml/cmake/GitVars.cmake +22 -0
  162. data/vendor/ggml/cmake/common.cmake +50 -0
  163. data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
  164. data/vendor/ggml/docs/gguf.md +828 -0
  165. data/vendor/ggml/examples/CMakeLists.txt +34 -0
  166. data/vendor/ggml/examples/common-ggml.cpp +244 -0
  167. data/vendor/ggml/examples/common-ggml.h +18 -0
  168. data/vendor/ggml/examples/common.cpp +675 -0
  169. data/vendor/ggml/examples/common.h +322 -0
  170. data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
  171. data/vendor/ggml/examples/gpt-2/README.md +225 -0
  172. data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
  173. data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
  174. data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
  175. data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
  176. data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
  177. data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
  178. data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
  179. data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
  180. data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
  181. data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
  182. data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
  183. data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
  184. data/vendor/ggml/examples/gpt-j/README.md +239 -0
  185. data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
  186. data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
  187. data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
  188. data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
  189. data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
  190. data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
  191. data/vendor/ggml/examples/magika/README.md +23 -0
  192. data/vendor/ggml/examples/magika/convert.py +32 -0
  193. data/vendor/ggml/examples/magika/main.cpp +374 -0
  194. data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
  195. data/vendor/ggml/examples/mnist/README.md +206 -0
  196. data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
  197. data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
  198. data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
  199. data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
  200. data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
  201. data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
  202. data/vendor/ggml/examples/mnist/server.py +36 -0
  203. data/vendor/ggml/examples/mnist/web/index.html +178 -0
  204. data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
  205. data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
  206. data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
  207. data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
  208. data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
  209. data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
  210. data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
  211. data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
  212. data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
  213. data/vendor/ggml/examples/prompts/replit.txt +100 -0
  214. data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
  215. data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
  216. data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
  217. data/vendor/ggml/examples/prompts/whisper.txt +100 -0
  218. data/vendor/ggml/examples/python/README.md +115 -0
  219. data/vendor/ggml/examples/python/api.h +14 -0
  220. data/vendor/ggml/examples/python/example_add_quant.py +25 -0
  221. data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
  222. data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
  223. data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
  224. data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
  225. data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
  226. data/vendor/ggml/examples/python/ggml/utils.py +182 -0
  227. data/vendor/ggml/examples/python/regenerate.py +42 -0
  228. data/vendor/ggml/examples/python/stubs.py +128 -0
  229. data/vendor/ggml/examples/python/test_tensor.py +258 -0
  230. data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
  231. data/vendor/ggml/examples/sam/README.md +95 -0
  232. data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
  233. data/vendor/ggml/examples/sam/example.jpg +0 -0
  234. data/vendor/ggml/examples/sam/sam.cpp +2370 -0
  235. data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
  236. data/vendor/ggml/examples/simple/README.md +61 -0
  237. data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
  238. data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
  239. data/vendor/ggml/examples/stb_image.h +7987 -0
  240. data/vendor/ggml/examples/stb_image_write.h +1724 -0
  241. data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
  242. data/vendor/ggml/examples/test-cmake/README.md +3 -0
  243. data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
  244. data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
  245. data/vendor/ggml/examples/yolo/README.md +59 -0
  246. data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
  247. data/vendor/ggml/examples/yolo/data/coco.names +80 -0
  248. data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
  249. data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
  250. data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
  251. data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
  252. data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
  253. data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
  254. data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
  255. data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
  256. data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
  257. data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
  258. data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
  259. data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
  260. data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
  261. data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
  262. data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
  263. data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
  264. data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
  265. data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
  266. data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
  267. data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
  268. data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
  269. data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
  270. data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
  271. data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
  272. data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
  273. data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
  274. data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
  275. data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
  276. data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
  277. data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
  278. data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
  279. data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
  280. data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
  281. data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
  282. data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
  283. data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
  284. data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
  285. data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
  286. data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
  287. data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
  288. data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
  289. data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
  290. data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
  291. data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
  292. data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
  293. data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
  294. data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
  295. data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
  296. data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
  297. data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
  298. data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
  299. data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
  300. data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
  301. data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
  302. data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
  303. data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
  304. data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
  305. data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
  306. data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
  307. data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
  308. data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
  309. data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
  310. data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
  311. data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
  312. data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
  313. data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
  314. data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
  315. data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
  316. data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
  317. data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
  318. data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
  319. data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
  320. data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
  321. data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
  322. data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
  323. data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
  324. data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
  325. data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
  326. data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
  327. data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
  328. data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
  329. data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
  330. data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
  331. data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
  332. data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
  333. data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
  334. data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
  335. data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
  336. data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
  337. data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
  338. data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
  339. data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
  340. data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
  341. data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
  342. data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
  343. data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
  344. data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
  345. data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
  346. data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
  347. data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
  348. data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
  349. data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
  350. data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
  351. data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
  352. data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
  353. data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
  354. data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
  355. data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
  356. data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
  357. data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
  358. data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
  359. data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
  360. data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
  361. data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
  362. data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
  363. data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
  364. data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
  365. data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
  366. data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
  367. data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
  368. data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
  369. data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
  370. data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
  371. data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
  372. data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
  373. data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
  374. data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
  375. data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
  376. data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
  377. data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
  378. data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
  379. data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
  380. data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
  381. data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
  382. data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
  383. data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
  384. data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
  385. data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
  386. data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
  387. data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
  388. data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
  389. data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
  390. data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
  391. data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
  392. data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
  393. data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
  394. data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
  395. data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
  396. data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
  397. data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
  398. data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
  399. data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
  400. data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
  401. data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
  402. data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
  403. data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
  404. data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
  405. data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
  406. data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
  407. data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
  408. data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
  409. data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
  410. data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
  411. data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
  412. data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
  413. data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
  414. data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
  415. data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
  416. data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
  417. data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
  418. data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
  419. data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
  420. data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
  421. data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
  422. data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
  423. data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
  424. data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
  425. data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
  426. data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
  427. data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
  428. data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
  429. data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
  430. data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
  431. data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
  432. data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
  433. data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
  434. data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
  435. data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
  436. data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
  437. data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
  438. data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
  439. data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
  440. data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
  441. data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
  442. data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
  443. data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
  444. data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
  445. data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
  446. data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
  447. data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
  448. data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
  449. data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
  450. data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
  451. data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
  452. data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
  453. data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
  454. data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
  455. data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
  456. data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
  457. data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
  458. data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
  459. data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
  460. data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
  461. data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
  462. data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
  463. data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
  464. data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
  465. data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
  466. data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
  467. data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
  468. data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
  469. data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
  470. data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
  471. data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
  472. data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
  473. data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
  474. data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
  475. data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
  476. data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
  477. data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
  478. data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
  479. data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
  480. data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
  481. data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
  482. data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
  483. data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
  484. data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
  485. data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
  486. data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
  487. data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
  488. data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
  489. data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
  490. data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
  491. data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
  492. data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
  493. data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
  494. data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
  495. data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
  496. data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
  497. data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
  498. data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
  499. data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
  500. data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
  501. data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
  502. data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
  503. data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
  504. data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
  505. data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
  506. data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
  507. data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
  508. data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
  509. data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
  510. data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
  511. data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
  512. data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
  513. data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
  514. data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
  515. data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
  516. data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
  517. data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
  518. data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
  519. data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
  520. data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
  521. data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
  522. data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
  523. data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
  524. data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
  525. data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
  526. data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
  527. data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
  528. data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
  529. data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
  530. data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
  531. data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
  532. data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
  533. data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
  534. data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
  535. data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
  536. data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
  537. data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
  538. data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
  539. data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
  540. data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
  541. data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
  542. data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
  543. data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
  544. data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
  545. data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
  546. data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
  547. data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
  548. data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
  549. data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
  550. data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
  551. data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
  552. data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
  553. data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
  554. data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
  555. data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
  556. data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
  557. data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
  558. data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
  559. data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
  560. data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
  561. data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
  562. data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
  563. data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
  564. data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
  565. data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
  566. data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
  567. data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
  568. data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
  569. data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
  570. data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
  571. data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
  572. data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
  573. data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
  574. data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
  575. data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
  576. data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
  577. data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
  578. data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
  579. data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
  580. data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
  581. data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
  582. data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
  583. data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
  584. data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
  585. data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
  586. data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
  587. data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
  588. data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
  589. data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
  590. data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
  591. data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
  592. data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
  593. data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
  594. data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
  595. data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
  596. data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
  597. data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
  598. data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
  599. data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
  600. data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
  601. data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
  602. data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
  603. data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
  604. data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
  605. data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
  606. data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
  607. data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
  608. data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
  609. data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
  610. data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
  611. data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
  612. data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
  613. data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
  614. data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
  615. data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
  616. data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
  617. data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
  618. data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
  619. data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
  620. data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
  621. data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
  622. data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
  623. data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
  624. data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
  625. data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
  626. data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
  627. data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
  628. data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
  629. data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
  630. data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
  631. data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
  632. data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
  633. data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
  634. data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
  635. data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
  636. data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
  637. data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
  638. data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
  639. data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
  640. data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
  641. data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
  642. data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
  643. data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
  644. data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
  645. data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
  646. data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
  647. data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
  648. data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
  649. data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
  650. data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
  651. data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
  652. data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
  653. data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
  654. data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
  655. data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
  656. data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
  657. data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
  658. data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
  659. data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
  660. data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
  661. data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
  662. data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
  663. data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
  664. data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
  665. data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
  666. data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
  667. data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
  668. data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
  669. data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
  670. data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
  671. data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
  672. data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
  673. data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
  674. data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
  675. data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
  676. data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
  677. data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
  678. data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
  679. data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
  680. data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
  681. data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
  682. data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
  683. data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
  684. data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
  685. data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
  686. data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
  687. data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
  688. data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
  689. data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
  690. data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
  691. data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
  692. data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
  693. data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
  694. data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
  695. data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
  696. data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
  697. data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
  698. data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
  699. data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
  700. data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
  701. data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
  702. data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
  703. data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
  704. data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
  705. data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
  706. data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
  707. data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
  708. data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
  709. data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
  710. data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
  711. data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
  712. data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
  713. data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
  714. data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
  715. data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
  716. data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
  717. data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
  718. data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
  719. data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
  720. data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
  721. data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
  722. data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
  723. data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
  724. data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
  725. data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
  726. data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
  727. data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
  728. data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
  729. data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
  730. data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
  731. data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
  732. data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
  733. data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
  734. data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
  735. data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
  736. data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
  737. data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
  738. data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
  739. data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
  740. data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
  741. data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
  742. data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
  743. data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
  744. data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
  745. data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
  746. data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
  747. data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
  748. data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
  749. data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
  750. data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
  751. data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
  752. data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
  753. data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
  754. data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
  755. data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
  756. data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
  757. data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
  758. data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
  759. data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
  760. data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
  761. data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
  762. data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
  763. data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
  764. data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
  765. data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
  766. data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
  767. data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
  768. data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
  769. data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
  770. data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
  771. data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
  772. data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
  773. data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
  774. data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
  775. data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
  776. data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
  777. data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
  778. data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
  779. data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
  780. data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
  781. data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
  782. data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
  783. data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
  784. data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
  785. data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
  786. data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
  787. data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
  788. data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
  789. data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
  790. data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
  791. data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
  792. data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
  793. data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
  794. data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
  795. data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
  796. data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
  797. data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
  798. data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
  799. data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
  800. data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
  801. data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
  802. data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
  803. data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
  804. data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
  805. data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
  806. data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
  807. data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
  808. data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
  809. data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
  810. data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
  811. data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
  812. data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
  813. data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
  814. data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
  815. data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
  816. data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
  817. data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
  818. data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
  819. data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
  820. data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
  821. data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
  822. data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
  823. data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
  824. data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
  825. data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
  826. data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
  827. data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
  828. data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
  829. data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
  830. data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
  831. data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
  832. data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
  833. data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
  834. data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
  835. data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
  836. data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
  837. data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
  838. data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
  839. data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
  840. data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
  841. data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
  842. data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
  843. data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
  844. data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
  845. data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
  846. data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
  847. data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
  848. data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
  849. data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
  850. data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
  851. data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
  852. data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
  853. data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
  854. data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
  855. data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
  856. data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
  857. data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
  858. data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
  859. data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
  860. data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
  861. data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
  862. data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
  863. data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
  864. data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
  865. data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
  866. data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
  867. data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
  868. data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
  869. data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
  870. data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
  871. data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
  872. data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
  873. data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
  874. data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
  875. data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
  876. data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
  877. data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
  878. data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
  879. data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
  880. data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
  881. data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
  882. data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
  883. data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
  884. data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
  885. data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
  886. data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
  887. data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
  888. data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
  889. data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
  890. data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
  891. data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
  892. data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
  893. data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
  894. data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
  895. data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
  896. data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
  897. data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
  898. data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
  899. data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
  900. data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
  901. data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
  902. data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
  903. data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
  904. data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
  905. data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
  906. data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
  907. data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
  908. data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
  909. data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
  910. data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
  911. data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
  912. data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
  913. data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
  914. data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
  915. data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
  916. data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
  917. data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
  918. data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
  919. data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
  920. data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
  921. data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
  922. data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
  923. data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
  924. data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
  925. data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
  926. data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
  927. data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
  928. data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
  929. data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
  930. data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
  931. data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
  932. data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
  933. data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
  934. data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
  935. data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
  936. data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
  937. data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
  938. data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
  939. data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
  940. data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
  941. data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
  942. data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
  943. data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
  944. data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
  945. data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
  946. data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
  947. data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
  948. data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
  949. data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
  950. data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
  951. data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
  952. data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
  953. data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
  954. data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
  955. data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
  956. data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
  957. data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
  958. data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
  959. data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
  960. data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
  961. data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
  962. data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
  963. data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
  964. data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
  965. data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
  966. data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
  967. data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
  968. data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
  969. data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
  970. data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
  971. data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
  972. data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
  973. data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
  974. data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
  975. data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
  976. data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
  977. data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
  978. data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
  979. data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
  980. data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
  981. data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
  982. data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
  983. data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
  984. data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
  985. data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
  986. data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
  987. data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
  988. data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
  989. data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
  990. data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
  991. data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
  992. data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
  993. data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
  994. data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
  995. data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
  996. data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
  997. data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
  998. data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
  999. data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
  1000. data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
  1001. data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
  1002. data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
  1003. data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
  1004. data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
  1005. data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
  1006. data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
  1007. data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
  1008. data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
  1009. data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
  1010. data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
  1011. data/vendor/ggml/ggml.pc.in +10 -0
  1012. data/vendor/ggml/include/ggml-alloc.h +85 -0
  1013. data/vendor/ggml/include/ggml-backend.h +431 -0
  1014. data/vendor/ggml/include/ggml-blas.h +25 -0
  1015. data/vendor/ggml/include/ggml-cann.h +123 -0
  1016. data/vendor/ggml/include/ggml-cpp.h +39 -0
  1017. data/vendor/ggml/include/ggml-cpu.h +151 -0
  1018. data/vendor/ggml/include/ggml-cuda.h +50 -0
  1019. data/vendor/ggml/include/ggml-hexagon.h +19 -0
  1020. data/vendor/ggml/include/ggml-metal.h +61 -0
  1021. data/vendor/ggml/include/ggml-opencl.h +26 -0
  1022. data/vendor/ggml/include/ggml-openvino.h +37 -0
  1023. data/vendor/ggml/include/ggml-opt.h +256 -0
  1024. data/vendor/ggml/include/ggml-rpc.h +35 -0
  1025. data/vendor/ggml/include/ggml-sycl.h +49 -0
  1026. data/vendor/ggml/include/ggml-virtgpu.h +14 -0
  1027. data/vendor/ggml/include/ggml-vulkan.h +29 -0
  1028. data/vendor/ggml/include/ggml-webgpu.h +19 -0
  1029. data/vendor/ggml/include/ggml-zdnn.h +17 -0
  1030. data/vendor/ggml/include/ggml-zendnn.h +22 -0
  1031. data/vendor/ggml/include/ggml.h +2845 -0
  1032. data/vendor/ggml/include/gguf.h +204 -0
  1033. data/vendor/ggml/requirements.txt +12 -0
  1034. data/vendor/ggml/scripts/gen-authors.sh +9 -0
  1035. data/vendor/ggml/scripts/release.sh +296 -0
  1036. data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
  1037. data/vendor/ggml/scripts/sync-llama.last +1 -0
  1038. data/vendor/ggml/scripts/sync-llama.sh +21 -0
  1039. data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
  1040. data/vendor/ggml/scripts/sync-whisper.last +1 -0
  1041. data/vendor/ggml/scripts/sync-whisper.sh +17 -0
  1042. data/vendor/ggml/src/CMakeLists.txt +493 -0
  1043. data/vendor/ggml/src/ggml-alloc.c +1248 -0
  1044. data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
  1045. data/vendor/ggml/src/ggml-backend-dl.h +45 -0
  1046. data/vendor/ggml/src/ggml-backend-impl.h +275 -0
  1047. data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
  1048. data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
  1049. data/vendor/ggml/src/ggml-backend.cpp +2371 -0
  1050. data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  1051. data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
  1052. data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
  1053. data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
  1054. data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
  1055. data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
  1056. data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
  1057. data/vendor/ggml/src/ggml-cann/common.h +651 -0
  1058. data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
  1059. data/vendor/ggml/src/ggml-common.h +1900 -0
  1060. data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
  1061. data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
  1062. data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
  1063. data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
  1064. data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  1065. data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  1066. data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  1067. data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
  1068. data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
  1069. data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
  1070. data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  1071. data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
  1072. data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  1073. data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
  1074. data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
  1075. data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  1076. data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
  1077. data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
  1078. data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  1079. data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
  1080. data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
  1081. data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
  1082. data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  1083. data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
  1084. data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  1085. data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
  1086. data/vendor/ggml/src/ggml-cpu/common.h +95 -0
  1087. data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
  1088. data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
  1089. data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
  1090. data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
  1091. data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
  1092. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
  1093. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  1094. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
  1095. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  1096. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
  1097. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  1098. data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
  1099. data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
  1100. data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
  1101. data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
  1102. data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
  1103. data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
  1104. data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
  1105. data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
  1106. data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
  1107. data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
  1108. data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
  1109. data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
  1110. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
  1111. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
  1112. data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
  1113. data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
  1114. data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
  1115. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
  1116. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
  1117. data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
  1118. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
  1119. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
  1120. data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
  1121. data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
  1122. data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
  1123. data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  1124. data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
  1125. data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
  1126. data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
  1127. data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
  1128. data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
  1129. data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
  1130. data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
  1131. data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
  1132. data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
  1133. data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
  1134. data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
  1135. data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
  1136. data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
  1137. data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
  1138. data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
  1139. data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
  1140. data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
  1141. data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
  1142. data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
  1143. data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
  1144. data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
  1145. data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
  1146. data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
  1147. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
  1148. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  1149. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  1150. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  1151. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
  1152. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
  1153. data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
  1154. data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  1155. data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
  1156. data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
  1157. data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
  1158. data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  1159. data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  1160. data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  1161. data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
  1162. data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
  1163. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
  1164. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  1165. data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
  1166. data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  1167. data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
  1168. data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
  1169. data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
  1170. data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
  1171. data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  1172. data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
  1173. data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
  1174. data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
  1175. data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
  1176. data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
  1177. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
  1178. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
  1179. data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
  1180. data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
  1181. data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
  1182. data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
  1183. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
  1184. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  1185. data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
  1186. data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
  1187. data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
  1188. data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
  1189. data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
  1190. data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
  1191. data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
  1192. data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
  1193. data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
  1194. data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
  1195. data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
  1196. data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
  1197. data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
  1198. data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
  1199. data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
  1200. data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
  1201. data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
  1202. data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
  1203. data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
  1204. data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
  1205. data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
  1206. data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
  1207. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  1208. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  1209. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  1210. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  1211. data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
  1212. data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  1213. data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
  1214. data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
  1215. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  1216. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  1217. data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
  1218. data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  1219. data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
  1220. data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
  1221. data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
  1222. data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
  1223. data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
  1224. data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
  1225. data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
  1226. data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
  1227. data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
  1228. data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
  1229. data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  1230. data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
  1231. data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
  1232. data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
  1233. data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
  1234. data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
  1235. data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
  1236. data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
  1237. data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
  1238. data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  1239. data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  1240. data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
  1241. data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  1242. data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
  1243. data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  1244. data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
  1245. data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
  1246. data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
  1247. data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  1248. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
  1249. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
  1250. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
  1251. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  1252. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  1253. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
  1254. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
  1255. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
  1256. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
  1257. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
  1258. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  1259. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  1260. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
  1261. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  1262. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
  1263. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
  1264. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  1265. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  1266. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  1267. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
  1268. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
  1269. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  1270. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  1271. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
  1272. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  1273. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
  1274. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  1275. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
  1276. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  1277. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  1278. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  1279. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  1280. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  1281. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
  1282. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
  1283. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
  1284. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
  1285. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
  1286. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
  1287. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
  1288. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
  1289. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  1290. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  1291. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  1292. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  1293. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  1294. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  1295. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
  1296. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  1297. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  1298. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  1299. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  1300. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  1301. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  1302. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
  1303. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  1304. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  1305. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  1306. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  1307. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  1308. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  1309. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
  1310. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  1311. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  1312. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  1313. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  1314. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  1315. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  1316. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
  1317. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  1318. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  1319. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  1320. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  1321. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  1322. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  1323. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
  1324. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  1325. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  1326. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  1327. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  1328. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  1329. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  1330. data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
  1331. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  1332. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  1333. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  1334. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  1335. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  1336. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  1337. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  1338. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  1339. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  1340. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  1341. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  1342. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  1343. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  1344. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  1345. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  1346. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  1347. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  1348. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  1349. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  1350. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  1351. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  1352. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  1353. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  1354. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  1355. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  1356. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
  1357. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
  1358. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  1359. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  1360. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  1361. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  1362. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  1363. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  1364. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  1365. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  1366. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  1367. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  1368. data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
  1369. data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
  1370. data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
  1371. data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
  1372. data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
  1373. data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
  1374. data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
  1375. data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  1376. data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
  1377. data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
  1378. data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
  1379. data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
  1380. data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
  1381. data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
  1382. data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
  1383. data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
  1384. data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
  1385. data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
  1386. data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
  1387. data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
  1388. data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
  1389. data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
  1390. data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
  1391. data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
  1392. data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  1393. data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
  1394. data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
  1395. data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
  1396. data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
  1397. data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
  1398. data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
  1399. data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
  1400. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  1401. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
  1402. data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
  1403. data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  1404. data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
  1405. data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
  1406. data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
  1407. data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
  1408. data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
  1409. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
  1410. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
  1411. data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
  1412. data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
  1413. data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
  1414. data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
  1415. data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  1416. data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
  1417. data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
  1418. data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
  1419. data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  1420. data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
  1421. data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  1422. data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  1423. data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  1424. data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
  1425. data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  1426. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
  1427. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  1428. data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  1429. data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
  1430. data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
  1431. data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
  1432. data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
  1433. data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
  1434. data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
  1435. data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
  1436. data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
  1437. data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
  1438. data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  1439. data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
  1440. data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
  1441. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  1442. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  1443. data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  1444. data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  1445. data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
  1446. data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
  1447. data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
  1448. data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
  1449. data/vendor/ggml/src/ggml-impl.h +783 -0
  1450. data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  1451. data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  1452. data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  1453. data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  1454. data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
  1455. data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
  1456. data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
  1457. data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
  1458. data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
  1459. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
  1460. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
  1461. data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
  1462. data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
  1463. data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
  1464. data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
  1465. data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
  1466. data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
  1467. data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
  1468. data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  1469. data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  1470. data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  1471. data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  1472. data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  1473. data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  1474. data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  1475. data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
  1476. data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  1477. data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
  1478. data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  1479. data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  1480. data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  1481. data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  1482. data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  1483. data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  1484. data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  1485. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  1486. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  1487. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  1488. data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  1489. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  1490. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
  1491. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
  1492. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
  1493. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
  1494. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
  1495. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
  1496. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
  1497. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  1498. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
  1499. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
  1500. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
  1501. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
  1502. data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
  1503. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  1504. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
  1505. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
  1506. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
  1507. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
  1508. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
  1509. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
  1510. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
  1511. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
  1512. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  1513. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
  1514. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
  1515. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
  1516. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
  1517. data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  1518. data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  1519. data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  1520. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  1521. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  1522. data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  1523. data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  1524. data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
  1525. data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
  1526. data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  1527. data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  1528. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  1529. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  1530. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  1531. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
  1532. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  1533. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  1534. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
  1535. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
  1536. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  1537. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  1538. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  1539. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  1540. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  1541. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  1542. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  1543. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  1544. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  1545. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  1546. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  1547. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  1548. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
  1549. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
  1550. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  1551. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  1552. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  1553. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  1554. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  1555. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  1556. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  1557. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  1558. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  1559. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  1560. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
  1561. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
  1562. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
  1563. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  1564. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  1565. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  1566. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  1567. data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  1568. data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  1569. data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  1570. data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  1571. data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  1572. data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  1573. data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  1574. data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  1575. data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  1576. data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  1577. data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  1578. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  1579. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  1580. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  1581. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  1582. data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  1583. data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  1584. data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  1585. data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  1586. data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  1587. data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  1588. data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  1589. data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  1590. data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
  1591. data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  1592. data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  1593. data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  1594. data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  1595. data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
  1596. data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  1597. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
  1598. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  1599. data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
  1600. data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
  1601. data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  1602. data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  1603. data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  1604. data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  1605. data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  1606. data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  1607. data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  1608. data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  1609. data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  1610. data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  1611. data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  1612. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  1613. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  1614. data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  1615. data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  1616. data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  1617. data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  1618. data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
  1619. data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  1620. data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  1621. data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  1622. data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  1623. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
  1624. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  1625. data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  1626. data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
  1627. data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
  1628. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  1629. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  1630. data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  1631. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  1632. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  1633. data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
  1634. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
  1635. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  1636. data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
  1637. data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
  1638. data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
  1639. data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
  1640. data/vendor/ggml/src/ggml-opt.cpp +1094 -0
  1641. data/vendor/ggml/src/ggml-quants.c +5491 -0
  1642. data/vendor/ggml/src/ggml-quants.h +112 -0
  1643. data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
  1644. data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
  1645. data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
  1646. data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
  1647. data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
  1648. data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
  1649. data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
  1650. data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
  1651. data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
  1652. data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  1653. data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
  1654. data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
  1655. data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
  1656. data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
  1657. data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
  1658. data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
  1659. data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
  1660. data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
  1661. data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  1662. data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  1663. data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
  1664. data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
  1665. data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
  1666. data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
  1667. data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
  1668. data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
  1669. data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
  1670. data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
  1671. data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  1672. data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
  1673. data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
  1674. data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
  1675. data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
  1676. data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
  1677. data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
  1678. data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
  1679. data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
  1680. data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
  1681. data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
  1682. data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
  1683. data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
  1684. data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
  1685. data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
  1686. data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
  1687. data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
  1688. data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
  1689. data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
  1690. data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
  1691. data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
  1692. data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
  1693. data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
  1694. data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
  1695. data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  1696. data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
  1697. data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
  1698. data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
  1699. data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
  1700. data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
  1701. data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
  1702. data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
  1703. data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
  1704. data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
  1705. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  1706. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  1707. data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
  1708. data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
  1709. data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
  1710. data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  1711. data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  1712. data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
  1713. data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
  1714. data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
  1715. data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
  1716. data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
  1717. data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
  1718. data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
  1719. data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  1720. data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
  1721. data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
  1722. data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
  1723. data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
  1724. data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
  1725. data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  1726. data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
  1727. data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
  1728. data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
  1729. data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
  1730. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  1731. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  1732. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  1733. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  1734. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
  1735. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  1736. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  1737. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  1738. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  1739. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  1740. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
  1741. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
  1742. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
  1743. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
  1744. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
  1745. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
  1746. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
  1747. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
  1748. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
  1749. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
  1750. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
  1751. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
  1752. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
  1753. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
  1754. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
  1755. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
  1756. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
  1757. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
  1758. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
  1759. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
  1760. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
  1761. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
  1762. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
  1763. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
  1764. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
  1765. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
  1766. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
  1767. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
  1768. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
  1769. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
  1770. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
  1771. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
  1772. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
  1773. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
  1774. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
  1775. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
  1776. data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  1777. data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  1778. data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
  1779. data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
  1780. data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
  1781. data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
  1782. data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
  1783. data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
  1784. data/vendor/ggml/src/ggml-threading.cpp +12 -0
  1785. data/vendor/ggml/src/ggml-threading.h +14 -0
  1786. data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  1787. data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  1788. data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  1789. data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  1790. data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  1791. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  1792. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  1793. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  1794. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  1795. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  1796. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  1797. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  1798. data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  1799. data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  1800. data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  1801. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  1802. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  1803. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  1804. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  1805. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  1806. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  1807. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
  1808. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
  1809. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  1810. data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
  1811. data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  1812. data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  1813. data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  1814. data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  1815. data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  1816. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  1817. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  1818. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  1819. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  1820. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  1821. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  1822. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
  1823. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  1824. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  1825. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  1826. data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
  1827. data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
  1828. data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
  1829. data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  1830. data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
  1831. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  1832. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  1833. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  1834. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  1835. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  1836. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  1837. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  1838. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  1839. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  1840. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  1841. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  1842. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  1843. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  1844. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  1845. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  1846. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  1847. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  1848. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  1849. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  1850. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
  1851. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  1852. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  1853. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  1854. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  1855. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  1856. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  1857. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  1858. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  1859. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
  1860. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
  1861. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  1862. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  1863. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  1864. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  1865. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  1866. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  1867. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  1868. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  1869. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  1870. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  1871. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  1872. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
  1873. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
  1874. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  1875. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  1876. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  1877. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  1878. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  1879. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  1880. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  1881. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  1882. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  1883. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  1884. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
  1885. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  1886. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  1887. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  1888. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
  1889. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  1890. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  1891. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  1892. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  1893. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  1894. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
  1895. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
  1896. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
  1897. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
  1898. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
  1899. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  1900. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
  1901. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  1902. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  1903. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
  1904. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  1905. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  1906. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  1907. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  1908. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  1909. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  1910. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
  1911. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  1912. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  1913. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  1914. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  1915. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
  1916. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
  1917. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  1918. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  1919. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  1920. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
  1921. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
  1922. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  1923. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  1924. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
  1925. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  1926. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  1927. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  1928. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  1929. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  1930. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  1931. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  1932. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  1933. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  1934. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  1935. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  1936. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  1937. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  1938. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  1939. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  1940. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  1941. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
  1942. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  1943. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  1944. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  1945. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
  1946. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  1947. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  1948. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
  1949. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  1950. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  1951. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  1952. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
  1953. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
  1954. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  1955. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  1956. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  1957. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  1958. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  1959. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  1960. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
  1961. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  1962. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  1963. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  1964. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  1965. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  1966. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  1967. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  1968. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  1969. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  1970. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
  1971. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  1972. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  1973. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  1974. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
  1975. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  1976. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  1977. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  1978. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  1979. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  1980. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  1981. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  1982. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  1983. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  1984. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  1985. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  1986. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  1987. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  1988. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  1989. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  1990. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  1991. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  1992. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  1993. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  1994. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  1995. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  1996. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  1997. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  1998. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  1999. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  2000. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  2001. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  2002. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  2003. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  2004. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  2005. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  2006. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
  2007. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  2008. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
  2009. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  2010. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  2011. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
  2012. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  2013. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  2014. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  2015. data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  2016. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
  2017. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
  2018. data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  2019. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
  2020. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  2021. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  2022. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  2023. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
  2024. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
  2025. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  2026. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
  2027. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
  2028. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  2029. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
  2030. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
  2031. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
  2032. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
  2033. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
  2034. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
  2035. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
  2036. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
  2037. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
  2038. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
  2039. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  2040. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
  2041. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
  2042. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
  2043. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
  2044. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
  2045. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
  2046. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
  2047. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
  2048. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
  2049. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  2050. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  2051. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
  2052. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
  2053. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
  2054. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  2055. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
  2056. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  2057. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
  2058. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
  2059. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
  2060. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
  2061. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  2062. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
  2063. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
  2064. data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  2065. data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
  2066. data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
  2067. data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  2068. data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  2069. data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
  2070. data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
  2071. data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  2072. data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
  2073. data/vendor/ggml/src/ggml.c +7777 -0
  2074. data/vendor/ggml/src/ggml.cpp +26 -0
  2075. data/vendor/ggml/src/gguf.cpp +1556 -0
  2076. data/vendor/ggml/tests/CMakeLists.txt +356 -0
  2077. data/vendor/ggml/tests/test-arange.cpp +100 -0
  2078. data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
  2079. data/vendor/ggml/tests/test-cont.c +170 -0
  2080. data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
  2081. data/vendor/ggml/tests/test-conv-transpose.c +248 -0
  2082. data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
  2083. data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
  2084. data/vendor/ggml/tests/test-conv1d.cpp +289 -0
  2085. data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
  2086. data/vendor/ggml/tests/test-conv2d.cpp +391 -0
  2087. data/vendor/ggml/tests/test-customop.c +300 -0
  2088. data/vendor/ggml/tests/test-dup.c +111 -0
  2089. data/vendor/ggml/tests/test-interpolate.cpp +166 -0
  2090. data/vendor/ggml/tests/test-opt.cpp +1003 -0
  2091. data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
  2092. data/vendor/ggml/tests/test-pool.c +274 -0
  2093. data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
  2094. data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
  2095. data/vendor/ggml/tests/test-rel-pos.c +87 -0
  2096. data/vendor/ggml/tests/test-roll.cpp +128 -0
  2097. data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
  2098. data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
  2099. data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
  2100. data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
  2101. data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
  2102. data/vendor-patches/0005-concat-backward.patch +36 -0
  2103. data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
  2104. data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
  2105. data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
  2106. data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
  2107. metadata +2161 -0
@@ -0,0 +1,3062 @@
1
+ /*
2
+ * Copyright (c) 2023-2026 The ggml authors
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to
6
+ * deal in the Software without restriction, including without limitation the
7
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
+ * sell copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
+ * IN THE SOFTWARE.
21
+ */
22
+
23
+ #include "ggml-cann.h"
24
+
25
+ #include "ggml-backend-impl.h"
26
+ #include "ggml-cann/aclnn_ops.h"
27
+ #include "ggml-cann/common.h"
28
+ #include "ggml-impl.h"
29
+ #include "ggml.h"
30
+
31
+ #include <acl/acl.h>
32
+ #include <aclnnop/aclnn_trans_matmul_weight.h>
33
+ #include <stdarg.h>
34
+
35
+ #include <chrono>
36
+ #include <cmath>
37
+ #include <cstdio>
38
+ #include <cstring>
39
+ #include <memory>
40
+ #include <mutex>
41
+ #include <optional>
42
+ #include <queue>
43
+ #include <unordered_map>
44
+ #include <unordered_set>
45
+ #include <vector>
46
+
47
+ #define GGML_COMMON_DECL_C
48
+
49
+ #include "ggml-common.h"
50
+
51
+ #define GGML_CANN_NAME "CANN"
52
+
53
+ /**
54
+ * @brief Handles CANN errors by printing an error message and aborting.
55
+ *
56
+ * @param stmt The statement that caused the error.
57
+ * @param func The function in which the error occurred.
58
+ * @param file The file in which the error occurred.
59
+ * @param line The line number where the error occurred.
60
+ * @param msg The error message.
61
+ */
62
+ [[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
63
+ int32_t id = -1;
64
+ aclrtGetDevice(&id);
65
+
66
+ GGML_LOG_ERROR("CANN error: %s\n", msg);
67
+ GGML_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
68
+ GGML_LOG_ERROR(" %s\n", stmt);
69
+ // abort with GGML_ASSERT to get a stack trace
70
+ GGML_ABORT("CANN error");
71
+ }
72
+
73
+ // Thread-local variable to record the current device of this thread.
74
+ thread_local int g_current_cann_device = -1;
75
+
76
+ /**
77
+ * @brief Set the CANN device to be used.
78
+ *
79
+ * @param device The target device ID to set.
80
+ */
81
+ void ggml_cann_set_device(const int32_t device) {
82
+ // int current_device = -1;
83
+ // Note: In some CANN versions, if no device has been set yet,
84
+ // aclrtGetDevice(&current_device) may return 0 by default.
85
+ // aclrtGetDevice(&current_device);
86
+
87
+ // If the current device is already the target one, no need to switch.
88
+ if (device == g_current_cann_device) {
89
+ return;
90
+ }
91
+
92
+ // Switch to the new device.
93
+ ACL_CHECK(aclrtSetDevice(device));
94
+
95
+ // Update the global device record.
96
+ g_current_cann_device = device;
97
+ }
98
+
99
/**
 * @brief Read an environment variable and return its value in lowercase.
 *
 * @param name Name of the environment variable to look up.
 * @return std::nullopt when the variable is not set; otherwise its value with
 *         every character lowercased (an empty value yields an empty string).
 */
std::optional<std::string> get_env_as_lowercase(const std::string & name) {
    const char * val = std::getenv(name.c_str());
    if (!val) {
        return std::nullopt;
    }
    std::string res(val);
    // tolower() requires an argument representable as unsigned char (or EOF);
    // passing a negative plain char (e.g. a non-ASCII byte) is undefined
    // behaviour, so each character is converted through unsigned char first.
    std::transform(res.begin(), res.end(), res.begin(),
                   [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
    return res;
}
112
+
113
/**
 * @brief Tell whether a string is one of the accepted "enabled" spellings.
 *
 * The comparison is exact (case-sensitive), so callers are expected to pass
 * an already lowercased value.
 *
 * @param value The string to check.
 * @return true if `value` is "on", "1", "yes", "y", "enable" or "true";
 *         false for anything else, including the empty string.
 */
bool parse_bool(const std::string & value) {
    static const std::unordered_set<std::string> truthy = { "on", "1", "yes", "y", "enable", "true" };
    return truthy.count(value) > 0;
}
120
+
121
/**
 * @brief Parse a string as an integer, returning 0 if conversion fails.
 *
 * The conversion is delegated to std::stoi, so leading whitespace is skipped
 * and parsing stops at the first character that cannot be part of the number
 * (e.g. "12abc" yields 12). Any exception thrown by std::stoi - no digits to
 * convert, or a value outside the range of `int` - results in 0.
 *
 * @param value The string to parse.
 * @return The parsed integer, or 0 if conversion fails.
 */
int parse_integer(const std::string & value) {
    int parsed = 0;
    try {
        parsed = std::stoi(value);
    } catch (...) {
        // Invalid or out-of-range input falls back to 0.
        parsed = 0;
    }
    return parsed;
}
138
+
139
+ /**
140
+ * @brief Initialize the CANN device information.
141
+ *
142
+ * This function initializes the CANN device information by obtaining the
143
+ * device count and setting the memory allocation granularity for each device.
144
+ *
145
+ * @return A structure containing the device information.
146
+ */
147
+ static ggml_cann_device_info ggml_cann_init() {
148
+ ggml_cann_device_info info = {};
149
+
150
+ aclError err = aclrtGetDeviceCount((uint32_t *) &info.device_count);
151
+
152
+ if (err != ACL_SUCCESS) {
153
+ GGML_LOG_ERROR("%s: failed to initialize CANN: %s\n", __func__, aclGetRecentErrMsg());
154
+ return info;
155
+ }
156
+
157
+ GGML_ASSERT(info.device_count <= GGML_CANN_MAX_DEVICES);
158
+
159
+ for (int id = 0; id < info.device_count; ++id) {
160
+ aclrtPhysicalMemProp prop = {};
161
+ prop.handleType = ACL_MEM_HANDLE_TYPE_NONE;
162
+ prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
163
+ prop.memAttr = ACL_HBM_MEM_HUGE;
164
+ prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
165
+ prop.location.id = id;
166
+ prop.reserve = 0;
167
+ err = aclrtMemGetAllocationGranularity(&prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED,
168
+ &info.devices[id].vmm_granularity);
169
+ info.devices[id].vmm = err == ACL_SUCCESS;
170
+
171
+ size_t free, total;
172
+ ggml_backend_cann_get_device_memory(id, &free, &total);
173
+ info.devices[id].total_vram = free;
174
+ }
175
+
176
+ // TODO: add more device info later.
177
+ return info;
178
+ }
179
+
180
+ /**
181
+ * @brief Retrieve the CANN device information.
182
+ *
183
+ * This function returns a reference to a structure containing the CANN device
184
+ * information. The device information is initialized once and reused on
185
+ * subsequent calls.
186
+ *
187
+ * @return A reference to the structure containing the device information.
188
+ */
189
+ const ggml_cann_device_info & ggml_cann_info() {
190
+ static ggml_cann_device_info info = ggml_cann_init();
191
+ return info;
192
+ }
193
+
194
+ //#define DEBUG_CANN_MALLOC
195
+ /**
196
+ * @brief A pool of CANN buffers(priority segment buffer).
197
+ *
198
+ * This class manages a pool of CANN buffers for a specific device.
199
+ */
200
+ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
201
+ /**
202
+ * @brief The maximum reuse margin for a buffer.
203
+ */
204
+ static const size_t max_reuse_margin = 1ull << 22; // 4MB
205
+
206
+ /**
207
+ * @brief The minimum free margin for a buffer.
208
+ */
209
+ static const size_t min_free_margin = 1ull << 20; // 1MB
210
+
211
+ /**
212
+ * @brief The alignment for buffer allocation.
213
+ */
214
+ static const size_t alignment = 128;
215
+
216
+ /**
217
+ * @brief The device ID associated with this buffer pool.
218
+ */
219
+ int device;
220
+
221
+ /**
222
+ * @brief Whether to disable clean during buffer allocation.
223
+ */
224
+ bool disable_clean = false;
225
+
226
+ /**
227
+ * @brief Structure representing a CANN buffer.
228
+ */
229
+ struct ggml_cann_buffer {
230
+ void * ptr = nullptr; ///< Pointer to the buffer.
231
+ size_t size = 0; ///< Size of the buffer.
232
+ std::chrono::steady_clock::time_point last_used; ///< Last used time.
233
+
234
+ bool operator>(const ggml_cann_buffer & other) const { return size > other.size; }
235
+ };
236
+
237
+ /**
238
+ * @brief Array of CANN buffers in the pool.
239
+ */
240
+ std::unordered_map<void *, size_t> buffer_pool;
241
+ std::priority_queue<ggml_cann_buffer, std::vector<ggml_cann_buffer>, std::greater<>> free_buffers;
242
+
243
+ /**
244
+ * @brief Total size of all buffers in the pool.
245
+ */
246
+ size_t pool_size = 0;
247
+
248
+ /**
249
+ * @brief Constructor to initialize the buffer pool for a specific device.
250
+ *
251
+ * @param device The device ID to associate with this buffer pool.
252
+ */
253
+ explicit ggml_cann_pool_buf_prio(int device) : device(device) {
254
+ disable_clean = parse_bool(get_env_as_lowercase("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
255
+ }
256
+
257
+ /**
258
+ * @brief Destructor to free all buffers in the pool.
259
+ */
260
+ ~ggml_cann_pool_buf_prio() {
261
+ ggml_cann_set_device(device);
262
+ for (auto & [b_ptr, b_size] : buffer_pool) {
263
+ aclrtFree(b_ptr);
264
+ pool_size -= b_size;
265
+ }
266
+ buffer_pool.clear();
267
+ GGML_ASSERT(pool_size == 0);
268
+ }
269
+
270
+ /**
271
+ * @brief Allocate a buffer of the given size.
272
+ *
273
+ * @param size The size of the buffer to allocate.
274
+ * @param actual_size A pointer to a variable to receive the actual size of
275
+ * the allocated buffer.
276
+ * @return A pointer to the allocated buffer.
277
+ */
278
+ void * alloc(size_t size, size_t * actual_size) override {
279
+ size = GGML_PAD(size, alignment);
280
+ if (size == 0) {
281
+ size = alignment;
282
+ }
283
+
284
+ void * ptr = nullptr;
285
+ auto now = std::chrono::steady_clock::now();
286
+
287
+ std::vector<ggml_cann_buffer> free_buffers_rest;
288
+ free_buffers_rest.reserve(free_buffers.size());
289
+ while (!free_buffers.empty()) {
290
+ auto b = free_buffers.top();
291
+ free_buffers.pop();
292
+
293
+ if (b.size >= size) {
294
+ // reuse the buffer if the size is enough
295
+ const size_t margin = b.size - size;
296
+ if (margin <= max_reuse_margin) {
297
+ *actual_size = b.size;
298
+ ptr = b.ptr;
299
+ #ifdef DEBUG_CANN_MALLOC
300
+ GGML_LOG_INFO(
301
+ "cann pool[%d]: reused %p, "
302
+ "pool_size = %5u MB, "
303
+ "size = %5u MB, "
304
+ "margin = %5u MB\n",
305
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
306
+ (uint32_t) (GGML_PAD(size, 1048576) / 1048576),
307
+ (uint32_t) (GGML_PAD(margin, 1048576) / 1048576));
308
+ #endif
309
+ break;
310
+ }
311
+ }
312
+
313
+ bool should_clean = !disable_clean && b.size > min_free_margin &&
314
+ std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
315
+ if (should_clean) {
316
+ // free the buffer if the size is needed to be freed
317
+ ACL_CHECK(aclrtFree(b.ptr));
318
+ pool_size -= b.size;
319
+ buffer_pool.erase(b.ptr);
320
+ #ifdef DEBUG_CANN_MALLOC
321
+ GGML_LOG_INFO(
322
+ "cann pool[%d]: clean %p, "
323
+ "pool_size = %5u MB, "
324
+ "size = %5u MB\n",
325
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
326
+ (uint32_t) (GGML_PAD(b.size, 1048576) / 1048576));
327
+ #endif
328
+ continue;
329
+ }
330
+ free_buffers_rest.push_back(b);
331
+ }
332
+ for (ggml_cann_buffer & b : free_buffers_rest) {
333
+ free_buffers.push(std::move(b));
334
+ }
335
+
336
+ #ifdef DEBUG_CANN_MALLOC
337
+ GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device,
338
+ (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576));
339
+ #endif
340
+ if (ptr != nullptr) {
341
+ return ptr;
342
+ }
343
+
344
+ // allocate a new buffer if no buffer can be reused
345
+ ggml_cann_set_device(device);
346
+ ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
347
+ *actual_size = size;
348
+ pool_size += size;
349
+ #ifdef DEBUG_CANN_MALLOC
350
+ GGML_LOG_INFO(
351
+ "cann pool[%d]: allocate %p, "
352
+ "pool_size = %5u MB, "
353
+ "size = %5u MB\n",
354
+ device, ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
355
+ (uint32_t) (GGML_PAD(size, 1048576) / 1048576));
356
+ #endif
357
+ buffer_pool.emplace(ptr, size);
358
+ return ptr;
359
+ }
360
+
361
+ /**
362
+ * @brief Free a buffer and return it to the pool.
363
+ *
364
+ * @param ptr Pointer to the buffer to free.
365
+ * @param size Size of the buffer to free.
366
+ */
367
+ void free(void * ptr, size_t size) override {
368
+ GGML_UNUSED(size);
369
+ auto it = buffer_pool.find(ptr);
370
+ if (it == buffer_pool.end()) {
371
+ GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
372
+ }
373
+
374
+ auto now = std::chrono::steady_clock::now();
375
+ free_buffers.emplace(ggml_cann_buffer{ ptr, it->second, now });
376
+ #ifdef DEBUG_CANN_MALLOC
377
+ GGML_LOG_INFO(
378
+ "cann pool[%d]: return %p, "
379
+ "pool_size = %5u MB\n",
380
+ device, ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576));
381
+ #endif
382
+ }
383
+ };
384
+
385
+ /**
386
+ * @brief A pool of CANN buffers(segment buffer).
387
+ *
388
+ * This class manages a pool of CANN buffers for a specific device.
389
+ */
390
+ struct ggml_cann_pool_buf : public ggml_cann_pool {
391
+ /**
392
+ * @brief The maximum reuse margin for a buffer.
393
+ */
394
+ static const size_t max_reuse_margin = 1ull << 22; // 4MB
395
+
396
+ /**
397
+ * @brief The minimum free margin for a buffer.
398
+ */
399
+ static const size_t min_free_margin = 1ull << 20; // 1MB
400
+
401
+ /**
402
+ * @brief The alignment for buffer allocation.
403
+ */
404
+ static const size_t alignment = 128;
405
+
406
+ /**
407
+ * @brief The maximum number of buffers in the pool.
408
+ */
409
+ static const int MAX_BUFFERS = 256;
410
+
411
+ /**
412
+ * @brief The device ID associated with this buffer pool.
413
+ */
414
+ int device;
415
+
416
+ /**
417
+ * @brief Whether to disable clean during buffer allocation.
418
+ */
419
+ bool disable_clean = false;
420
+
421
+ /**
422
+ * @brief Structure representing a CANN buffer.
423
+ */
424
+ struct ggml_cann_buffer {
425
+ void * ptr = nullptr; ///< Pointer to the buffer memory.
426
+ size_t size = 0; ///< Size of the buffer.
427
+ bool used = false; ///< Whether the buffer is currently in use.
428
+ std::chrono::steady_clock::time_point last_used; ///< Last used time.
429
+ };
430
+
431
+ /**
432
+ * @brief Array of CANN buffers in the pool.
433
+ */
434
+ ggml_cann_buffer buffer_pool[MAX_BUFFERS] = {};
435
+
436
+ /**
437
+ * @brief Total size of all buffers in the pool.
438
+ */
439
+ size_t pool_size = 0;
440
+
441
+ /**
442
+ * @brief Constructor to initialize the buffer pool for a specific device.
443
+ *
444
+ * @param device The device ID to associate with this buffer pool.
445
+ */
446
+ explicit ggml_cann_pool_buf(int device) : device(device) {
447
+ disable_clean = parse_bool(get_env_as_lowercase("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
448
+ }
449
+
450
+ /**
451
+ * @brief Destructor to free all buffers in the pool.
452
+ */
453
+ ~ggml_cann_pool_buf() {
454
+ ggml_cann_set_device(device);
455
+ for (int i = 0; i < MAX_BUFFERS; ++i) {
456
+ ggml_cann_buffer & b = buffer_pool[i];
457
+ if (b.ptr != nullptr) {
458
+ aclrtFree(b.ptr);
459
+ pool_size -= b.size;
460
+ }
461
+ }
462
+ GGML_ASSERT(pool_size == 0);
463
+ }
464
+
465
+ /**
466
+ * @brief Allocate a buffer of the given size.
467
+ *
468
+ * @param size The size of the buffer to allocate.
469
+ * @param actual_size A pointer to a variable to receive the actual size of
470
+ * the allocated buffer.
471
+ * @return A pointer to the allocated buffer.
472
+ */
473
+ void * alloc(size_t size, size_t * actual_size) override {
474
+ size = GGML_PAD(size, alignment);
475
+ if (size == 0) {
476
+ size = alignment;
477
+ }
478
+
479
+ void * ptr = nullptr;
480
+ auto now = std::chrono::steady_clock::now();
481
+
482
+ int i = 0;
483
+ for (; i < MAX_BUFFERS; ++i) {
484
+ ggml_cann_buffer & b = buffer_pool[i];
485
+ if (b.ptr == nullptr) {
486
+ break;
487
+ }
488
+ if (b.used) {
489
+ continue;
490
+ }
491
+ if (b.size >= size) {
492
+ // reuse the buffer if the size is enough
493
+ const size_t margin = b.size - size;
494
+ if (margin <= max_reuse_margin) {
495
+ *actual_size = b.size;
496
+ b.used = true;
497
+ ptr = b.ptr;
498
+ #ifdef DEBUG_CANN_MALLOC
499
+ GGML_LOG_INFO(
500
+ "cann pool[%d]: reused %p, "
501
+ "pool_size = %5u MB, "
502
+ "size = %5u MB, "
503
+ "margin = %5u MB\n",
504
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
505
+ (uint32_t) (GGML_PAD(size, 1048576) / 1048576),
506
+ (uint32_t) (GGML_PAD(margin, 1048576) / 1048576));
507
+ #endif
508
+ break;
509
+ }
510
+ }
511
+
512
+ bool should_clean = !disable_clean && b.size > min_free_margin &&
513
+ std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
514
+ if (should_clean) {
515
+ // free the buffer if the size is needed to be freed
516
+ ACL_CHECK(aclrtFree(b.ptr));
517
+ pool_size -= b.size;
518
+ #ifdef DEBUG_CANN_MALLOC
519
+ GGML_LOG_INFO(
520
+ "cann pool[%d]: clean %p, "
521
+ "pool_size = %5u MB, "
522
+ "size = %5u MB\n",
523
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
524
+ (uint32_t) (GGML_PAD(b.size, 1048576) / 1048576));
525
+ #endif
526
+ b.ptr = nullptr;
527
+ }
528
+ }
529
+ if (ptr != nullptr) {
530
+ return ptr;
531
+ }
532
+
533
+ if (i < MAX_BUFFERS) {
534
+ // allocate a new buffer if no buffer can be reused
535
+ ggml_cann_buffer & b = buffer_pool[i];
536
+ ggml_cann_set_device(device);
537
+ ACL_CHECK(aclrtMalloc(&b.ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
538
+ pool_size += size;
539
+ *actual_size = size;
540
+ b.size = size;
541
+ b.used = true;
542
+ if (i >= MAX_BUFFERS - 8) {
543
+ GGML_LOG_WARN("cann pool[%d]: slots almost full\n", device);
544
+ }
545
+ #ifdef DEBUG_CANN_MALLOC
546
+ GGML_LOG_INFO(
547
+ "cann pool[%d]: allocate %p, "
548
+ "pool_size = %5u MB, "
549
+ "size = %5u MB\n",
550
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576),
551
+ (uint32_t) (GGML_PAD(b.size, 1048576) / 1048576));
552
+ #endif
553
+ return b.ptr;
554
+ }
555
+
556
+ GGML_ABORT("cann pool[%d]: slots full\n", device);
557
+ }
558
+
559
+ /**
560
+ * @brief Free a buffer and return it to the pool.
561
+ *
562
+ * @param ptr Pointer to the buffer to free.
563
+ * @param size Size of the buffer to free.
564
+ */
565
+ void free(void * ptr, size_t size) override {
566
+ GGML_UNUSED(size);
567
+ for (int i = 0; i < MAX_BUFFERS; ++i) {
568
+ ggml_cann_buffer & b = buffer_pool[i];
569
+ if (b.ptr != ptr) {
570
+ continue;
571
+ }
572
+ b.used = false;
573
+ b.last_used = std::chrono::steady_clock::now();
574
+ #ifdef DEBUG_CANN_MALLOC
575
+ GGML_LOG_INFO(
576
+ "cann pool[%d]: return %p, "
577
+ "pool_size = %5u MB\n",
578
+ device, b.ptr, (uint32_t) (GGML_PAD(pool_size, 1048576) / 1048576));
579
+ #endif
580
+ return;
581
+ }
582
+ GGML_ABORT("cann pool[%d]: slots full\n", device);
583
+ }
584
+ };
585
+
586
+ /**
587
+ * @brief A pool of CANN buffers with virtual memory.
588
+ *
589
+ * This class manages a pool of CANN buffers with virtual memory for a specific
590
+ * device.
591
+ */
592
+ struct ggml_cann_pool_vmm : public ggml_cann_pool {
593
+ /**
594
+ * @brief The maximum size of the virtual memory pool (32 GB).
595
+ */
596
+ size_t max_size;
597
+
598
+ /**
599
+ * @brief The device ID associated with this buffer pool.
600
+ */
601
+ int device;
602
+
603
+ /**
604
+ * @brief Pointer to the start of the virtual memory pool.
605
+ */
606
+ void * pool_addr = 0;
607
+
608
+ /**
609
+ * @brief Amount of virtual memory used in the pool.
610
+ */
611
+ size_t pool_used = 0;
612
+
613
+ /**
614
+ * @brief Total size of the virtual memory pool.
615
+ */
616
+ size_t pool_size = 0;
617
+
618
+ /**
619
+ * @brief Allocation granularity for the virtual memory pool.
620
+ */
621
+ size_t granularity;
622
+
623
+ /**
624
+ * @brief Handles for the physical memory allocated.
625
+ */
626
+ std::vector<aclrtDrvMemHandle> handles;
627
+
628
+ /**
629
+ * @brief Offsets for the mapped memory regions.
630
+ */
631
+ std::vector<void *> map_offsets;
632
+
633
+ /**
634
+ * @brief Constructor to initialize the buffer pool with virtual memory for
635
+ * a specific device.
636
+ *
637
+ * @param device The device ID to associate with this buffer pool.
638
+ */
639
+ explicit ggml_cann_pool_vmm(int device) : device(device) {
640
+ auto dev = ggml_cann_info().devices[device];
641
+ granularity = dev.vmm_granularity;
642
+ max_size = dev.total_vram;
643
+ }
644
+
645
+ /**
646
+ * @brief Destructor to free all buffers in the virtual memory pool.
647
+ */
648
+ ~ggml_cann_pool_vmm() {
649
+ if (pool_addr != 0) {
650
+ for (auto & offset : map_offsets) {
651
+ ACL_CHECK(aclrtUnmapMem(offset));
652
+ }
653
+ for (auto & handle : handles) {
654
+ ACL_CHECK(aclrtFreePhysical(handle));
655
+ }
656
+ ACL_CHECK(aclrtReleaseMemAddress(pool_addr));
657
+ }
658
+ }
659
+
660
+ /**
661
+ * @brief Allocate a buffer of the given size in the virtual memory pool.
662
+ *
663
+ * @param size The size of the buffer to allocate.
664
+ * @param actual_size A pointer to a variable to receive the actual size of
665
+ * the allocated buffer.
666
+ * @return A pointer to the allocated buffer.
667
+ */
668
+ void * alloc(size_t size, size_t * actual_size) override {
669
+ // round up the allocation size to the alignment to ensure that all
670
+ // allocations are aligned for all data types
671
+ const size_t alignment = 128;
672
+ size = GGML_PAD(size, alignment);
673
+ if (size == 0) {
674
+ size = alignment;
675
+ }
676
+
677
+ size_t avail = pool_size - pool_used;
678
+
679
+ if (size > avail) {
680
+ // round up to the next multiple of the granularity
681
+ size_t reserve_size = size - avail;
682
+ reserve_size = GGML_PAD(reserve_size, granularity);
683
+
684
+ GGML_ASSERT(pool_size + reserve_size <= max_size);
685
+
686
+ // allocate more physical memory
687
+ aclrtPhysicalMemProp prop = {};
688
+ prop.handleType = ACL_MEM_HANDLE_TYPE_NONE;
689
+ prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
690
+ prop.memAttr = ACL_HBM_MEM_HUGE;
691
+ prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
692
+ prop.location.id = device;
693
+ prop.reserve = 0;
694
+ aclrtDrvMemHandle handle;
695
+ ACL_CHECK(aclrtMallocPhysical(&handle, reserve_size, &prop, 0));
696
+
697
+ // reserve virtual address space (if not already reserved)
698
+ if (pool_addr == 0) {
699
+ ACL_CHECK(aclrtReserveMemAddress(&pool_addr, max_size, 0, NULL, 1));
700
+ }
701
+
702
+ // map at the end of the pool
703
+ ACL_CHECK(aclrtMapMem((char *) pool_addr + pool_size, reserve_size, 0, handle, 0));
704
+
705
+ handles.push_back(handle);
706
+ map_offsets.push_back((char *) pool_addr + pool_size);
707
+
708
+ // add to the pool
709
+ pool_size += reserve_size;
710
+
711
+ #ifdef DEBUG_CANN_MALLOC
712
+ GGML_LOG_INFO("cann pool[%d]: size increased to %llu MB (reserved %llu MB)\n", device,
713
+ (unsigned long long) (pool_size / 1024 / 1024),
714
+ (unsigned long long) (reserve_size / 1024 / 1024));
715
+ #endif
716
+ }
717
+
718
+ GGML_ASSERT(pool_addr != 0);
719
+
720
+ void * ptr = (void *) ((char *) pool_addr + pool_used);
721
+ *actual_size = size;
722
+ pool_used += size;
723
+
724
+ #ifdef DEBUG_CANN_MALLOC
725
+ GGML_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device, (unsigned long long) size,
726
+ (unsigned long long) ptr);
727
+ #endif
728
+ return ptr;
729
+ }
730
+
731
+ /**
732
+ * @brief Free a buffer and return it to the virtual memory pool.
733
+ *
734
+ * @param ptr Pointer to the buffer to free.
735
+ * @param size Size of the buffer to free.
736
+ */
737
+ void free(void * ptr, size_t size) override {
738
+ #ifdef DEBUG_CANN_MALLOC
739
+ GGML_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device, (unsigned long long) size,
740
+ (unsigned long long) ptr);
741
+ #endif
742
+
743
+ pool_used -= size;
744
+
745
+ // all deallocations must be in reverse order of the allocations
746
+ GGML_ASSERT(ptr == (void *) ((char *) pool_addr + pool_used));
747
+ }
748
+ };
749
+
750
+ /**
751
+ * @brief Create a new CANN pool for a specific device.
752
+ *
753
+ * Factory method to create a new CANN pool object based on the device type.
754
+ *
755
+ * @param device The device ID for which to create the pool.
756
+ * @return A unique pointer to the created CANN pool.
757
+ */
758
+ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(int device) {
759
+ std::string mem_pool_type = get_env_as_lowercase("GGML_CANN_MEM_POOL").value_or("");
760
+
761
+ if (mem_pool_type == "prio") {
762
+ GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
763
+ return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
764
+ }
765
+
766
+ if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
767
+ GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
768
+ return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
769
+ }
770
+
771
+ GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
772
+ return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
773
+ }
774
+
775
+ // cann buffer
776
+
777
/**
 * @brief Per-tensor bookkeeping for set_tensor calls that arrive in chunks.
 *
 * Several threads may each write a different slice of one tensor. The tracker
 * counts the bytes received so that the post-processing step (quantized format
 * transform or ND-to-NZ conversion) runs only once, after the last chunk.
 */
struct TensorSetTracker {
    /// Serialises the writers that share this tracker.
    std::mutex mtx;

    /// Running total of bytes delivered by all chunks so far.
    size_t bytes_written{ 0 };

    /// Size of the complete tensor in bytes; reaching it triggers post-processing.
    size_t total_bytes{ 0 };

    /// Host-side staging area in which quantized chunks are collected.
    std::vector<uint8_t> host_buffer;
};
790
+
791
+ /**
792
+ * @brief Context for managing a CANN buffer associated with a specific device.
793
+ *
794
+ * This structure holds information about a CANN buffer, including the device
795
+ * ID, device pointer, and a name derived from GGML_CANN_NAME and the device ID.
796
+ */
797
+ struct ggml_backend_cann_buffer_context {
798
+ int32_t device; ///< The device ID associated with this buffer context.
799
+ void * dev_ptr = nullptr; ///< Pointer to the device memory allocated for the buffer.
800
+
801
+ std::mutex tracker_mutex; ///< Protects the trackers map
802
+ std::unordered_map<void *, std::unique_ptr<TensorSetTracker>> trackers;
803
+
804
+ /**
805
+ * @brief Constructor to initialize the CANN buffer context.
806
+ *
807
+ * @param device The device ID associated with this buffer context.
808
+ * @param dev_ptr Pointer to the device memory allocated for the buffer.
809
+ */
810
+ ggml_backend_cann_buffer_context(int32_t device, void * dev_ptr) : device(device), dev_ptr(dev_ptr) {}
811
+
812
+ /**
813
+ * @brief Destructor to free the device memory allocated for the buffer.
814
+ */
815
+ ~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
816
+
817
+ /**
818
+ * @brief Get or create a tracker for the given tensor.
819
+ */
820
+ TensorSetTracker * get_or_create_tracker(ggml_tensor * tensor) {
821
+ std::lock_guard<std::mutex> lock(tracker_mutex);
822
+ auto key = tensor->data;
823
+ auto it = trackers.find(key);
824
+ if (it == trackers.end()) {
825
+ auto tracker = std::make_unique<TensorSetTracker>();
826
+ tracker->total_bytes = ggml_nbytes(tensor);
827
+ auto * ptr = tracker.get();
828
+ trackers[key] = std::move(tracker);
829
+ return ptr;
830
+ }
831
+ return it->second.get();
832
+ }
833
+
834
+ /**
835
+ * @brief Remove the tracker for the given tensor.
836
+ */
837
+ void remove_tracker(ggml_tensor * tensor) {
838
+ std::lock_guard<std::mutex> lock(tracker_mutex);
839
+ trackers.erase(tensor->data);
840
+ }
841
+ };
842
+
843
+ // cann buffer type
844
/**
 * @brief Per-buffer-type data of the CANN backend: which device the buffer
 *        type targets and the name reported for it.
 */
struct ggml_backend_cann_buffer_type_context {
    /// Device identifier associated with the buffer type.
    int32_t device;

    /// Name returned by ggml_backend_cann_buffer_type_name().
    std::string name;
};
852
+
853
+ /**
854
+ * @brief Retrieves the name associated with a CANN buffer type.
855
+ *
856
+ * This function returns the descriptive name associated with the specified
857
+ * CANN buffer type context.
858
+ *
859
+ * @param buft Pointer to the buffer type context.
860
+ * @return Const pointer to the C-style string containing the name.
861
+ */
862
+ static const char * ggml_backend_cann_buffer_type_name(ggml_backend_buffer_type_t buft) {
863
+ ggml_backend_cann_buffer_type_context * buft_ctx = (ggml_backend_cann_buffer_type_context *) buft->context;
864
+
865
+ return buft_ctx->name.c_str();
866
+ }
867
+
868
+ /**
869
+ * @brief Checks if the backend buffer type is associated with the CANN backend.
870
+ *
871
+ * This function checks whether the provided backend buffer type is associated
872
+ * with the CANN backend based on the comparison of its name retrieval function
873
+ * pointer.
874
+ *
875
+ * @param buft Pointer to the backend buffer type to check.
876
+ * @return bool Returns true if the buffer type is associated with the CANN
877
+ * backend, otherwise false.
878
+ */
879
+ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
880
+ return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
881
+ }
882
+
883
+ /**
884
+ * @brief Free resources associated with a CANN buffer.
885
+ *
886
+ * This function frees the resources associated with a CANN buffer, including
887
+ * its context.
888
+ *
889
+ * @param buffer The CANN buffer to free.
890
+ */
891
+ static void ggml_backend_cann_buffer_free_buffer(ggml_backend_buffer_t buffer) {
892
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
893
+ delete ctx;
894
+ }
895
+
896
+ /**
897
+ * @brief Retrieve the base pointer of a CANN buffer.
898
+ *
899
+ * This function returns the base pointer of a CANN buffer, which points to the
900
+ * device memory allocated for the buffer.
901
+ *
902
+ * @param buffer The CANN buffer whose base pointer is to be retrieved.
903
+ * @return A pointer to the base of the device memory allocated for the buffer.
904
+ */
905
+ static void * ggml_backend_cann_buffer_get_base(ggml_backend_buffer_t buffer) {
906
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
907
+ return ctx->dev_ptr;
908
+ }
909
+
910
+ /**
911
+ * @brief Transform quantized Q4.0 tensor data into a format suitable for CANN
912
+ * processing.
913
+ *
914
+ * This function transforms quantized Q4.0 tensor data into a format suitable
915
+ * for CANN processing. It extracts quantization values and scales from the
916
+ * source data and prepares them in a format expected by CANN operations.
917
+ *
918
+ * @param tensor Pointer to the tensor information.
919
+ * @param src Pointer to the source data in Q4.0 format.
920
+ * @param dst Pointer to the destination buffer where transformed data will be
921
+ * stored.
922
+ */
923
+ static void ggml_backend_cann_transform_q4_0(ggml_tensor * tensor, const void * src, void * dst) {
924
+ int64_t n_elems = ggml_nelements(tensor);
925
+ int64_t groups = n_elems / QK4_0;
926
+ size_t quant_bytes = n_elems * sizeof(uint8_t) / 2;
927
+
928
+ uint8_t * quant_offset = (uint8_t *) dst;
929
+ uint16_t * scale_offset = (uint16_t *) ((char *) dst + quant_bytes);
930
+
931
+ for (int i = 0; i < groups; i++) {
932
+ const block_q4_0 * group = (const block_q4_0 *) ((const char *) src + i * sizeof(block_q4_0));
933
+ *scale_offset = group->d;
934
+ scale_offset++;
935
+
936
+ // 0-15
937
+ for (int j = 0; j < QK4_0 / 2; j += 2) {
938
+ (*quant_offset) = (group->qs[j] & 0x0F);
939
+ (*quant_offset) |= ((group->qs[j + 1] << 4));
940
+ quant_offset++;
941
+ }
942
+
943
+ // 16-31
944
+ for (int j = 0; j < QK4_0 / 2; j += 2) {
945
+ (*quant_offset) = (group->qs[j] >> 4);
946
+ (*quant_offset) |= (group->qs[j + 1] & 0xF0);
947
+ quant_offset++;
948
+ }
949
+ }
950
+
951
+ // put (uint4b_t -8) into int4b_t
952
+ for (quant_offset = (uint8_t *) dst; quant_offset < (uint8_t *) dst + quant_bytes; quant_offset++) {
953
+ (*quant_offset) ^= 0x88;
954
+ }
955
+ }
956
+
957
+ /**
958
+ * @brief Transform CANN processed data back into quantized Q4.0 format.
959
+ *
960
+ * This function transforms CANN processed data back into quantized Q4.0 format.
961
+ * It reverses the transformation performed by
962
+ * ggml_backend_cann_transform_q4_0(), converting the data back into its
963
+ * original quantized form.
964
+ *
965
+ * @param tensor Pointer to the tensor information.
966
+ * @param src Pointer to the source buffer containing transformed data.
967
+ * @param dst Pointer to the destination buffer where the Q4.0 formatted data
968
+ * will be stored.
969
+ */
970
+ static void ggml_backend_cann_transform_back_q4_0(const ggml_tensor * tensor, void * src, void * dst) {
971
+ int64_t n_elems = ggml_nelements(tensor);
972
+ int64_t groups = n_elems / QK4_0;
973
+ size_t quant_bytes = n_elems * sizeof(uint8_t) / 2;
974
+
975
+ uint8_t * quant_offset = (uint8_t *) src;
976
+ uint16_t * scale_offset = (uint16_t *) ((char *) src + quant_bytes);
977
+
978
+ for (; quant_offset < (uint8_t *) src + quant_bytes; quant_offset++) {
979
+ (*quant_offset) ^= 0x88;
980
+ }
981
+ quant_offset = (uint8_t *) src;
982
+
983
+ for (int i = 0; i < groups; i++) {
984
+ block_q4_0 * group = (block_q4_0 *) ((char *) dst + i * sizeof(block_q4_0));
985
+ group->d = *scale_offset;
986
+ scale_offset++;
987
+
988
+ // 0-15
989
+ for (int j = 0; j < QK4_0 / 2; j += 2) {
990
+ group->qs[j] = ((*quant_offset) & 0x0F);
991
+ group->qs[j + 1] = ((*quant_offset) >> 4);
992
+ quant_offset++;
993
+ }
994
+
995
+ // 16-31
996
+ for (int j = 0; j < QK4_0 / 2; j += 2) {
997
+ group->qs[j] |= ((*quant_offset) << 4);
998
+ group->qs[j + 1] |= ((*quant_offset) & 0xF0);
999
+ quant_offset++;
1000
+ }
1001
+ }
1002
+ }
1003
+
1004
+ /**
1005
+ * @brief Transform quantized Q8.0 tensor data into a format suitable for CANN
1006
+ * processing.
1007
+ *
1008
+ * This function transforms quantized Q8.0 tensor data into a format suitable
1009
+ * for CANN processing. It extracts quantization values and scales from the
1010
+ * source data and prepares them in a format expected by CANN operations.
1011
+ *
1012
+ * @param tensor Pointer to the tensor information.
1013
+ * @param src Pointer to the source data in Q8.0 format.
1014
+ * @param dst Pointer to the destination buffer where transformed data will be
1015
+ * stored.
1016
+ */
1017
+ static void ggml_backend_cann_transform_q8_0(ggml_tensor * tensor, const void * src, void * dst) {
1018
+ int64_t n_elems = ggml_nelements(tensor);
1019
+ int64_t groups = n_elems / QK8_0;
1020
+ size_t quant_bytes = n_elems * sizeof(uint8_t);
1021
+
1022
+ uint8_t * quant_offset = (uint8_t *) dst;
1023
+ uint16_t * scale_offset = (uint16_t *) ((char *) dst + quant_bytes);
1024
+
1025
+ for (int i = 0; i < groups; i++) {
1026
+ const block_q8_0 * group = (const block_q8_0 *) ((const char *) src + i * sizeof(block_q8_0));
1027
+ *scale_offset = group->d;
1028
+ scale_offset++;
1029
+ size_t group_quant_size = QK8_0 * sizeof(uint8_t);
1030
+ memcpy(quant_offset, group->qs, group_quant_size);
1031
+ quant_offset += group_quant_size;
1032
+ }
1033
+ }
1034
+
1035
+ /**
1036
+ * @brief Transform CANN processed data back into quantized Q8.0 format.
1037
+ *
1038
+ * This function transforms CANN processed data back into quantized Q8.0 format.
1039
+ * It reverses the transformation performed by
1040
+ * ggml_backend_cann_transform_q8_0(), converting the data back into its
1041
+ * original quantized form.
1042
+ *
1043
+ * @param tensor Pointer to the tensor information.
1044
+ * @param src Pointer to the source buffer containing transformed data.
1045
+ * @param dst Pointer to the destination buffer where the Q8.0 formatted data
1046
+ * will be stored.
1047
+ */
1048
+ static void ggml_backend_cann_transform_back_q8_0(const ggml_tensor * tensor, const void * src, void * dst) {
1049
+ int64_t n_elems = ggml_nelements(tensor);
1050
+ int64_t groups = n_elems / QK8_0;
1051
+ size_t quant_bytes = n_elems * sizeof(uint8_t);
1052
+
1053
+ const uint8_t * quant_offset = (const uint8_t *) src;
1054
+ const uint16_t * scale_offset = (const uint16_t *) ((const char *) src + quant_bytes);
1055
+
1056
+ for (int i = 0; i < groups; i++) {
1057
+ block_q8_0 * group = (block_q8_0 *) ((char *) dst + i * sizeof(block_q8_0));
1058
+ group->d = *scale_offset;
1059
+ scale_offset++;
1060
+ size_t group_quant_size = QK8_0 * sizeof(uint8_t);
1061
+ memcpy(group->qs, quant_offset, group_quant_size);
1062
+ quant_offset += group_quant_size;
1063
+ }
1064
+ }
1065
+
1066
+ /**
1067
+ * @brief Transform tensor data based on its type for CANN processing.
1068
+ *
1069
+ * This function transforms tensor data based on its quantization type for CANN
1070
+ * processing. It dispatches the transformation based on the tensor's type to
1071
+ * specialized functions handling Q4.0 and Q8.0 formats.
1072
+ *
1073
+ * @param tensor Pointer to the tensor information.
1074
+ * @param src Pointer to the source data to be transformed.
1075
+ * @param dst Pointer to the destination buffer where transformed data will be
1076
+ * stored.
1077
+ */
1078
+ static void ggml_backend_cann_transform(ggml_tensor * tensor, const void * src, void * dst) {
1079
+ switch (tensor->type) {
1080
+ case GGML_TYPE_Q4_0:
1081
+ ggml_backend_cann_transform_q4_0(tensor, src, dst);
1082
+ break;
1083
+ case GGML_TYPE_Q8_0:
1084
+ ggml_backend_cann_transform_q8_0(tensor, src, dst);
1085
+ break;
1086
+ default:
1087
+ break;
1088
+ }
1089
+ }
1090
+
1091
+ /**
1092
+ * @brief Transform CANN processed data back into tensor data based on its type.
1093
+ *
1094
+ * This function transforms CANN processed data back into tensor data based on
1095
+ * its quantization type for Q4.0 and Q8.0 formats. It dispatches the
1096
+ * transformation based on the tensor's type to specialized functions.
1097
+ *
1098
+ * @param tensor Pointer to the tensor information.
1099
+ * @param src Pointer to the source data containing CANN processed data.
1100
+ * @param dst Pointer to the destination buffer where transformed tensor data
1101
+ * will be stored.
1102
+ */
1103
+ static void ggml_backend_cann_transform_back(const ggml_tensor * tensor, void * src, void * dst) {
1104
+ switch (tensor->type) {
1105
+ case GGML_TYPE_Q4_0:
1106
+ ggml_backend_cann_transform_back_q4_0(tensor, src, dst);
1107
+ break;
1108
+ case GGML_TYPE_Q8_0:
1109
+ ggml_backend_cann_transform_back_q8_0(tensor, src, dst);
1110
+ break;
1111
+ default:
1112
+ break;
1113
+ }
1114
+ }
1115
+
1116
+ /**
1117
+ * @brief Check if transformation is needed for a given tensor type.
1118
+ *
1119
+ * This function checks if transformation is needed for a given tensor type
1120
+ * to prepare data for CANN processing.
1121
+ *
1122
+ * @param type The tensor type to check.
1123
+ * @return true if transformation is needed, false otherwise.
1124
+ */
1125
+ static bool need_transform(ggml_type type) {
1126
+ switch (type) {
1127
+ case GGML_TYPE_Q4_0:
1128
+ case GGML_TYPE_Q8_0:
1129
+ return true;
1130
+ default:
1131
+ return false;
1132
+ }
1133
+ }
1134
+
1135
+ /**
1136
+ * @brief Initialize a tensor using data from a CANN buffer.
1137
+ *
1138
+ * This function initializes a tensor using data from a CANN buffer.
1139
+ * It handles special cases such as views and quantization.
1140
+ *
1141
+ * @param buffer The CANN buffer from which to initialize the tensor.
1142
+ * @param tensor Pointer to the tensor to be initialized.
1143
+ */
1144
+ static enum ggml_status ggml_backend_cann_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
1145
+ if (tensor->view_src != NULL && tensor->view_offs == 0) {
1146
+ GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
1147
+ return GGML_STATUS_SUCCESS;
1148
+ }
1149
+
1150
+ // TODO: cann backend doesn't support quantized yet. Just leave the code
1151
+ // here.
1152
+ if (ggml_is_quantized(tensor->type)) {
1153
+ // Initialize padding to 0 to avoid possible NaN values
1154
+ size_t original_size = ggml_nbytes(tensor);
1155
+ size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
1156
+
1157
+ if (padded_size > original_size && tensor->view_src == nullptr) {
1158
+ size_t memset_size = padded_size - original_size;
1159
+ ACL_CHECK(aclrtMemset((char *) tensor->data + original_size, memset_size, 0, memset_size));
1160
+ }
1161
+ }
1162
+ return GGML_STATUS_SUCCESS;
1163
+ }
1164
+
1165
+ /**
1166
+ * @brief Workspace for caching NZ buffers per device.
1167
+ *
1168
+ * This struct manages a device buffer used in NZ computations. It supports
1169
+ * allocation, reallocation, and clearing of cached memory. The struct is
1170
+ * designed to be used with a global array, one per device.
1171
+ */
1172
+ struct ggml_cann_nz_workspace {
1173
+ std::mutex mtx; // Protects ptr/allocated from concurrent access
1174
+ void * ptr; // Pointer to allocated device buffer
1175
+ size_t allocated; // Size of currently allocated buffer in bytes
1176
+
1177
+ /**
1178
+ * @brief Constructor. Initializes the workspace with no allocated memory.
1179
+ */
1180
+ ggml_cann_nz_workspace() : ptr(nullptr), allocated(0) {}
1181
+
1182
+ /**
1183
+ * @brief Free cached memory and reset the workspace.
1184
+ *
1185
+ * If a buffer has been allocated, this function releases it using
1186
+ * aclrtFree and resets internal state.
1187
+ */
1188
+ void clear() {
1189
+ if (ptr) {
1190
+ ACL_CHECK(aclrtFree(ptr));
1191
+ ptr = nullptr;
1192
+ allocated = 0;
1193
+ }
1194
+ }
1195
+
1196
+ /**
1197
+ * @brief Allocate or reallocate the workspace buffer.
1198
+ *
1199
+ * If the requested size is larger than the currently allocated size,
1200
+ * the old buffer will be freed and a new buffer of the requested size
1201
+ * will be allocated on the device.
1202
+ *
1203
+ * @param new_size Size in bytes to allocate for the workspace.
1204
+ */
1205
+ void realloc(size_t new_size) {
1206
+ if (new_size > allocated) {
1207
+ clear();
1208
+ ACL_CHECK(aclrtMalloc(&ptr, new_size, ACL_MEM_MALLOC_HUGE_FIRST));
1209
+ allocated = new_size;
1210
+ }
1211
+ }
1212
+
1213
+ /**
1214
+ * @brief Get the device buffer pointer.
1215
+ *
1216
+ * @return Pointer to the allocated buffer, or nullptr if not allocated.
1217
+ */
1218
+ void * get() const { return ptr; }
1219
+ };
1220
+
1221
+ /**
1222
+ * @brief Global array of NZ workspaces, one per device.
1223
+ */
1224
+ static ggml_cann_nz_workspace g_nz_workspaces[GGML_CANN_MAX_DEVICES];
1225
+
1226
+ /**
1227
+ * @brief Convert tensor weights to NZ format using Ascend CANN API.
1228
+ *
1229
+ * This function creates a transposed tensor descriptor and performs the
1230
+ * TransMatmulWeight operation. Converting tensor formats can significantly
1231
+ * improve performance on certain hardware.
1232
+ *
1233
+ * @param tensor Pointer to the input ggml_tensor containing the weights.
1234
+ * @param offset Byte offset within the tensor data buffer where weights start.
1235
+ * @param device device id.
1236
+ *
1237
+ * @note The workspace buffer used in this function is managed globally and reused
1238
+ * across calls. This reduces overhead from repeated memory allocation and deallocation.
1239
+ */
1240
+ static void weight_format_to_nz(ggml_tensor * tensor, int device) {
1241
+ acl_tensor_ptr weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne, tensor->nb, 2, ACL_FORMAT_ND, 0);
1242
+ uint64_t workspaceSize = 0;
1243
+ aclOpExecutor * executor;
1244
+
1245
+ // TransMatmulWeight
1246
+ ACL_CHECK(aclnnTransMatmulWeightGetWorkspaceSize(weightTransposed.get(), &workspaceSize, &executor));
1247
+
1248
+ std::lock_guard<std::mutex> lock(g_nz_workspaces[device].mtx);
1249
+ // Avoid frequent malloc/free of the workspace.
1250
+ g_nz_workspaces[device].realloc(workspaceSize);
1251
+
1252
+ void * g_nz_workspace = g_nz_workspaces[device].get();
1253
+
1254
+ ACL_CHECK(aclnnTransMatmulWeight(g_nz_workspace, workspaceSize, executor, nullptr));
1255
+ }
1256
+
1257
+ // TODO: need handle tensor which has paddings.
1258
+ /**
1259
+ * @brief Set tensor data in a CANN buffer.
1260
+ *
1261
+ * This function sets tensor data in a CANN buffer, handling transformations
1262
+ * if needed based on the tensor's type. It supports multi-threaded calls
1263
+ * where different threads write different chunks of the same tensor.
1264
+ *
1265
+ * For quantized tensors (Q4_0/Q8_0), data is staged in a host buffer and
1266
+ * the format transform is deferred until all chunks are written.
1267
+ * For NZ weight tensors, chunks are uploaded directly but the ND-to-NZ
1268
+ * conversion is deferred until all chunks are written.
1269
+ *
1270
+ * @param buffer The CANN buffer where the tensor data will be set.
1271
+ * @param tensor Pointer to the tensor whose data will be set.
1272
+ * @param data Pointer to the source data to be copied into the tensor.
1273
+ * @param offset Offset in the source data from where to start copying.
1274
+ * @param size Size of the data to be copied, in bytes.
1275
+ */
1276
+ static void ggml_backend_cann_buffer_set_tensor(ggml_backend_buffer_t buffer,
1277
+ ggml_tensor * tensor,
1278
+ const void * data,
1279
+ size_t offset,
1280
+ size_t size) {
1281
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
1282
+
1283
+ ggml_cann_set_device(ctx->device);
1284
+
1285
+ // Only check env once.
1286
+ static bool weight_to_nz = parse_bool(get_env_as_lowercase("GGML_CANN_WEIGHT_NZ").value_or("on"));
1287
+
1288
+ bool is_quantized = need_transform(tensor->type);
1289
+ bool is_nz = !is_quantized && tensor->type != GGML_TYPE_BF16 && weight_to_nz &&
1290
+ is_matmul_weight((const ggml_tensor *) tensor);
1291
+
1292
+ // Plain tensor (not quantized, not NZ): direct copy, no tracking needed
1293
+ if (!is_quantized && !is_nz) {
1294
+ ACL_CHECK(aclrtMemcpy((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
1295
+ return;
1296
+ }
1297
+
1298
+ // Single-shot write (full tensor at once): handle directly without tracking overhead
1299
+ if (offset == 0 && size == ggml_nbytes(tensor)) {
1300
+ if (is_quantized) {
1301
+ void * transform_buffer = malloc(size);
1302
+ ggml_backend_cann_transform(tensor, data, transform_buffer);
1303
+ ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size, ACL_MEMCPY_HOST_TO_DEVICE));
1304
+ free(transform_buffer);
1305
+ } else {
1306
+ // NZ weight
1307
+ GGML_ASSERT(tensor->ne[2] == 1);
1308
+ GGML_ASSERT(tensor->ne[3] == 1);
1309
+ ACL_CHECK(aclrtMemcpy(tensor->data, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
1310
+ weight_format_to_nz(tensor, ctx->device);
1311
+ }
1312
+ return;
1313
+ }
1314
+
1315
+ // Chunked write: use tracker to accumulate progress and defer transform/conversion
1316
+ TensorSetTracker * tracker = ctx->get_or_create_tracker(tensor);
1317
+ std::unique_lock<std::mutex> lock(tracker->mtx);
1318
+
1319
+ if (is_quantized) {
1320
+ // Stage data in host buffer; transform requires full tensor data
1321
+ if (tracker->host_buffer.empty()) {
1322
+ tracker->host_buffer.resize(tracker->total_bytes);
1323
+ }
1324
+ memcpy(tracker->host_buffer.data() + offset, data, size);
1325
+ } else {
1326
+ // NZ weight: upload chunk to device immediately, defer conversion
1327
+ ACL_CHECK(aclrtMemcpy((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
1328
+ }
1329
+
1330
+ tracker->bytes_written += size;
1331
+
1332
+ // All chunks received: perform deferred transform/conversion
1333
+ if (tracker->bytes_written >= tracker->total_bytes) {
1334
+ if (is_quantized) {
1335
+ void * transform_buffer = malloc(tracker->total_bytes);
1336
+ ggml_backend_cann_transform(tensor, tracker->host_buffer.data(), transform_buffer);
1337
+ ACL_CHECK(aclrtMemcpy(tensor->data, tracker->total_bytes, transform_buffer, tracker->total_bytes, ACL_MEMCPY_HOST_TO_DEVICE));
1338
+ free(transform_buffer);
1339
+ }
1340
+
1341
+ if (is_nz) {
1342
+ GGML_ASSERT(tensor->ne[2] == 1);
1343
+ GGML_ASSERT(tensor->ne[3] == 1);
1344
+ weight_format_to_nz(tensor, ctx->device);
1345
+ }
1346
+
1347
+ // Unlock before removing tracker, as remove_tracker destroys the mutex
1348
+ lock.unlock();
1349
+ ctx->remove_tracker(tensor);
1350
+ }
1351
+ }
1352
+
1353
+ /**
1354
+ * @brief Get tensor data from a CANN buffer.
1355
+ *
1356
+ * This function retrieves tensor data from a CANN buffer, handling
1357
+ * transformations if needed based on the tensor's type.
1358
+ *
1359
+ * @param buffer The CANN buffer from which to retrieve tensor data.
1360
+ * @param tensor Pointer to the tensor whose data will be retrieved.
1361
+ * @param data Pointer to the destination buffer where the tensor data will be
1362
+ * copied.
1363
+ * @param offset Offset in the destination buffer where to start copying.
1364
+ * @param size Size of the data to be copied, in bytes.
1365
+ */
1366
+ static void ggml_backend_cann_buffer_get_tensor(ggml_backend_buffer_t buffer,
1367
+ const ggml_tensor * tensor,
1368
+ void * data,
1369
+ size_t offset,
1370
+ size_t size) {
1371
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
1372
+
1373
+ ggml_cann_set_device(ctx->device);
1374
+
1375
+ if (!need_transform(tensor->type)) {
1376
+ ACL_CHECK(aclrtMemcpy(data, size, (char *) tensor->data + offset, size, ACL_MEMCPY_DEVICE_TO_HOST));
1377
+ } else {
1378
+ void * transform_buffer = malloc(size);
1379
+ ACL_CHECK(aclrtMemcpy(transform_buffer, size, (char *) tensor->data + offset, size, ACL_MEMCPY_DEVICE_TO_HOST));
1380
+ ggml_backend_cann_transform_back(tensor, transform_buffer, data);
1381
+ free(transform_buffer);
1382
+ }
1383
+ }
1384
+
1385
+ /**
1386
+ * @brief Copy tensor data between CANN buffers if possible.
1387
+ *
1388
+ * This function copies tensor data between CANN buffers if the source and
1389
+ * destination buffers are CANN buffers and they meet the necessary conditions
1390
+ * (same device or devices can access each other).
1391
+ *
1392
+ * @param buffer The destination CANN buffer where the tensor data will be
1393
+ * copied.
1394
+ * @param src Pointer to the source tensor whose data will be copied.
1395
+ * @param dst Pointer to the destination tensor where the data will be copied.
1396
+ * @return true if the copy operation succeeded, false otherwise.
1397
+ */
1398
+ static bool ggml_backend_cann_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
1399
+ const ggml_tensor * src,
1400
+ ggml_tensor * dst) {
1401
+ if (ggml_backend_buft_is_cann(src->buffer->buft)) {
1402
+ ggml_backend_cann_buffer_context * src_ctx = (ggml_backend_cann_buffer_context *) src->buffer->context;
1403
+ ggml_backend_cann_buffer_context * dst_ctx = (ggml_backend_cann_buffer_context *) buffer->context;
1404
+
1405
+ size_t memcpy_size = ggml_nbytes(src);
1406
+ // Same device.
1407
+ if (src_ctx->device == dst_ctx->device) {
1408
+ ACL_CHECK(aclrtMemcpy((char *) dst->data, memcpy_size, (const char *) src->data, memcpy_size,
1409
+ ACL_MEMCPY_DEVICE_TO_DEVICE));
1410
+ return true;
1411
+ } else {
1412
+ #ifdef ASCEND_310P
1413
+ // TODO: Support 310p P2P copy
1414
+ return false;
1415
+ #endif
1416
+ // Different device but can access by peer.
1417
+ int32_t canAccessPeer = 0;
1418
+ ACL_CHECK(aclrtDeviceCanAccessPeer(&canAccessPeer, src_ctx->device, dst_ctx->device));
1419
+ if (canAccessPeer) {
1420
+ ggml_cann_set_device(src_ctx->device);
1421
+ ACL_CHECK(aclrtDeviceEnablePeerAccess(dst_ctx->device, 0));
1422
+ ACL_CHECK(aclrtMemcpy((char *) dst->data, memcpy_size, (const char *) src->data, memcpy_size,
1423
+ ACL_MEMCPY_DEVICE_TO_DEVICE));
1424
+ return true;
1425
+ }
1426
+ }
1427
+ }
1428
+ return false;
1429
+ }
1430
+
1431
+ /**
1432
+ * @brief Set a region of a tensor's device memory to a specified value.
1433
+ *
1434
+ * @param buffer The CANN buffer containing the tensor.
1435
+ * @param tensor Pointer to the tensor whose memory will be set.
1436
+ * @param value The value to which each byte in the region will be set.
1437
+ * @param offset Byte offset within the tensor's data to start setting.
1438
+ * @param size Number of bytes to set.
1439
+ */
1440
+ static void ggml_backend_cann_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
1441
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
1442
+
1443
+ ggml_cann_set_device(ctx->device);
1444
+ ACL_CHECK(aclrtMemset((char *) tensor->data + offset, size, value, size));
1445
+ }
1446
+
1447
+ /**
1448
+ * @brief Clear a CANN buffer by setting all its memory to a specified value.
1449
+ *
1450
+ * This function clears a CANN buffer by setting all its memory to a specified
1451
+ * value.
1452
+ *
1453
+ * @param buffer The CANN buffer to be cleared.
1454
+ * @param value The value to which each byte in the buffer will be set.
1455
+ */
1456
+ static void ggml_backend_cann_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
1457
+ ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
1458
+
1459
+ ggml_cann_set_device(ctx->device);
1460
+ ACL_CHECK(aclrtMemset(ctx->dev_ptr, buffer->size, value, buffer->size));
1461
+ }
1462
+
1463
+ /**
1464
+ * @brief Interface for a CANN buffer in the backend.
1465
+ *
1466
+ * This structure defines function pointers to operations that can be performed
1467
+ * on a CANN buffer within the backend.
1468
+ */
1469
+ static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
1470
+ /* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
1471
+ /* .get_base = */ ggml_backend_cann_buffer_get_base,
1472
+ /* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
1473
+ /* .memset_tensor = */ ggml_backend_cann_buffer_memset_tensor,
1474
+ /* .set_tensor = */ ggml_backend_cann_buffer_set_tensor,
1475
+ /* .get_tensor = */ ggml_backend_cann_buffer_get_tensor,
1476
+ /* .set_tensor_2d = */ NULL,
1477
+ /* .get_tensor_2d = */ NULL,
1478
+ /* .cpy_tensor = */ ggml_backend_cann_buffer_cpy_tensor,
1479
+ /* .clear = */ ggml_backend_cann_buffer_clear,
1480
+ /* .reset = */ NULL,
1481
+ };
1482
+
1483
+ /**
1484
+ * @brief Allocates a new CANN buffer of the specified type and size.
1485
+ *
1486
+ * This function allocates a new CANN buffer on the specified device with the
1487
+ * given size.
1488
+ *
1489
+ * @param buft Pointer to the buffer type context.
1490
+ * @param size Size in bytes of the buffer to allocate.
1491
+ * @return Pointer to the allocated buffer, or nullptr if allocation fails.
1492
+ */
1493
+ static ggml_backend_buffer_t ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
1494
+ ggml_backend_cann_buffer_type_context * buft_ctx = (ggml_backend_cann_buffer_type_context *) buft->context;
1495
+
1496
+ ggml_cann_set_device(buft_ctx->device);
1497
+
1498
+ const size_t alignment = 128;
1499
+ size = GGML_PAD(size, alignment);
1500
+ if (size == 0) {
1501
+ size = alignment;
1502
+ }
1503
+ void * dev_ptr;
1504
+ aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
1505
+ if (err != ACL_SUCCESS) {
1506
+ GGML_LOG_ERROR("%s: allocating %.2f MiB on device %d: aclrtMalloc failed: %s\n", __func__,
1507
+ size / 1024.0 / 1024.0, buft_ctx->device, aclGetRecentErrMsg());
1508
+ return nullptr;
1509
+ }
1510
+
1511
+ ggml_backend_cann_buffer_context * ctx = new ggml_backend_cann_buffer_context(buft_ctx->device, dev_ptr);
1512
+
1513
+ return ggml_backend_buffer_init(buft, ggml_backend_cann_buffer_interface, ctx, size);
1514
+ }
1515
+
1516
+ /**
1517
+ * @brief Retrieves the memory alignment requirement for CANN buffers of this
1518
+ * type.
1519
+ *
1520
+ * This function returns the alignment requirement in bytes for memory allocated
1521
+ * by the CANN buffer type.
1522
+ *
1523
+ * @param buft Pointer to the buffer type context (unused in this
1524
+ * implementation).
1525
+ * @return The alignment requirement in bytes (fixed at 128 bytes for CANN
1526
+ * buffers).
1527
+ */
1528
+ static size_t ggml_backend_cann_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
1529
+ return 128;
1530
+
1531
+ GGML_UNUSED(buft);
1532
+ }
1533
+
1534
+ /**
1535
+ * @brief Calculates the allocation size required for a tensor in a CANN buffer.
1536
+ *
1537
+ * Computes the total allocation size needed for storing the tensor's data in a
1538
+ * CANN buffer, considering any necessary padding or adjustments for quantized
1539
+ * types.
1540
+ *
1541
+ * @param buft Pointer to the buffer type context (unused in this
1542
+ * implementation).
1543
+ * @param tensor Pointer to the tensor for which the allocation size is
1544
+ * calculated.
1545
+ * @return The total allocation size in bytes required for the tensor in the
1546
+ * CANN buffer.
1547
+ */
1548
+ static size_t ggml_backend_cann_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
1549
+ const ggml_tensor * tensor) {
1550
+ size_t size = ggml_nbytes(tensor);
1551
+ int64_t ne0 = tensor->ne[0];
1552
+
1553
+ // Only check env once.
1554
+ static bool weight_to_nz = parse_bool(get_env_as_lowercase("GGML_CANN_WEIGHT_NZ").value_or("on"));
1555
+
1556
+ // last line must bigger than 32, because every single op deal at
1557
+ // least 32 bytes.
1558
+ // TODO: quantized type?
1559
+ // int64_t line_size = ne0 * ggml_element_size(tensor);
1560
+ // int64_t line_size_align_32 = (line_size + 31) & ~31;
1561
+ // size += (line_size_align_32 - line_size);
1562
+ if (ggml_is_quantized(tensor->type)) {
1563
+ if (ne0 % MATRIX_ROW_PADDING != 0) {
1564
+ size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
1565
+ }
1566
+ } else if (weight_to_nz && tensor->type != GGML_TYPE_BF16
1567
+ && is_matmul_weight((const ggml_tensor *) tensor)) {
1568
+ // NZ format weight are not support quantized yet.
1569
+ // If ND tensor transform to NZ, size may changed.
1570
+ int64_t shape[] = { tensor->ne[1], tensor->ne[0] };
1571
+ GGML_ASSERT(tensor->ne[2] == 1);
1572
+ GGML_ASSERT(tensor->ne[3] == 1);
1573
+ const aclIntArray * acl_shape = aclCreateIntArray(shape, 2);
1574
+ size_t new_size;
1575
+ ACL_CHECK(aclnnCalculateMatmulWeightSizeV2(acl_shape, ggml_cann_type_mapping(tensor->type), &new_size));
1576
+ ACL_CHECK(aclDestroyIntArray(acl_shape));
1577
+ size = std::max(size, new_size);
1578
+ }
1579
+
1580
+ return size;
1581
+
1582
+ GGML_UNUSED(buft);
1583
+ }
1584
+
1585
+ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
1586
+ return false;
1587
+
1588
+ GGML_UNUSED(buft);
1589
+ }
1590
+
1591
+ /**
1592
+ * @brief Interface for managing CANN buffer types in the GGML backend.
1593
+ *
1594
+ * Provides function pointers for allocating, querying properties, and managing
1595
+ * memory for CANN buffer types in the GGML backend.
1596
+ */
1597
+ static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
1598
+ /* .get_name = */ ggml_backend_cann_buffer_type_name,
1599
+ /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
1600
+ /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
1601
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1602
+ /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
1603
+ /* .is_host = */ ggml_backend_cann_buffer_type_is_host,
1604
+ };
1605
+
1606
+ /**
1607
+ * @brief Retrieves the CANN buffer type for a specified device.
1608
+ *
1609
+ * This function initializes and returns the buffer type interface associated
1610
+ * with the given device. It ensures thread-safe access using a mutex.
1611
+ *
1612
+ * @param device The device index for which to retrieve the buffer type.
1613
+ * @return A pointer to the buffer type interface for the specified device, or
1614
+ * nullptr if the device index is out of range.
1615
+ */
1616
+ ggml_backend_buffer_type_t ggml_backend_cann_buffer_type(int32_t device) {
1617
+ static std::mutex mutex;
1618
+ std::lock_guard<std::mutex> lock(mutex);
1619
+
1620
+ if (device >= ggml_backend_cann_get_device_count()) {
1621
+ return nullptr;
1622
+ }
1623
+
1624
+ static ggml_backend_buffer_type ggml_backend_cann_buffer_types[GGML_CANN_MAX_DEVICES];
1625
+
1626
+ static bool ggml_backend_cann_buffer_type_initialized = false;
1627
+
1628
+ if (!ggml_backend_cann_buffer_type_initialized) {
1629
+ for (int32_t i = 0; i < ggml_cann_info().device_count; i++) {
1630
+ ggml_backend_cann_buffer_types[i] = {
1631
+ /* .iface = */ ggml_backend_cann_buffer_type_interface,
1632
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), i),
1633
+ /* .context = */
1634
+ new ggml_backend_cann_buffer_type_context{ i, "CANN" + std::to_string(i) },
1635
+ };
1636
+ }
1637
+ ggml_backend_cann_buffer_type_initialized = true;
1638
+ }
1639
+
1640
+ return &ggml_backend_cann_buffer_types[device];
1641
+ }
1642
+
1643
+ /**
1644
+ * @brief Retrieves the name associated with a CANN host buffer type.
1645
+ *
1646
+ * This function returns the descriptive name associated with the specified
1647
+ * CANN host buffer type context.
1648
+ *
1649
+ * @param buft Pointer to the host buffer type context.
1650
+ * @return Const pointer to the C-style string containing the name.
1651
+ */
1652
+ static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
1653
+ return "CANN_Host";
1654
+
1655
+ GGML_UNUSED(buft);
1656
+ }
1657
+
1658
+ /**
1659
+ * @brief Retrieves the name associated with a CANN host buffer.
1660
+ *
1661
+ * This function returns the descriptive name associated with the specified
1662
+ * CANN host buffer context.
1663
+ *
1664
+ * @param buft Pointer to the host buffer context.
1665
+ * @return Const pointer to the C-style string containing the name.
1666
+ */
1667
+ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
1668
+ return "CANN_Host";
1669
+
1670
+ GGML_UNUSED(buffer);
1671
+ }
1672
+
1673
+ /**
1674
+ * @brief Free resources associated with a CANN host buffer.
1675
+ *
1676
+ * This function frees the resources associated with a CANN host buffer, including
1677
+ * its context.
1678
+ *
1679
+ * @param buffer The CANN host buffer to free.
1680
+ */
1681
+ static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
1682
+ ACL_CHECK(aclrtFreeHost(buffer->context));
1683
+ }
1684
+
1685
+ /**
1686
+ * @brief Allocates a new CANN host buffer of the specified size.
1687
+ *
1688
+ * This function allocates a new CANN host buffer with the given size.
1689
+ * @param size Size in bytes of the host buffer to allocate.
1690
+ * @return Pointer to the allocated host buffer, or nullptr if allocation fails.
1691
+ */
1692
+ static void * ggml_cann_host_malloc(size_t size) {
1693
+ if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
1694
+ return nullptr;
1695
+ }
1696
+
1697
+ const size_t alignment = 128;
1698
+ size = GGML_PAD(size, alignment);
1699
+ if (size == 0) {
1700
+ size = alignment;
1701
+ }
1702
+
1703
+ void * hostPtr = nullptr;
1704
+ aclError err = aclrtMallocHost((void **) &hostPtr, size);
1705
+ if (err != ACL_SUCCESS) {
1706
+ GGML_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__, size / 1024.0 / 1024.0,
1707
+ aclGetRecentErrMsg());
1708
+ return nullptr;
1709
+ }
1710
+ return hostPtr;
1711
+ }
1712
+
1713
+ /**
1714
+ * @brief Allocates a new CANN host buffer of the specified type and size.
1715
+ *
1716
+ * @param buft Pointer to the host buffer type context.
1717
+ * @param size Size in bytes of the host buffer to allocate.
1718
+ * @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
1719
+ */
1720
+ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
1721
+ size_t size) {
1722
+ void * hostPtr = ggml_cann_host_malloc(size);
1723
+
1724
+ if (hostPtr == nullptr) {
1725
+ // fallback to cpu buffer
1726
+ return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
1727
+ }
1728
+
1729
+ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
1730
+ buffer->buft = buft;
1731
+ buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
1732
+
1733
+ return buffer;
1734
+ }
1735
+
1736
+ /**
1737
+ * @brief Interface for managing CANN host buffer types in the GGML backend.
1738
+ *
1739
+ * Provides function pointers for allocating, querying properties, and managing
1740
+ * memory for CANN buffer types in the GGML backend.
1741
+ */
1742
+ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
1743
+ static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
1744
+ /* .iface = */ {
1745
+ /* .get_name = */ ggml_backend_cann_host_buffer_type_name,
1746
+ /* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
1747
+ /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
1748
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1749
+ /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1750
+ /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1751
+ },
1752
+ /* .device = */
1753
+ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
1754
+ /* .context = */ nullptr,
1755
+ };
1756
+
1757
+ return &ggml_backend_cann_buffer_type_host;
1758
+ }
1759
+
1760
+ /**
1761
+ * @brief Computes the forward operation for a given tensor using CANN
1762
+ * operations.
1763
+ *
1764
+ * This function selects the appropriate CANN operation based on the type of
1765
+ * operation specified in the tensor and performs the computation.
1766
+ *
1767
+ * @param ctx The CANN context containing necessary resources and
1768
+ * configurations.
1769
+ * @param dst The destination tensor where the result of the computation will be
1770
+ * stored.
1771
+ * @return true if the computation was successful; false otherwise.
1772
+ */
1773
+ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct ggml_tensor * dst) {
1774
+ switch (dst->op) {
1775
+ case GGML_OP_REPEAT:
1776
+ ggml_cann_repeat(ctx, dst);
1777
+ break;
1778
+ case GGML_OP_GET_ROWS:
1779
+ ggml_cann_get_rows(ctx, dst);
1780
+ break;
1781
+ case GGML_OP_SET_ROWS:
1782
+ ggml_cann_set_rows(ctx, dst);
1783
+ break;
1784
+ case GGML_OP_DUP:
1785
+ ggml_cann_dup(ctx, dst);
1786
+ break;
1787
+ case GGML_OP_ADD:
1788
+ case GGML_OP_ADD1:
1789
+ ggml_cann_binary_op<aclnn_add>(ctx, dst);
1790
+ break;
1791
+ case GGML_OP_SUB:
1792
+ ggml_cann_binary_op<aclnn_sub>(ctx, dst);
1793
+ break;
1794
+ case GGML_OP_ACC:
1795
+ ggml_cann_acc(ctx, dst);
1796
+ break;
1797
+ case GGML_OP_MUL:
1798
+ ggml_cann_binary_op<aclnn_mul>(ctx, dst);
1799
+ break;
1800
+ case GGML_OP_DIV:
1801
+ ggml_cann_binary_op<aclnn_div>(ctx, dst);
1802
+ break;
1803
+ case GGML_OP_UNARY:
1804
+ switch (ggml_get_unary_op(dst)) {
1805
+ case GGML_UNARY_OP_ABS:
1806
+ GGML_CANN_CALL_OP_UNARY(Abs);
1807
+ break;
1808
+ case GGML_UNARY_OP_NEG:
1809
+ GGML_CANN_CALL_OP_UNARY(Neg);
1810
+ break;
1811
+ case GGML_UNARY_OP_GELU:
1812
+ case GGML_UNARY_OP_GELU_ERF:
1813
+ // aclnnGelu internally uses the erf-based approximation.
1814
+ GGML_CANN_CALL_OP_UNARY(Gelu);
1815
+ break;
1816
+ case GGML_UNARY_OP_SILU:
1817
+ GGML_CANN_CALL_OP_UNARY(Silu);
1818
+ break;
1819
+ case GGML_UNARY_OP_GELU_QUICK:
1820
+ {
1821
+ auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) {
1822
+ GGML_CANN_CALL_ACLNN_OP(ctx, GeluV2, acl_src, 0, acl_dst);
1823
+ };
1824
+ ggml_cann_op_unary(lambda, ctx, dst);
1825
+ }
1826
+ break;
1827
+ case GGML_UNARY_OP_TANH:
1828
+ GGML_CANN_CALL_OP_UNARY(Tanh);
1829
+ break;
1830
+ case GGML_UNARY_OP_RELU:
1831
+ GGML_CANN_CALL_OP_UNARY(Relu);
1832
+ break;
1833
+ case GGML_UNARY_OP_SIGMOID:
1834
+ GGML_CANN_CALL_OP_UNARY(Sigmoid);
1835
+ break;
1836
+ case GGML_UNARY_OP_HARDSIGMOID:
1837
+ GGML_CANN_CALL_OP_UNARY(Hardsigmoid);
1838
+ break;
1839
+ case GGML_UNARY_OP_HARDSWISH:
1840
+ GGML_CANN_CALL_OP_UNARY(Hardswish);
1841
+ break;
1842
+ case GGML_UNARY_OP_EXP:
1843
+ GGML_CANN_CALL_OP_UNARY(Exp);
1844
+ break;
1845
+ case GGML_UNARY_OP_ELU:
1846
+ ggml_cann_elu(ctx, dst);
1847
+ break;
1848
+ case GGML_UNARY_OP_SGN:
1849
+ GGML_CANN_CALL_OP_UNARY(Sign);
1850
+ break;
1851
+ case GGML_UNARY_OP_STEP:
1852
+ ggml_cann_step(ctx, dst);
1853
+ break;
1854
+ case GGML_UNARY_OP_SOFTPLUS:
1855
+ ggml_cann_softplus(ctx, dst);
1856
+ break;
1857
+ default:
1858
+ return false;
1859
+ }
1860
+ break;
1861
+ case GGML_OP_GLU:
1862
+ switch (ggml_get_glu_op(dst)) {
1863
+ case GGML_GLU_OP_REGLU:
1864
+ GGML_CANN_CALL_OP_UNARY_GATED(Relu);
1865
+ break;
1866
+ case GGML_GLU_OP_GEGLU:
1867
+ ggml_cann_geglu(ctx, dst, 0); // approximate=0 → tanh
1868
+ break;
1869
+ case GGML_GLU_OP_GEGLU_ERF:
1870
+ ggml_cann_geglu(ctx, dst, 1); // approximate=1 → erf
1871
+ break;
1872
+ case GGML_GLU_OP_SWIGLU:
1873
+ ggml_cann_swiglu(ctx, dst);
1874
+ break;
1875
+ case GGML_GLU_OP_GEGLU_QUICK:
1876
+ ggml_cann_geglu_quick(ctx, dst);
1877
+ break;
1878
+ default:
1879
+ return false;
1880
+ }
1881
+ break;
1882
+ case GGML_OP_NORM:
1883
+ ggml_cann_norm(ctx, dst);
1884
+ break;
1885
+ case GGML_OP_GROUP_NORM:
1886
+ ggml_cann_group_norm(ctx, dst);
1887
+ break;
1888
+ case GGML_OP_L2_NORM:
1889
+ ggml_cann_l2_norm(ctx, dst);
1890
+ break;
1891
+ case GGML_OP_CROSS_ENTROPY_LOSS:
1892
+ ggml_cann_cross_entropy_loss(ctx, dst);
1893
+ break;
1894
+ case GGML_OP_CONCAT:
1895
+ ggml_cann_concat(ctx, dst);
1896
+ break;
1897
+ case GGML_OP_UPSCALE:
1898
+ ggml_cann_upsample_nearest2d(ctx, dst);
1899
+ break;
1900
+ case GGML_OP_PAD:
1901
+ ggml_cann_pad(ctx, dst);
1902
+ break;
1903
+ case GGML_OP_ARANGE:
1904
+ ggml_cann_arange(ctx, dst);
1905
+ break;
1906
+ case GGML_OP_TIMESTEP_EMBEDDING:
1907
+ ggml_cann_timestep_embedding(ctx, dst);
1908
+ break;
1909
+ case GGML_OP_LEAKY_RELU:
1910
+ ggml_cann_leaky_relu(ctx, dst);
1911
+ break;
1912
+ case GGML_OP_RMS_NORM:
1913
+ ggml_cann_rms_norm(ctx, dst);
1914
+ break;
1915
+ case GGML_OP_MUL_MAT:
1916
+ ggml_cann_mul_mat(ctx, dst);
1917
+ break;
1918
+ case GGML_OP_MUL_MAT_ID:
1919
+ ggml_cann_mul_mat_id(ctx, dst);
1920
+ break;
1921
+ case GGML_OP_SCALE:
1922
+ ggml_cann_scale(ctx, dst);
1923
+ break;
1924
+ case GGML_OP_SQR:
1925
+ GGML_ASSERT(dst->src[1] == nullptr);
1926
+ dst->src[1] = dst->src[0];
1927
+ ggml_cann_binary_op<aclnn_mul>(ctx, dst);
1928
+ break;
1929
+ case GGML_OP_SQRT:
1930
+ GGML_CANN_CALL_OP_UNARY(Sqrt);
1931
+ break;
1932
+ case GGML_OP_CLAMP:
1933
+ ggml_cann_clamp(ctx, dst);
1934
+ break;
1935
+ case GGML_OP_CPY:
1936
+ ggml_cann_cpy(ctx, dst);
1937
+ break;
1938
+ case GGML_OP_SET:
1939
+ ggml_cann_set(ctx, dst);
1940
+ break;
1941
+ case GGML_OP_CONT:
1942
+ ggml_cann_dup(ctx, dst);
1943
+ break;
1944
+ case GGML_OP_NONE:
1945
+ case GGML_OP_RESHAPE:
1946
+ case GGML_OP_VIEW:
1947
+ case GGML_OP_PERMUTE:
1948
+ case GGML_OP_TRANSPOSE:
1949
+ break;
1950
+ case GGML_OP_DIAG_MASK_INF:
1951
+ ggml_cann_diag_mask(ctx, dst, -INFINITY);
1952
+ break;
1953
+ case GGML_OP_SOFT_MAX:
1954
+ ggml_cann_softmax(ctx, dst);
1955
+ break;
1956
+ case GGML_OP_ROPE:
1957
+ ggml_cann_rope(ctx, dst);
1958
+ break;
1959
+ case GGML_OP_IM2COL:
1960
+ ggml_cann_im2col(ctx, dst);
1961
+ break;
1962
+ case GGML_OP_POOL_2D:
1963
+ ggml_cann_pool2d(ctx, dst);
1964
+ break;
1965
+ case GGML_OP_SUM:
1966
+ ggml_cann_sum(ctx, dst);
1967
+ break;
1968
+ case GGML_OP_SUM_ROWS:
1969
+ ggml_cann_sum_rows(ctx, dst);
1970
+ break;
1971
+ case GGML_OP_ARGSORT:
1972
+ ggml_cann_argsort(ctx, dst);
1973
+ break;
1974
+ case GGML_OP_ARGMAX:
1975
+ ggml_cann_argmax(ctx, dst);
1976
+ break;
1977
+ case GGML_OP_COS:
1978
+ ggml_cann_op_unary<aclnn_cos>(ctx, dst);
1979
+ break;
1980
+ case GGML_OP_SIN:
1981
+ ggml_cann_op_unary<aclnn_sin>(ctx, dst);
1982
+ break;
1983
+ case GGML_OP_CONV_TRANSPOSE_1D:
1984
+ ggml_cann_conv_transpose_1d(ctx, dst);
1985
+ break;
1986
+ case GGML_OP_LOG:
1987
+ GGML_CANN_CALL_OP_UNARY(Log);
1988
+ break;
1989
+ case GGML_OP_MEAN:
1990
+ ggml_cann_mean(ctx, dst);
1991
+ break;
1992
+ case GGML_OP_PAD_REFLECT_1D:
1993
+ ggml_cann_pad_reflect_1d(ctx, dst);
1994
+ break;
1995
+ case GGML_OP_COUNT_EQUAL:
1996
+ ggml_cann_count_equal(ctx, dst);
1997
+ break;
1998
+ case GGML_OP_FLASH_ATTN_EXT:
1999
+ ggml_cann_flash_attn_ext(ctx, dst);
2000
+ break;
2001
+ case GGML_OP_OUT_PROD:
2002
+ ggml_cann_out_prod(ctx, dst);
2003
+ break;
2004
+ case GGML_OP_GATED_LINEAR_ATTN:
2005
+ ggml_cann_gated_linear_attn(ctx, dst);
2006
+ break;
2007
+ case GGML_OP_SSM_CONV:
2008
+ ggml_cann_ssm_conv(ctx, dst);
2009
+ break;
2010
+ case GGML_OP_CUMSUM:
2011
+ ggml_cann_cumsum(ctx, dst);
2012
+ break;
2013
+ case GGML_OP_TRI:
2014
+ ggml_cann_tri(ctx, dst);
2015
+ break;
2016
+ case GGML_OP_FILL:
2017
+ ggml_cann_fill(ctx, dst);
2018
+ break;
2019
+ case GGML_OP_DIAG:
2020
+ ggml_cann_diag(ctx, dst);
2021
+ break;
2022
+ case GGML_OP_SOLVE_TRI:
2023
+ ggml_cann_solve_tri(ctx, dst);
2024
+ break;
2025
+ default:
2026
+ return false;
2027
+ }
2028
+
2029
+ return true;
2030
+ }
2031
+
2032
+ // backend
2033
+ /**
2034
+ * @brief Retrieves the name associated with the CANN backend.
2035
+ *
2036
+ * This function returns the name assigned to the CANN backend, which is stored
2037
+ * in the context of the provided backend structure.
2038
+ *
2039
+ * @param backend Pointer to the CANN backend structure.
2040
+ * @return A pointer to a constant string representing the backend name.
2041
+ */
2042
+ static const char * ggml_backend_cann_name(ggml_backend_t backend) {
2043
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2044
+
2045
+ return cann_ctx->name.c_str();
2046
+ }
2047
+
2048
+ /**
2049
+ * @brief Frees resources associated with the CANN backend.
2050
+ *
2051
+ * This function releases resources associated with the CANN backend context
2052
+ * and resets the device associated with the backend to its initial state.
2053
+ *
2054
+ * @param backend Pointer to the CANN backend structure to be freed.
2055
+ */
2056
+ static void ggml_backend_cann_free(ggml_backend_t backend) {
2057
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2058
+ ACL_CHECK(aclrtSynchronizeDevice());
2059
+ ACL_CHECK(aclrtResetDevice(cann_ctx->device));
2060
+
2061
+ delete cann_ctx;
2062
+ delete backend;
2063
+ }
2064
+
2065
+ /**
2066
+ * @brief Sets tensor data asynchronously in the CANN backend.
2067
+ *
2068
+ * This function asynchronously sets tensor data in the CANN backend.
2069
+ *
2070
+ * @param backend Pointer to the CANN backend structure.
2071
+ * @param tensor Pointer to the tensor structure to set data for.
2072
+ * @param data Pointer to the host data to copy to the tensor.
2073
+ * @param offset Offset in bytes within the host data.
2074
+ * @param size Size of the data to copy in bytes.
2075
+ */
2076
+ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
2077
+ ggml_tensor * tensor,
2078
+ const void * data,
2079
+ size_t offset,
2080
+ size_t size) {
2081
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2082
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
2083
+
2084
+ GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) && "unsupported buffer type");
2085
+ GGML_ASSERT(!ggml_is_quantized(tensor->type));
2086
+
2087
+ ACL_CHECK(aclrtMemcpyAsync((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE,
2088
+ cann_ctx->stream()));
2089
+ }
2090
+
2091
+ /**
2092
+ * @brief Gets tensor data asynchronously in the CANN backend.
2093
+ *
2094
+ * This function asynchronously gets tensor data in the CANN backend.
2095
+ *
2096
+ * @param backend Pointer to the CANN backend structure.
2097
+ * @param tensor Pointer to the tensor structure to get data from.
2098
+ * @param data Pointer to the host data to copy from the tensor.
2099
+ * @param offset Offset in bytes within the host data.
2100
+ * @param size Size of the data to copy in bytes.
2101
+ */
2102
+ static void ggml_backend_cann_get_tensor_async(ggml_backend_t backend,
2103
+ const ggml_tensor * tensor,
2104
+ void * data,
2105
+ size_t offset,
2106
+ size_t size) {
2107
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2108
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
2109
+
2110
+ GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) && "unsupported buffer type");
2111
+ GGML_ASSERT(!ggml_is_quantized(tensor->type));
2112
+
2113
+ ACL_CHECK(aclrtMemcpyAsync(data, size, (char *) tensor->data + offset, size, ACL_MEMCPY_DEVICE_TO_HOST,
2114
+ cann_ctx->stream()));
2115
+ }
2116
+
2117
+ /**
2118
+ * @brief Asynchronously copies tensor data between CANN backends.
2119
+ *
2120
+ * This function copies tensor data asynchronously between two CANN backends. It
2121
+ * checks if both tensors reside in CANN buffers and whether the devices support
2122
+ * peer-to-peer access for direct copying. If not, it returns false.
2123
+ *
2124
+ * @param backend_src Pointer to the source CANN backend structure.
2125
+ * @param backend_dst Pointer to the destination CANN backend structure.
2126
+ * @param src Pointer to the source tensor to copy data from.
2127
+ * @param dst Pointer to the destination tensor to copy data to.
2128
+ * @return true if the copy operation succeeds, false otherwise.
2129
+ */
2130
+ static bool ggml_backend_cann_cpy_tensor_async(ggml_backend_t backend_src,
2131
+ ggml_backend_t backend_dst,
2132
+ const ggml_tensor * src,
2133
+ ggml_tensor * dst) {
2134
+ GGML_ASSERT(ggml_backend_is_cann(backend_src) || ggml_backend_is_cann(backend_dst));
2135
+
2136
+ GGML_ASSERT(!is_matmul_weight((const ggml_tensor *) src));
2137
+
2138
+ if (!ggml_backend_buft_is_cann(src->buffer->buft) || !ggml_backend_buft_is_cann(dst->buffer->buft)) {
2139
+ return false;
2140
+ }
2141
+
2142
+ ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
2143
+ ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
2144
+
2145
+ ggml_backend_cann_context * cann_ctx_src = (ggml_backend_cann_context *) backend_src->context;
2146
+ ggml_backend_cann_context * cann_ctx_dst = (ggml_backend_cann_context *) backend_dst->context;
2147
+
2148
+ size_t copy_size = ggml_nbytes(dst);
2149
+ if (copy_size == 0) {
2150
+ return true;
2151
+ }
2152
+ if (backend_src != backend_dst) {
2153
+ #ifdef ASCEND_310P
2154
+ // TODO: Support 310p P2P copy
2155
+ return false;
2156
+ #endif
2157
+ ggml_backend_cann_buffer_context * buf_ctx_src = (ggml_backend_cann_buffer_context *) buf_src->context;
2158
+ ggml_backend_cann_buffer_context * buf_ctx_dst = (ggml_backend_cann_buffer_context *) buf_dst->context;
2159
+
2160
+ GGML_ASSERT(cann_ctx_src->device == buf_ctx_src->device);
2161
+ GGML_ASSERT(cann_ctx_dst->device == buf_ctx_dst->device);
2162
+
2163
+ int32_t canAccessPeer = 0;
2164
+ ACL_CHECK(aclrtDeviceCanAccessPeer(&canAccessPeer, cann_ctx_src->device, cann_ctx_dst->device));
2165
+ if (!canAccessPeer) {
2166
+ return false;
2167
+ }
2168
+
2169
+ // need open both directions for memcpyasync between devices.
2170
+ ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_src->device, 0));
2171
+ ggml_cann_set_device(cann_ctx_src->device);
2172
+ ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_dst->device, 0));
2173
+
2174
+ // wait for task_queue empty to keep task order.
2175
+ ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size, ACL_MEMCPY_DEVICE_TO_DEVICE,
2176
+ cann_ctx_src->stream()));
2177
+ // record event on src stream after the copy
2178
+ // TODO: this event is not effective with acl graph mode, change to use aclrtSynchronizeStream
2179
+ // if (!cann_ctx_src->copy_event) {
2180
+ // ACL_CHECK(aclrtCreateEventWithFlag(&cann_ctx_src->copy_event, ACL_EVENT_SYNC));
2181
+ // }
2182
+ // ACL_CHECK(aclrtRecordEvent(cann_ctx_src->copy_event, cann_ctx_src->stream()));
2183
+
2184
+ // // wait on dst stream for the copy to complete
2185
+ // ggml_cann_set_device(cann_ctx_dst->device);
2186
+ // ACL_CHECK(aclrtStreamWaitEvent(cann_ctx_dst->stream(), cann_ctx_src->copy_event));
2187
+ ACL_CHECK(aclrtSynchronizeStream(cann_ctx_src->stream()));
2188
+ } else {
2189
+ // src and dst are on the same backend
2190
+ ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size, ACL_MEMCPY_DEVICE_TO_DEVICE,
2191
+ cann_ctx_dst->stream()));
2192
+ }
2193
+
2194
+ return true;
2195
+ }
2196
+
2197
+ /**
2198
+ * @brief Synchronizes a CANN backend.
2199
+ *
2200
+ * This function synchronizes the specified CANN backend by waiting for all
2201
+ * operations in its associated stream to complete.
2202
+ *
2203
+ * @param backend Pointer to the CANN backend structure to synchronize.
2204
+ */
2205
+ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
2206
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2207
+ ggml_cann_set_device(cann_ctx->device);
2208
+ ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
2209
+ }
2210
+
2211
+ /**
2212
+ * @brief Check if CANN backend can fuse the specified operation sequence
2213
+ *
2214
+ * This function determines whether an operation sequence starting from the specified node
2215
+ * can be fused into an optimized operation in the CANN backend. Operation fusion can reduce
2216
+ * memory access overhead and improve computational efficiency.
2217
+ *
2218
+ * @param cgraph Pointer to the computation graph
2219
+ * @param node_idx Index of the starting node in the computation graph
2220
+ * @param ops Sequence of operation types to check for fusion
2221
+ * @return true if the operations can be fused
2222
+ * @return false if the operations cannot be fused
2223
+ */
2224
+ static bool ggml_cann_can_fuse(const struct ggml_cgraph * cgraph,
2225
+ int node_idx,
2226
+ std::initializer_list<enum ggml_op> ops) {
2227
+ if (!ggml_can_fuse(cgraph, node_idx, ops)) {
2228
+ return false;
2229
+ }
2230
+
2231
+ // CANN backend supports fusing ADD + RMS_NORM operations
2232
+ if ((ops.size() == 2) && ops.begin()[0] == GGML_OP_ADD && ops.begin()[1] == GGML_OP_RMS_NORM) {
2233
+ ggml_tensor * add_node = cgraph->nodes[node_idx];
2234
+ // TODO: support broadcast for ADD + RMS_NORM
2235
+ if (add_node->src[0]->ne[0] != add_node->src[1]->ne[0] || add_node->src[0]->ne[1] != add_node->src[1]->ne[1] ||
2236
+ add_node->src[0]->ne[2] != add_node->src[1]->ne[2] || add_node->src[0]->ne[3] != add_node->src[1]->ne[3]) {
2237
+ return false;
2238
+ }
2239
+ return true;
2240
+ }
2241
+
2242
+ return false;
2243
+ }
2244
+
2245
+ /**
2246
+ * @brief Evaluate the computation graph and optionally capture or execute it using CANN graph API.
2247
+ *
2248
+ * If CANN graph execution is enabled and graph capture is required, this function begins
2249
+ * graph capture, runs the graph, ends capture, and stores the captured graph.
2250
+ *
2251
+ * Otherwise, it falls back to op-by-op execution using the CANN compute kernel dispatcher.
2252
+ *
2253
+ * @param cann_ctx The CANN backend context.
2254
+ * @param cgraph The ggml computation graph.
2255
+ * @param use_cann_graph Whether to use CANN graph execution.
2256
+ * @param cann_graph_capture_required Whether graph capture is needed due to graph changes.
2257
+ */
2258
+ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx,
2259
+ ggml_cgraph * cgraph,
2260
+ bool use_cann_graph,
2261
+ bool cann_graph_capture_required) {
2262
+ #ifdef USE_ACL_GRAPH
2263
+ if (use_cann_graph && cann_graph_capture_required) { // Begin CANN graph capture
2264
+ ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
2265
+ }
2266
+ #endif // USE_ACL_GRAPH
2267
+ // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
2268
+ // With the use of CANN graphs, the execution will be performed by the graph launch.
2269
+ static bool opt_fusion = parse_bool(get_env_as_lowercase("GGML_CANN_OPERATOR_FUSION").value_or(""));
2270
+
2271
+ if (!use_cann_graph || cann_graph_capture_required) {
2272
+ for (int i = 0; i < cgraph->n_nodes; i++) {
2273
+ ggml_tensor * node = cgraph->nodes[i];
2274
+ if (opt_fusion) {
2275
+ if (ggml_cann_can_fuse(cgraph, i, { GGML_OP_ADD, GGML_OP_RMS_NORM })) {
2276
+ ggml_cann_op_add_rms_norm_fused(*cann_ctx, node, cgraph->nodes[i + 1]);
2277
+ i++;
2278
+ continue;
2279
+ }
2280
+ }
2281
+
2282
+ if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE ||
2283
+ node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
2284
+ continue;
2285
+ }
2286
+
2287
+ if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
2288
+ continue;
2289
+ }
2290
+
2291
+ bool ok = ggml_cann_compute_forward(*cann_ctx, node);
2292
+ if (!ok) {
2293
+ GGML_LOG_ERROR("%s: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
2294
+ }
2295
+ GGML_ASSERT(ok);
2296
+ }
2297
+ }
2298
+
2299
+ #ifdef USE_ACL_GRAPH
2300
+ if (use_cann_graph) {
2301
+ GGML_ASSERT(!cann_ctx->graph_lru_cache.cache_list.empty());
2302
+ ggml_cann_graph * matched_graph = cann_ctx->graph_lru_cache.cache_list.front();
2303
+
2304
+ if (cann_graph_capture_required) { // End CANN graph capture
2305
+ ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &matched_graph->graph));
2306
+ }
2307
+
2308
+ // Execute CANN graph
2309
+ ACL_CHECK(aclmdlRIExecuteAsync(matched_graph->graph, cann_ctx->stream()));
2310
+ }
2311
+ #endif // USE_ACL_GRAPH
2312
+ }
2313
+
2314
+ /**
2315
+ * @brief Computes a computational graph using a CANN backend.
2316
+ *
2317
+ * This function computes the operations defined in the computational graph
2318
+ * using the specified CANN backend.
2319
+ *
2320
+ * @param backend Pointer to the CANN backend structure to use for computation.
2321
+ * @param cgraph Pointer to the computational graph structure containing nodes
2322
+ * representing operations to be computed.
2323
+ * @return enum ggml_status Returns GGML_STATUS_SUCCESS if computation
2324
+ * completes successfully, otherwise an appropriate error status.
2325
+ */
2326
+ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
2327
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2328
+ ggml_cann_set_device(cann_ctx->device);
2329
+ g_nz_workspaces[cann_ctx->device].clear();
2330
+
2331
+ // calculate rope cache for fist layer in current device.
2332
+ cann_ctx->rope_cache.cached = false;
2333
+
2334
+ bool graph_capture_required = false;
2335
+ #ifdef USE_ACL_GRAPH
2336
+ bool use_cann_graph = true;
2337
+
2338
+ static bool prefill_use_graph = parse_bool(get_env_as_lowercase("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
2339
+ if (!prefill_use_graph) {
2340
+ // Do not use acl_graph for prefill.
2341
+ for (int i = 0; i < cgraph->n_nodes; i++) {
2342
+ ggml_tensor * node = cgraph->nodes[i];
2343
+ // TODO: Optimize here. Currently, we can only
2344
+ // get seq_len by FA's input.
2345
+ if (node->op == GGML_OP_FLASH_ATTN_EXT) {
2346
+ // Q -> src[0], shape: [B, S, N, D]
2347
+ use_cann_graph = (node->src[0]->ne[1] == 1);
2348
+ break;
2349
+ }
2350
+ }
2351
+ }
2352
+
2353
+ if (!cann_ctx->acl_graph_mode) {
2354
+ use_cann_graph = false;
2355
+ }
2356
+
2357
+ if (use_cann_graph) {
2358
+ // If no matching graph is found, the graph needs to be recaptured.
2359
+ graph_capture_required = !cann_ctx->graph_lru_cache.find_and_move_to_front(cgraph);
2360
+
2361
+ if (graph_capture_required) {
2362
+ // If no matching graph is found, add a new ACL graph.
2363
+ ggml_cann_graph * new_graph = ggml_cann_graph::create_from_cgraph(cgraph);
2364
+ cann_ctx->graph_lru_cache.push(new_graph);
2365
+
2366
+ // Pre-load rope cache before graph capture. During capture the
2367
+ // stream cannot perform host-to-device memcpy or device memory
2368
+ // malloc/free. Running the full cache init now populates the
2369
+ // cache metadata so these branches are skipped during capture,
2370
+ // while also warming up the memory pool.
2371
+ for (int i = 0; i < cgraph->n_nodes; i++) {
2372
+ ggml_tensor * node = cgraph->nodes[i];
2373
+ if (node->op == GGML_OP_ROPE) {
2374
+ ggml_cann_rope_cache_preload(*cann_ctx, node);
2375
+ break;
2376
+ }
2377
+ }
2378
+ }
2379
+ }
2380
+ #else
2381
+ bool use_cann_graph = false;
2382
+ #endif // USE_ACL_GRAPH
2383
+ evaluate_and_capture_cann_graph(cann_ctx, cgraph, use_cann_graph, graph_capture_required);
2384
+
2385
+ return GGML_STATUS_SUCCESS;
2386
+ }
2387
+
2388
+ /**
2389
+ * @brief Checks if the CANN backend supports a specific operation.
2390
+ *
2391
+ * This function checks whether the specified operation is supported by the
2392
+ * CANN backend.
2393
+ *
2394
+ * @param backend Pointer to the CANN backend structure to check support for
2395
+ * the operation.
2396
+ * @param op Pointer to the tensor representing the operation to check.
2397
+ * @return bool Returns true if the operation is supported by the backend,
2398
+ * otherwise false.
2399
+ */
2400
+ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
2401
+ switch (op->op) {
2402
+ case GGML_OP_UNARY:
2403
+ switch (ggml_get_unary_op(op)) {
2404
+ case GGML_UNARY_OP_ABS:
2405
+ case GGML_UNARY_OP_NEG:
2406
+ case GGML_UNARY_OP_GELU:
2407
+ case GGML_UNARY_OP_SILU:
2408
+ case GGML_UNARY_OP_RELU:
2409
+ case GGML_UNARY_OP_SIGMOID:
2410
+ case GGML_UNARY_OP_HARDSIGMOID:
2411
+ case GGML_UNARY_OP_HARDSWISH:
2412
+ case GGML_UNARY_OP_GELU_QUICK:
2413
+ case GGML_UNARY_OP_TANH:
2414
+ case GGML_UNARY_OP_EXP:
2415
+ case GGML_UNARY_OP_ELU:
2416
+ case GGML_UNARY_OP_SGN:
2417
+ case GGML_UNARY_OP_STEP:
2418
+ case GGML_UNARY_OP_GELU_ERF:
2419
+ case GGML_UNARY_OP_SOFTPLUS:
2420
+ return true;
2421
+ default:
2422
+ return false;
2423
+ }
2424
+ case GGML_OP_GLU:
2425
+ switch (ggml_get_glu_op(op)) {
2426
+ case GGML_GLU_OP_REGLU:
2427
+ case GGML_GLU_OP_GEGLU:
2428
+ case GGML_GLU_OP_SWIGLU:
2429
+ case GGML_GLU_OP_GEGLU_ERF:
2430
+ case GGML_GLU_OP_GEGLU_QUICK:
2431
+ return true;
2432
+ default:
2433
+ return false;
2434
+ }
2435
+ break;
2436
+ case GGML_OP_MUL_MAT:
2437
+ {
2438
+ switch (op->src[0]->type) {
2439
+ #ifndef ASCEND_310P
2440
+ case GGML_TYPE_BF16:
2441
+ #endif
2442
+ case GGML_TYPE_F16:
2443
+ case GGML_TYPE_F32:
2444
+ return true;
2445
+ case GGML_TYPE_Q8_0:
2446
+ case GGML_TYPE_Q4_0:
2447
+ #ifdef ASCEND_310P
2448
+ // Q4 && Q8 per group is not support on 310p device
2449
+ return false;
2450
+ #endif
2451
+ // only support contiguous for quantized types.
2452
+ return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
2453
+ default:
2454
+ return false;
2455
+ }
2456
+ }
2457
+ case GGML_OP_MUL_MAT_ID:
2458
+ switch (op->src[0]->type) {
2459
+ case GGML_TYPE_F16:
2460
+ case GGML_TYPE_F32:
2461
+ return true;
2462
+ case GGML_TYPE_Q8_0:
2463
+ case GGML_TYPE_Q4_0:
2464
+ #ifdef ASCEND_310P
2465
+ // Q4 && Q8 per group is not support on 310p device
2466
+ return false;
2467
+ #endif
2468
+ // only support contiguous for quantized types.
2469
+ return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
2470
+ default:
2471
+ return false;
2472
+ }
2473
+ // embedding
2474
+ case GGML_OP_GET_ROWS:
2475
+ {
2476
+ switch (op->src[0]->type) {
2477
+ case GGML_TYPE_F32:
2478
+ case GGML_TYPE_F16:
2479
+ #ifndef ASCEND_310P
2480
+ case GGML_TYPE_BF16:
2481
+ #endif
2482
+ case GGML_TYPE_Q8_0:
2483
+ return true;
2484
+ default:
2485
+ return false;
2486
+ }
2487
+ }
2488
+ break;
2489
+ case GGML_OP_SET_ROWS:
2490
+ {
2491
+ switch (op->type) {
2492
+ case GGML_TYPE_F32:
2493
+ case GGML_TYPE_F16:
2494
+ #ifndef ASCEND_310P
2495
+ case GGML_TYPE_BF16:
2496
+ #endif
2497
+ return true;
2498
+ default:
2499
+ return false;
2500
+ }
2501
+ }
2502
+ break;
2503
+ case GGML_OP_CPY:
2504
+ {
2505
+ ggml_tensor * src = op->src[0];
2506
+ #ifdef ASCEND_310P
2507
+ if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
2508
+ (src->type != GGML_TYPE_F32 && src->type != GGML_TYPE_F16)) {
2509
+ // only support F32 and F16 on 310P.
2510
+ return false;
2511
+ }
2512
+ #else
2513
+ if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_BF16) ||
2514
+ (src->type != GGML_TYPE_F32 && src->type != GGML_TYPE_F16 && src->type != GGML_TYPE_BF16)) {
2515
+ // only support F32, F16 and BF16.
2516
+ return false;
2517
+ }
2518
+ #endif
2519
+ return true;
2520
+ }
2521
+ break;
2522
+ case GGML_OP_CONT:
2523
+ {
2524
+ switch (op->src[0]->type) {
2525
+ case GGML_TYPE_F32:
2526
+ case GGML_TYPE_F16:
2527
+ #ifndef ASCEND_310P
2528
+ case GGML_TYPE_BF16:
2529
+ #endif
2530
+ return true;
2531
+ default:
2532
+ return false;
2533
+ }
2534
+ }
2535
+ case GGML_OP_ROPE:
2536
+ {
2537
+ if (op->src[0]->ne[0] > 896) {
2538
+ return false;
2539
+ }
2540
+ #ifdef ASCEND_310P
2541
+ // TODO: Support rope_dim < ne00(dim)
2542
+ if (op->src[0]->ne[0] != op->op_params[1]) {
2543
+ return false;
2544
+ }
2545
+ if (!ggml_is_contiguous(op->src[0])) {
2546
+ return false;
2547
+ }
2548
+ #endif
2549
+ return true;
2550
+ }
2551
+ case GGML_OP_UPSCALE:
2552
+ {
2553
+ // aclnnUpsampleNearest2dGetWorkspaceSize not support
2554
+ // selfDimN[2]/outDimN[2] or selfDimC[3]/outDimC[3] not equal
2555
+ if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
2556
+ return false;
2557
+ }
2558
+ if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
2559
+ return false;
2560
+ }
2561
+ if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) {
2562
+ return false;
2563
+ }
2564
+ return true;
2565
+ }
2566
+ case GGML_OP_POOL_2D:
2567
+ {
2568
+ const int32_t * opts = (const int32_t *) op->op_params;
2569
+ #ifdef ASCEND_310P
2570
+ enum ggml_op_pool opt = static_cast<ggml_op_pool>(opts[0]);
2571
+ if (opt == GGML_OP_POOL_MAX) {
2572
+ return false;
2573
+ }
2574
+ #endif
2575
+ const int k0 = opts[1];
2576
+ const int k1 = opts[2];
2577
+ const int p0 = opts[5];
2578
+ const int p1 = opts[6];
2579
+ // value of paddingH should be at most half of kernelH
2580
+ // value of paddingW should be at most half of kernelW
2581
+ return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
2582
+ }
2583
+ case GGML_OP_SUM:
2584
+ return ggml_is_contiguous_rows(op->src[0]);
2585
+ case GGML_OP_L2_NORM:
2586
+ case GGML_OP_CROSS_ENTROPY_LOSS:
2587
+ case GGML_OP_DUP:
2588
+ case GGML_OP_IM2COL:
2589
+ case GGML_OP_CONCAT:
2590
+ case GGML_OP_REPEAT:
2591
+ case GGML_OP_NONE:
2592
+ case GGML_OP_RESHAPE:
2593
+ case GGML_OP_VIEW:
2594
+ case GGML_OP_PERMUTE:
2595
+ case GGML_OP_TRANSPOSE:
2596
+ case GGML_OP_NORM:
2597
+ case GGML_OP_ADD:
2598
+ case GGML_OP_ADD1:
2599
+ case GGML_OP_SUB:
2600
+ case GGML_OP_MUL:
2601
+ case GGML_OP_DIV:
2602
+ case GGML_OP_RMS_NORM:
2603
+ case GGML_OP_SQR:
2604
+ case GGML_OP_SQRT:
2605
+ case GGML_OP_CLAMP:
2606
+ case GGML_OP_DIAG_MASK_INF:
2607
+ case GGML_OP_SUM_ROWS:
2608
+ case GGML_OP_ARGSORT:
2609
+ case GGML_OP_ACC:
2610
+ case GGML_OP_SET:
2611
+ case GGML_OP_GROUP_NORM:
2612
+ return true;
2613
+ case GGML_OP_PAD:
2614
+ // TODO: add circular padding support for cann, see https://github.com/ggml-org/llama.cpp/pull/16985
2615
+ return ggml_get_op_params_i32(op, 8) == 0;
2616
+ case GGML_OP_ARANGE:
2617
+ case GGML_OP_TIMESTEP_EMBEDDING:
2618
+ case GGML_OP_LEAKY_RELU:
2619
+ case GGML_OP_ARGMAX:
2620
+ case GGML_OP_COS:
2621
+ case GGML_OP_SIN:
2622
+ case GGML_OP_LOG:
2623
+ case GGML_OP_MEAN:
2624
+ case GGML_OP_PAD_REFLECT_1D:
2625
+ case GGML_OP_COUNT_EQUAL:
2626
+ case GGML_OP_GATED_LINEAR_ATTN:
2627
+ return true;
2628
+ case GGML_OP_OUT_PROD:
2629
+ {
2630
+ #ifdef ASCEND_310P
2631
+ // Ger is not supported on 310p device
2632
+ return false;
2633
+ #endif
2634
+ switch (op->src[0]->type) {
2635
+ case GGML_TYPE_F16:
2636
+ case GGML_TYPE_F32:
2637
+ return true;
2638
+ default:
2639
+ return false;
2640
+ }
2641
+ }
2642
+ case GGML_OP_CONV_TRANSPOSE_1D:
2643
+ return true;
2644
+ case GGML_OP_SCALE:
2645
+ float bias;
2646
+ memcpy(&bias, (const float *) (op->op_params) + 1, sizeof(float));
2647
+ return bias == 0.0f; // TODO: support bias != 0.0f
2648
+ case GGML_OP_SOFT_MAX:
2649
+ // TODO: support attention sinks [TAG_ATTN_SINKS]
2650
+ if (op->src[2]) {
2651
+ return false;
2652
+ }
2653
+ return true;
2654
+ case GGML_OP_FLASH_ATTN_EXT:
2655
+ {
2656
+ #ifdef ASCEND_310P
2657
+ // FA not support on 310p device
2658
+ return false;
2659
+ #endif
2660
+ // derived from [ggml-cuda.cu]
2661
+ if (op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16) {
2662
+ return false;
2663
+ }
2664
+ if (op->src[1]->type != GGML_TYPE_F16 && op->src[1]->type != GGML_TYPE_F32 &&
2665
+ op->src[1]->type != GGML_TYPE_BF16) {
2666
+ return false;
2667
+ }
2668
+ if (op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_BF16) {
2669
+ return false;
2670
+ }
2671
+ // TODO: support attention sinks [TAG_ATTN_SINKS]
2672
+ if (op->src[4]) {
2673
+ return false;
2674
+ }
2675
+ if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
2676
+ // different head sizes of K and V are not supported yet
2677
+ return false;
2678
+ }
2679
+ float logitSoftcap = 0.0f;
2680
+ memcpy(&logitSoftcap, (const float *) (op->op_params) + 2, sizeof(float));
2681
+ if (logitSoftcap != 0.0f) {
2682
+ return false;
2683
+ }
2684
+ return true;
2685
+ }
2686
+ case GGML_OP_SSM_CONV:
2687
+ return true;
2688
+ case GGML_OP_CUMSUM:
2689
+ return op->src[0]->type == GGML_TYPE_F32;
2690
+ case GGML_OP_TRI:
2691
+ return op->src[0]->type == GGML_TYPE_F32;
2692
+ case GGML_OP_FILL:
2693
+ return op->src[0]->type == GGML_TYPE_F32;
2694
+ case GGML_OP_DIAG:
2695
+ return op->src[0]->type == GGML_TYPE_F32;
2696
+ case GGML_OP_SOLVE_TRI:
2697
+ return op->src[0]->type == GGML_TYPE_F32;
2698
+ default:
2699
+ return false;
2700
+ }
2701
+
2702
+ GGML_UNUSED(dev);
2703
+ }
2704
+
2705
+ /**
2706
+ * @brief Records an event on the CANN backend stream.
2707
+ *
2708
+ * This function records the given event on the ACL runtime stream associated
2709
+ * with the backend context.
2710
+ *
2711
+ * @param event Pointer to the event structure to be recorded.
2712
+ */
2713
+ static void ggml_backend_cann_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
2714
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2715
+ ACL_CHECK(aclrtRecordEvent((aclrtEvent) event->context, cann_ctx->stream()));
2716
+ }
2717
+
2718
+ /**
2719
+ * @brief Waits for a recorded event to complete on the CANN backend stream.
2720
+ *
2721
+ * This function makes the given backend wait for the event to complete on its
2722
+ * ACL runtime stream.
2723
+ *
2724
+ * @param backend Pointer to the backend structure.
2725
+ * @param event Pointer to the event structure that the backend needs to wait
2726
+ * for.
2727
+ */
2728
+ static void ggml_backend_cann_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
2729
+ ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
2730
+ if (ggml_backend_is_cann(backend)) {
2731
+ ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(), (aclrtEvent) event->context));
2732
+ } else {
2733
+ GGML_ABORT("fatal error");
2734
+ }
2735
+ }
2736
+
2737
+ /**
2738
+ * @brief Structure defining the interface for the CANN backend.
2739
+ *
2740
+ * This structure contains function pointers for various operations
2741
+ * supported by the CANN backend, including name retrieval, memory
2742
+ * management, tensor operations, synchronization, and event handling.
2743
+ */
2744
+ static const ggml_backend_i ggml_backend_cann_interface = {
2745
+ /* .get_name = */ ggml_backend_cann_name,
2746
+ /* .free = */ ggml_backend_cann_free,
2747
+ /* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
2748
+ /* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
2749
+ /* .set_tensor_2d_async = */ NULL,
2750
+ /* .get_tensor_2d_async = */ NULL,
2751
+ /* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
2752
+ /* .synchronize = */ ggml_backend_cann_synchronize,
2753
+ /* .graph_plan_create = */ NULL,
2754
+ /* .graph_plan_free = */ NULL,
2755
+ /* .graph_plan_update = */ NULL,
2756
+ /* .graph_plan_compute = */ NULL,
2757
+ /* .graph_compute = */ ggml_backend_cann_graph_compute,
2758
+ /* .event_record = */ ggml_backend_cann_event_record,
2759
+ /* .event_wait = */ ggml_backend_cann_event_wait,
2760
+ /* .graph_optimize = */ NULL,
2761
+ };
2762
+
2763
+ /**
2764
+ * @brief Return the hardcoded GUID for the CANN backend.
2765
+ *
2766
+ * This function returns a static GUID which uniquely identifies the CANN
2767
+ * backend.
2768
+ *
2769
+ * @return A pointer to the static GUID.
2770
+ */
2771
+ static ggml_guid_t ggml_backend_cann_guid() {
2772
+ static ggml_guid guid = { 0xa1, 0x94, 0xaf, 0xac, 0xbd, 0x4f, 0x47, 0x34,
2773
+ 0xbe, 0x1a, 0x9e, 0x71, 0x1f, 0x9e, 0xed, 0x64 };
2774
+ return &guid;
2775
+ }
2776
+
2777
// backend device

/** @brief Per-device data attached to a CANN ggml_backend_device. */
struct ggml_backend_cann_device_context {
    int         device;                     // device index
    std::string name;                       // returned by the get_name callback
    std::string description;                // returned by the get_description callback
    int         op_offload_min_batch_size;  // threshold compared against op->ne[1] in offload_op
};
2784
+
2785
+ static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
2786
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *) dev->context;
2787
+ return ctx->name.c_str();
2788
+ }
2789
+
2790
+ static const char * ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
2791
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *) dev->context;
2792
+ return ctx->description.c_str();
2793
+ }
2794
+
2795
+ static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
2796
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *) dev->context;
2797
+ ggml_backend_cann_get_device_memory(ctx->device, free, total);
2798
+ }
2799
+
2800
+ static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
2801
+ GGML_UNUSED(dev);
2802
+ return GGML_BACKEND_DEVICE_TYPE_GPU;
2803
+ }
2804
+
2805
+ static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
2806
+ props->name = ggml_backend_cann_device_get_name(dev);
2807
+ props->description = ggml_backend_cann_device_get_description(dev);
2808
+ props->type = ggml_backend_cann_device_get_type(dev);
2809
+ ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
2810
+
2811
+ bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
2812
+
2813
+ props->caps = {
2814
+ /* .async = */ false,
2815
+ /* .host_buffer = */ host_buffer,
2816
+ /* .buffer_from_host_ptr = */ false,
2817
+ /* .events = */ true,
2818
+ };
2819
+ }
2820
+
2821
+ static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
2822
+ GGML_UNUSED(params);
2823
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *) dev->context;
2824
+ return ggml_backend_cann_init(ctx->device);
2825
+ }
2826
+
2827
+ /**
2828
+ * @brief Checks if the CANN backend supports a specific backend buffer type.
2829
+ *
2830
+ * This function determines whether the CANN backend supports the given backend
2831
+ * buffer type by comparing the device context of the backend and buffer type.
2832
+ * It returns true if the devices are same between the backend context and
2833
+ * buffer type context.
2834
+ *
2835
+ * @param backend Pointer to the CANN backend.
2836
+ * @param buft Pointer to the backend buffer type to check.
2837
+ * @return bool Returns true if the CANN backend supports the buffer type,
2838
+ * otherwise false.
2839
+ */
2840
+ static bool ggml_backend_cann_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
2841
+ if (ggml_backend_buft_is_cann(buft)) {
2842
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *) dev->context;
2843
+ ggml_backend_cann_buffer_type_context * buft_ctx = (ggml_backend_cann_buffer_type_context *) buft->context;
2844
+ return buft_ctx->device == dev_ctx->device;
2845
+ }
2846
+ return false;
2847
+ }
2848
+
2849
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
2850
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *) dev->context;
2851
+ return ggml_backend_cann_buffer_type(ctx->device);
2852
+ }
2853
+
2854
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
2855
+ GGML_UNUSED(dev);
2856
+ return ggml_backend_cann_host_buffer_type();
2857
+ }
2858
+
2859
+ /**
2860
+ * @brief Determines if a tensor operation should be offloaded to the CANN
2861
+ * backend.
2862
+ *
2863
+ * This function checks if a given tensor operation should be offloaded to the
2864
+ * CANN backend based on the operation type and the size of the tensor. It
2865
+ * returns true if the second dimension (ne[1]) of the tensor is greater than or
2866
+ * equal to the minimum batch size and the operation is not GGML_OP_GET_ROWS.
2867
+ *
2868
+ * @param backend Pointer to the CANN backend.
2869
+ * @param op Pointer to the tensor operation to check.
2870
+ * @return bool Returns true if the operation should be offloaded, otherwise
2871
+ * false.
2872
+ */
2873
+ static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
2874
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
2875
+
2876
+ return op->ne[1] >= dev_ctx->op_offload_min_batch_size && op->op != GGML_OP_GET_ROWS;
2877
+ }
2878
+
2879
+ /**
2880
+ * @brief Creates a new event for the CANN backend device.
2881
+ *
2882
+ * This function initializes a new event for the CANN backend by setting the
2883
+ * device and creating an ACL runtime event. The created event is then wrapped
2884
+ * in a ggml_backend_event structure and returned.
2885
+ *
2886
+ * @param backend Pointer to the CANN backend.
2887
+ * @return ggml_backend_event_t Returns a pointer to the new event structure.
2888
+ */
2889
+ static ggml_backend_event_t ggml_backend_cann_device_event_new(ggml_backend_dev_t dev) {
2890
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *) dev->context;
2891
+
2892
+ ggml_cann_set_device(dev_ctx->device);
2893
+
2894
+ aclrtEvent event;
2895
+ ACL_CHECK(aclrtCreateEvent(&event));
2896
+
2897
+ return new ggml_backend_event{
2898
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
2899
+ /* .context = */ event,
2900
+ };
2901
+ }
2902
+
2903
+ /**
2904
+ * @brief Frees a CANN backend event.
2905
+ *
2906
+ * This function destroys the ACL runtime event associated with the given CANN
2907
+ * backend event and then deletes the event structure itself.
2908
+ *
2909
+ * @param event Pointer to the event structure to be freed.
2910
+ */
2911
+ static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
2912
+ ACL_CHECK(aclrtDestroyEvent((aclrtEvent) event->context));
2913
+
2914
+ delete event;
2915
+ GGML_UNUSED(dev);
2916
+ }
2917
+
2918
+ /**
2919
+ * @brief Synchronizes the given event on the CANN backend.
2920
+ *
2921
+ * This function waits for the specified event to complete on the ACL runtime.
2922
+ *
2923
+ * @param event Pointer to the event structure to be synchronized.
2924
+ */
2925
+ static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
2926
+ ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent) event->context));
2927
+
2928
+ GGML_UNUSED(dev);
2929
+ }
2930
+
2931
+ static const ggml_backend_device_i ggml_backend_cann_device_interface = {
2932
+ /* .get_name = */ ggml_backend_cann_device_get_name,
2933
+ /* .get_description = */ ggml_backend_cann_device_get_description,
2934
+ /* .get_memory = */ ggml_backend_cann_device_get_memory,
2935
+ /* .get_type = */ ggml_backend_cann_device_get_type,
2936
+ /* .get_props = */ ggml_backend_cann_device_get_props,
2937
+ /* .init_backend = */ ggml_backend_cann_device_init, // called for every card
2938
+ /* .get_buffer_type = */ ggml_backend_cann_device_get_buffer_type,
2939
+ /* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
2940
+ /* .buffer_from_host_ptr = */ NULL, // not supported for CANN
2941
+ /* .supports_op = */ ggml_backend_cann_supports_op,
2942
+ /* .supports_buft = */ ggml_backend_cann_supports_buft,
2943
+ /* .offload_op = */ ggml_backend_cann_offload_op,
2944
+ /* .event_new = */ ggml_backend_cann_device_event_new,
2945
+ /* .event_free = */ ggml_backend_cann_device_event_free,
2946
+ /* .event_synchronize = */ ggml_backend_cann_device_event_synchronize,
2947
+ };
2948
+
2949
+ // backend reg
2950
+ struct ggml_backend_cann_reg_context {
2951
+ std::vector<ggml_backend_dev_t> devices;
2952
+ };
2953
+
2954
+ static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
2955
+ GGML_UNUSED(reg);
2956
+ return GGML_CANN_NAME;
2957
+ }
2958
+
2959
+ static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
2960
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *) reg->context;
2961
+ return ctx->devices.size();
2962
+ }
2963
+
2964
+ static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
2965
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *) reg->context;
2966
+ GGML_ASSERT(index < ctx->devices.size());
2967
+ return ctx->devices[index];
2968
+ }
2969
+
2970
+ static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
2971
+ GGML_UNUSED(reg);
2972
+ GGML_UNUSED(name);
2973
+ // reserved for future use
2974
+ return nullptr;
2975
+ }
2976
+
2977
+ static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
2978
+ /* .get_name = */ ggml_backend_cann_reg_get_name,
2979
+ /* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
2980
+ /* .get_device = */ ggml_backend_cann_reg_get_device,
2981
+ /* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
2982
+ };
2983
+
2984
+ // backend registry, called only once for cann backend
2985
+ ggml_backend_reg_t ggml_backend_cann_reg() {
2986
+ static ggml_backend_reg reg;
2987
+ static bool initialized = false;
2988
+
2989
+ {
2990
+ static std::mutex mutex;
2991
+ std::lock_guard<std::mutex> lock(mutex);
2992
+ if (!initialized) {
2993
+ aclInit(nullptr);
2994
+ ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
2995
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
2996
+
2997
+ for (int i = 0; i < ggml_cann_info().device_count; i++) {
2998
+ ggml_backend_cann_device_context * dev_ctx = new ggml_backend_cann_device_context();
2999
+ dev_ctx->description = aclrtGetSocName();
3000
+ dev_ctx->device = i;
3001
+ dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
3002
+ dev_ctx->op_offload_min_batch_size = min_batch_size;
3003
+ ggml_cann_set_device(i);
3004
+ ggml_backend_dev_t dev = new ggml_backend_device{ /* .iface = */ ggml_backend_cann_device_interface,
3005
+ /* .reg = */ &reg,
3006
+ /* .context = */ dev_ctx };
3007
+ ctx->devices.push_back(dev);
3008
+ }
3009
+
3010
+ reg = ggml_backend_reg{ /* .api_version = */ GGML_BACKEND_API_VERSION,
3011
+ /* .iface = */ ggml_backend_cann_reg_interface,
3012
+ /* .context = */ ctx };
3013
+ }
3014
+
3015
+ initialized = true;
3016
+ }
3017
+
3018
+ return &reg;
3019
+ }
3020
+
3021
+ ggml_backend_t ggml_backend_cann_init(int32_t device) {
3022
+ aclInit(nullptr);
3023
+ if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
3024
+ GGML_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);
3025
+ return nullptr;
3026
+ }
3027
+
3028
+ ggml_backend_cann_context * ctx = new ggml_backend_cann_context(device);
3029
+ if (ctx == nullptr) {
3030
+ GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
3031
+ return nullptr;
3032
+ }
3033
+ ggml_cann_set_device(ctx->device);
3034
+ ggml_backend_t cann_backend =
3035
+ new ggml_backend{ /* .guid = */ ggml_backend_cann_guid(),
3036
+ /* .interface = */ ggml_backend_cann_interface,
3037
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
3038
+ /* .context = */ ctx };
3039
+
3040
+ return cann_backend;
3041
+ }
3042
+
3043
+ bool ggml_backend_is_cann(ggml_backend_t backend) {
3044
+ return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cann_guid());
3045
+ }
3046
+
3047
+ int32_t ggml_backend_cann_get_device_count() {
3048
+ return ggml_cann_info().device_count;
3049
+ }
3050
+
3051
+ void ggml_backend_cann_get_device_description(int32_t device, char * description, size_t description_size) {
3052
+ ggml_cann_set_device(device);
3053
+ const char * soc_name = aclrtGetSocName();
3054
+ snprintf(description, description_size, "%s", soc_name);
3055
+ }
3056
+
3057
+ void ggml_backend_cann_get_device_memory(int32_t device, size_t * free, size_t * total) {
3058
+ ggml_cann_set_device(device);
3059
+ ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
3060
+ }
3061
+
3062
+ GGML_BACKEND_DL_IMPL(ggml_backend_cann_reg)