toy 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2107) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +1124 -0
  3. data/LICENSE +21 -0
  4. data/Makefile +2022 -0
  5. data/README.md +154 -0
  6. data/bin/toy +10 -0
  7. data/lib/toy/compute.rb +135 -0
  8. data/lib/toy/compute_cuda.rb +104 -0
  9. data/lib/toy/compute_metal.rb +97 -0
  10. data/lib/toy/core/cli/describe.rb +188 -0
  11. data/lib/toy/core/cli/eval.rb +385 -0
  12. data/lib/toy/core/cli/exit_codes.rb +15 -0
  13. data/lib/toy/core/cli/fetch.rb +238 -0
  14. data/lib/toy/core/cli/infer.rb +268 -0
  15. data/lib/toy/core/cli/install.rb +228 -0
  16. data/lib/toy/core/cli/list.rb +86 -0
  17. data/lib/toy/core/cli/manifest.rb +49 -0
  18. data/lib/toy/core/cli/new.rb +594 -0
  19. data/lib/toy/core/cli/serve.rb +237 -0
  20. data/lib/toy/core/cli/train.rb +471 -0
  21. data/lib/toy/core/cli.rb +165 -0
  22. data/lib/toy/core/config.rb +64 -0
  23. data/lib/toy/core/gguf_meta.rb +161 -0
  24. data/lib/toy/core/model_scan.rb +221 -0
  25. data/lib/toy/core/run_log.rb +94 -0
  26. data/lib/toy/core/toy_root.rb +95 -0
  27. data/lib/toy/dev/toy_card.rb +299 -0
  28. data/lib/toy/dev/toy_describe_flow.rb +412 -0
  29. data/lib/toy/dev/toy_logprobs.rb +86 -0
  30. data/lib/toy/dev/toy_tap.rb +183 -0
  31. data/lib/toy/dev/toy_token_drift.rb +121 -0
  32. data/lib/toy/ffi/tinynn.rb +1491 -0
  33. data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
  34. data/lib/toy/ffi/tinynn_metal.rb +359 -0
  35. data/lib/toy/ffi_manifest.rb +84 -0
  36. data/lib/toy/io/bpe.rb +325 -0
  37. data/lib/toy/io/gguf_kv.rb +35 -0
  38. data/lib/toy/io/gguf_load.rb +331 -0
  39. data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
  40. data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
  41. data/lib/toy/io/model_index.rb +206 -0
  42. data/lib/toy/io/run_bundle.rb +280 -0
  43. data/lib/toy/io/tokenizer.rb +613 -0
  44. data/lib/toy/io/toy_corpus_loader.rb +52 -0
  45. data/lib/toy/io/toy_events.rb +56 -0
  46. data/lib/toy/io/toy_image_loader.rb +48 -0
  47. data/lib/toy/llm/adamw.rb +169 -0
  48. data/lib/toy/llm/archs/llama_arch.rb +233 -0
  49. data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
  50. data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
  51. data/lib/toy/llm/blocks/transformer_block.rb +876 -0
  52. data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
  53. data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
  54. data/lib/toy/llm/classify_batch.rb +88 -0
  55. data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
  56. data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
  57. data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
  58. data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
  59. data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
  60. data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
  61. data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
  62. data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
  63. data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
  64. data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
  65. data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
  66. data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
  67. data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
  68. data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
  69. data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
  70. data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
  71. data/lib/toy/llm/labels.rb +142 -0
  72. data/lib/toy/llm/primitives/gqa.rb +62 -0
  73. data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
  74. data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
  75. data/lib/toy/llm/primitives/rms_norm.rb +39 -0
  76. data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
  77. data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
  78. data/lib/toy/llm/primitives/rope.rb +68 -0
  79. data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
  80. data/lib/toy/llm/primitives/rope_metal.rb +72 -0
  81. data/lib/toy/llm/primitives/swiglu.rb +41 -0
  82. data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
  83. data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
  84. data/lib/toy/llm/recipe_options.rb +71 -0
  85. data/lib/toy/llm/recipes/from_scratch.rb +105 -0
  86. data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
  87. data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
  88. data/lib/toy/llm/recipes/lora.rb +110 -0
  89. data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
  90. data/lib/toy/llm/recipes/lora_metal.rb +114 -0
  91. data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
  92. data/lib/toy/llm/recipes/warm_start.rb +235 -0
  93. data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
  94. data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
  95. data/lib/toy/llm/training_batch.rb +133 -0
  96. data/lib/toy/models/arch.rb +253 -0
  97. data/lib/toy/models/gpt2.rb +311 -0
  98. data/lib/toy/models/toy_gpt2.rb +177 -0
  99. data/lib/toy/models/toy_smollm2.rb +393 -0
  100. data/lib/toy/models/toy_vit.rb +83 -0
  101. data/lib/toy/models/transformer.rb +1494 -0
  102. data/lib/toy/models/transformer_lm.rb +298 -0
  103. data/lib/toy/models/transformer_lm_cuda.rb +159 -0
  104. data/lib/toy/models/transformer_lm_metal.rb +142 -0
  105. data/lib/toy/mri.rb +300 -0
  106. data/lib/toy/run/eval.rb +76 -0
  107. data/lib/toy/run/eval_cuda.rb +66 -0
  108. data/lib/toy/run/eval_lmc.rb +334 -0
  109. data/lib/toy/run/eval_metal.rb +67 -0
  110. data/lib/toy/run/infer.rb +130 -0
  111. data/lib/toy/run/infer_cuda.rb +118 -0
  112. data/lib/toy/run/infer_metal.rb +119 -0
  113. data/lib/toy/run/infer_trace.rb +37 -0
  114. data/lib/toy/run/serve.rb +144 -0
  115. data/lib/toy/run/train.rb +404 -0
  116. data/lib/toy/run/train_cuda.rb +397 -0
  117. data/lib/toy/run/train_gpt2.rb +103 -0
  118. data/lib/toy/run/train_gpt2_cuda.rb +85 -0
  119. data/lib/toy/run/train_gpt2_metal.rb +85 -0
  120. data/lib/toy/run/train_lora.rb +207 -0
  121. data/lib/toy/run/train_lora_cuda.rb +219 -0
  122. data/lib/toy/run/train_metal.rb +227 -0
  123. data/lib/toy/run/train_vit.rb +251 -0
  124. data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
  125. data/lib/toy/serve/openai/handlers.rb +143 -0
  126. data/lib/toy/serve/openai/server.rb +159 -0
  127. data/lib/toy/train/sampler.rb +314 -0
  128. data/lib/toy/train/toy_chat_template.rb +179 -0
  129. data/lib/toy/train/toy_drift_grad.rb +176 -0
  130. data/lib/toy/train/toy_gguf_fuse.rb +428 -0
  131. data/lib/toy/train/toy_gguf_writer.rb +100 -0
  132. data/lib/toy/train/toy_lr_schedule.rb +39 -0
  133. data/lib/toy/train/toy_sample.rb +125 -0
  134. data/lib/toy/train/toy_trainer.rb +86 -0
  135. data/lib/toy/train/training.rb +160 -0
  136. data/lib/toy/version.rb +11 -0
  137. data/lib/toy.rb +902 -0
  138. data/prep/progress +118 -0
  139. data/prep/quietly +64 -0
  140. data/sig/toy.rbs +397 -0
  141. data/sig/toy_compute.rbs +450 -0
  142. data/spinel-ext.json +122 -0
  143. data/tinynn/Makefile +71 -0
  144. data/tinynn/tinynn_backend_cuda.c +99 -0
  145. data/tinynn/tinynn_backend_metal.m +75 -0
  146. data/tinynn/tinynn_events.c +122 -0
  147. data/tinynn/tinynn_events.h +83 -0
  148. data/tinynn/tinynn_ggml.c +2460 -0
  149. data/tinynn/tinynn_ggml.h +545 -0
  150. data/tinynn/tinynn_gguf.c +783 -0
  151. data/tinynn/tinynn_gguf.h +167 -0
  152. data/tinynn/tinynn_trace.c +180 -0
  153. data/tinynn/tinynn_trace.h +85 -0
  154. data/vendor/ggml/AUTHORS +335 -0
  155. data/vendor/ggml/CMakeLists.txt +505 -0
  156. data/vendor/ggml/CONTRIBUTING.md +3 -0
  157. data/vendor/ggml/LICENSE +21 -0
  158. data/vendor/ggml/README.md +50 -0
  159. data/vendor/ggml/ci/run.sh +395 -0
  160. data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
  161. data/vendor/ggml/cmake/GitVars.cmake +22 -0
  162. data/vendor/ggml/cmake/common.cmake +50 -0
  163. data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
  164. data/vendor/ggml/docs/gguf.md +828 -0
  165. data/vendor/ggml/examples/CMakeLists.txt +34 -0
  166. data/vendor/ggml/examples/common-ggml.cpp +244 -0
  167. data/vendor/ggml/examples/common-ggml.h +18 -0
  168. data/vendor/ggml/examples/common.cpp +675 -0
  169. data/vendor/ggml/examples/common.h +322 -0
  170. data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
  171. data/vendor/ggml/examples/gpt-2/README.md +225 -0
  172. data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
  173. data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
  174. data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
  175. data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
  176. data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
  177. data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
  178. data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
  179. data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
  180. data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
  181. data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
  182. data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
  183. data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
  184. data/vendor/ggml/examples/gpt-j/README.md +239 -0
  185. data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
  186. data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
  187. data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
  188. data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
  189. data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
  190. data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
  191. data/vendor/ggml/examples/magika/README.md +23 -0
  192. data/vendor/ggml/examples/magika/convert.py +32 -0
  193. data/vendor/ggml/examples/magika/main.cpp +374 -0
  194. data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
  195. data/vendor/ggml/examples/mnist/README.md +206 -0
  196. data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
  197. data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
  198. data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
  199. data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
  200. data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
  201. data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
  202. data/vendor/ggml/examples/mnist/server.py +36 -0
  203. data/vendor/ggml/examples/mnist/web/index.html +178 -0
  204. data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
  205. data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
  206. data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
  207. data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
  208. data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
  209. data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
  210. data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
  211. data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
  212. data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
  213. data/vendor/ggml/examples/prompts/replit.txt +100 -0
  214. data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
  215. data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
  216. data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
  217. data/vendor/ggml/examples/prompts/whisper.txt +100 -0
  218. data/vendor/ggml/examples/python/README.md +115 -0
  219. data/vendor/ggml/examples/python/api.h +14 -0
  220. data/vendor/ggml/examples/python/example_add_quant.py +25 -0
  221. data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
  222. data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
  223. data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
  224. data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
  225. data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
  226. data/vendor/ggml/examples/python/ggml/utils.py +182 -0
  227. data/vendor/ggml/examples/python/regenerate.py +42 -0
  228. data/vendor/ggml/examples/python/stubs.py +128 -0
  229. data/vendor/ggml/examples/python/test_tensor.py +258 -0
  230. data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
  231. data/vendor/ggml/examples/sam/README.md +95 -0
  232. data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
  233. data/vendor/ggml/examples/sam/example.jpg +0 -0
  234. data/vendor/ggml/examples/sam/sam.cpp +2370 -0
  235. data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
  236. data/vendor/ggml/examples/simple/README.md +61 -0
  237. data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
  238. data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
  239. data/vendor/ggml/examples/stb_image.h +7987 -0
  240. data/vendor/ggml/examples/stb_image_write.h +1724 -0
  241. data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
  242. data/vendor/ggml/examples/test-cmake/README.md +3 -0
  243. data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
  244. data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
  245. data/vendor/ggml/examples/yolo/README.md +59 -0
  246. data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
  247. data/vendor/ggml/examples/yolo/data/coco.names +80 -0
  248. data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
  249. data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
  250. data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
  251. data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
  252. data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
  253. data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
  254. data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
  255. data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
  256. data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
  257. data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
  258. data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
  259. data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
  260. data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
  261. data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
  262. data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
  263. data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
  264. data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
  265. data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
  266. data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
  267. data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
  268. data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
  269. data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
  270. data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
  271. data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
  272. data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
  273. data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
  274. data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
  275. data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
  276. data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
  277. data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
  278. data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
  279. data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
  280. data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
  281. data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
  282. data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
  283. data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
  284. data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
  285. data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
  286. data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
  287. data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
  288. data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
  289. data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
  290. data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
  291. data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
  292. data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
  293. data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
  294. data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
  295. data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
  296. data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
  297. data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
  298. data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
  299. data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
  300. data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
  301. data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
  302. data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
  303. data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
  304. data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
  305. data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
  306. data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
  307. data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
  308. data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
  309. data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
  310. data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
  311. data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
  312. data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
  313. data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
  314. data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
  315. data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
  316. data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
  317. data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
  318. data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
  319. data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
  320. data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
  321. data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
  322. data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
  323. data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
  324. data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
  325. data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
  326. data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
  327. data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
  328. data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
  329. data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
  330. data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
  331. data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
  332. data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
  333. data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
  334. data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
  335. data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
  336. data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
  337. data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
  338. data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
  339. data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
  340. data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
  341. data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
  342. data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
  343. data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
  344. data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
  345. data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
  346. data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
  347. data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
  348. data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
  349. data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
  350. data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
  351. data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
  352. data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
  353. data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
  354. data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
  355. data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
  356. data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
  357. data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
  358. data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
  359. data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
  360. data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
  361. data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
  362. data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
  363. data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
  364. data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
  365. data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
  366. data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
  367. data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
  368. data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
  369. data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
  370. data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
  371. data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
  372. data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
  373. data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
  374. data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
  375. data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
  376. data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
  377. data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
  378. data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
  379. data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
  380. data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
  381. data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
  382. data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
  383. data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
  384. data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
  385. data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
  386. data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
  387. data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
  388. data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
  389. data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
  390. data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
  391. data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
  392. data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
  393. data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
  394. data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
  395. data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
  396. data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
  397. data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
  398. data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
  399. data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
  400. data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
  401. data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
  402. data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
  403. data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
  404. data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
  405. data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
  406. data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
  407. data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
  408. data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
  409. data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
  410. data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
  411. data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
  412. data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
  413. data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
  414. data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
  415. data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
  416. data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
  417. data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
  418. data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
  419. data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
  420. data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
  421. data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
  422. data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
  423. data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
  424. data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
  425. data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
  426. data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
  427. data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
  428. data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
  429. data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
  430. data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
  431. data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
  432. data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
  433. data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
  434. data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
  435. data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
  436. data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
  437. data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
  438. data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
  439. data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
  440. data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
  441. data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
  442. data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
  443. data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
  444. data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
  445. data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
  446. data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
  447. data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
  448. data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
  449. data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
  450. data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
  451. data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
  452. data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
  453. data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
  454. data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
  455. data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
  456. data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
  457. data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
  458. data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
  459. data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
  460. data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
  461. data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
  462. data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
  463. data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
  464. data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
  465. data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
  466. data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
  467. data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
  468. data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
  469. data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
  470. data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
  471. data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
  472. data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
  473. data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
  474. data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
  475. data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
  476. data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
  477. data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
  478. data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
  479. data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
  480. data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
  481. data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
  482. data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
  483. data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
  484. data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
  485. data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
  486. data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
  487. data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
  488. data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
  489. data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
  490. data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
  491. data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
  492. data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
  493. data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
  494. data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
  495. data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
  496. data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
  497. data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
  498. data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
  499. data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
  500. data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
  501. data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
  502. data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
  503. data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
  504. data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
  505. data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
  506. data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
  507. data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
  508. data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
  509. data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
  510. data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
  511. data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
  512. data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
  513. data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
  514. data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
  515. data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
  516. data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
  517. data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
  518. data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
  519. data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
  520. data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
  521. data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
  522. data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
  523. data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
  524. data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
  525. data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
  526. data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
  527. data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
  528. data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
  529. data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
  530. data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
  531. data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
  532. data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
  533. data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
  534. data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
  535. data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
  536. data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
  537. data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
  538. data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
  539. data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
  540. data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
  541. data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
  542. data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
  543. data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
  544. data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
  545. data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
  546. data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
  547. data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
  548. data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
  549. data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
  550. data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
  551. data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
  552. data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
  553. data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
  554. data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
  555. data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
  556. data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
  557. data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
  558. data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
  559. data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
  560. data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
  561. data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
  562. data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
  563. data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
  564. data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
  565. data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
  566. data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
  567. data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
  568. data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
  569. data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
  570. data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
  571. data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
  572. data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
  573. data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
  574. data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
  575. data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
  576. data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
  577. data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
  578. data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
  579. data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
  580. data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
  581. data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
  582. data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
  583. data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
  584. data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
  585. data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
  586. data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
  587. data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
  588. data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
  589. data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
  590. data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
  591. data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
  592. data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
  593. data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
  594. data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
  595. data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
  596. data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
  597. data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
  598. data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
  599. data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
  600. data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
  601. data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
  602. data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
  603. data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
  604. data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
  605. data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
  606. data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
  607. data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
  608. data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
  609. data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
  610. data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
  611. data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
  612. data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
  613. data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
  614. data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
  615. data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
  616. data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
  617. data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
  618. data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
  619. data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
  620. data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
  621. data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
  622. data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
  623. data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
  624. data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
  625. data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
  626. data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
  627. data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
  628. data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
  629. data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
  630. data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
  631. data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
  632. data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
  633. data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
  634. data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
  635. data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
  636. data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
  637. data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
  638. data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
  639. data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
  640. data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
  641. data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
  642. data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
  643. data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
  644. data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
  645. data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
  646. data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
  647. data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
  648. data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
  649. data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
  650. data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
  651. data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
  652. data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
  653. data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
  654. data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
  655. data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
  656. data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
  657. data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
  658. data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
  659. data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
  660. data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
  661. data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
  662. data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
  663. data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
  664. data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
  665. data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
  666. data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
  667. data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
  668. data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
  669. data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
  670. data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
  671. data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
  672. data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
  673. data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
  674. data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
  675. data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
  676. data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
  677. data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
  678. data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
  679. data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
  680. data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
  681. data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
  682. data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
  683. data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
  684. data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
  685. data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
  686. data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
  687. data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
  688. data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
  689. data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
  690. data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
  691. data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
  692. data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
  693. data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
  694. data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
  695. data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
  696. data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
  697. data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
  698. data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
  699. data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
  700. data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
  701. data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
  702. data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
  703. data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
  704. data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
  705. data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
  706. data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
  707. data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
  708. data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
  709. data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
  710. data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
  711. data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
  712. data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
  713. data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
  714. data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
  715. data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
  716. data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
  717. data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
  718. data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
  719. data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
  720. data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
  721. data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
  722. data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
  723. data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
  724. data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
  725. data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
  726. data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
  727. data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
  728. data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
  729. data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
  730. data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
  731. data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
  732. data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
  733. data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
  734. data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
  735. data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
  736. data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
  737. data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
  738. data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
  739. data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
  740. data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
  741. data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
  742. data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
  743. data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
  744. data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
  745. data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
  746. data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
  747. data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
  748. data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
  749. data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
  750. data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
  751. data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
  752. data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
  753. data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
  754. data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
  755. data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
  756. data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
  757. data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
  758. data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
  759. data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
  760. data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
  761. data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
  762. data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
  763. data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
  764. data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
  765. data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
  766. data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
  767. data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
  768. data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
  769. data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
  770. data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
  771. data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
  772. data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
  773. data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
  774. data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
  775. data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
  776. data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
  777. data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
  778. data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
  779. data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
  780. data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
  781. data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
  782. data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
  783. data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
  784. data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
  785. data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
  786. data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
  787. data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
  788. data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
  789. data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
  790. data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
  791. data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
  792. data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
  793. data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
  794. data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
  795. data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
  796. data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
  797. data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
  798. data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
  799. data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
  800. data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
  801. data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
  802. data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
  803. data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
  804. data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
  805. data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
  806. data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
  807. data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
  808. data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
  809. data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
  810. data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
  811. data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
  812. data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
  813. data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
  814. data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
  815. data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
  816. data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
  817. data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
  818. data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
  819. data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
  820. data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
  821. data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
  822. data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
  823. data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
  824. data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
  825. data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
  826. data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
  827. data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
  828. data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
  829. data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
  830. data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
  831. data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
  832. data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
  833. data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
  834. data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
  835. data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
  836. data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
  837. data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
  838. data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
  839. data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
  840. data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
  841. data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
  842. data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
  843. data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
  844. data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
  845. data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
  846. data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
  847. data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
  848. data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
  849. data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
  850. data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
  851. data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
  852. data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
  853. data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
  854. data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
  855. data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
  856. data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
  857. data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
  858. data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
  859. data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
  860. data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
  861. data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
  862. data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
  863. data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
  864. data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
  865. data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
  866. data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
  867. data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
  868. data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
  869. data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
  870. data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
  871. data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
  872. data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
  873. data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
  874. data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
  875. data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
  876. data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
  877. data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
  878. data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
  879. data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
  880. data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
  881. data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
  882. data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
  883. data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
  884. data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
  885. data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
  886. data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
  887. data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
  888. data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
  889. data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
  890. data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
  891. data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
  892. data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
  893. data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
  894. data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
  895. data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
  896. data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
  897. data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
  898. data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
  899. data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
  900. data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
  901. data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
  902. data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
  903. data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
  904. data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
  905. data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
  906. data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
  907. data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
  908. data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
  909. data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
  910. data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
  911. data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
  912. data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
  913. data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
  914. data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
  915. data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
  916. data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
  917. data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
  918. data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
  919. data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
  920. data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
  921. data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
  922. data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
  923. data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
  924. data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
  925. data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
  926. data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
  927. data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
  928. data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
  929. data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
  930. data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
  931. data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
  932. data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
  933. data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
  934. data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
  935. data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
  936. data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
  937. data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
  938. data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
  939. data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
  940. data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
  941. data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
  942. data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
  943. data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
  944. data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
  945. data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
  946. data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
  947. data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
  948. data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
  949. data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
  950. data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
  951. data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
  952. data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
  953. data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
  954. data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
  955. data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
  956. data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
  957. data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
  958. data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
  959. data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
  960. data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
  961. data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
  962. data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
  963. data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
  964. data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
  965. data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
  966. data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
  967. data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
  968. data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
  969. data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
  970. data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
  971. data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
  972. data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
  973. data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
  974. data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
  975. data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
  976. data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
  977. data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
  978. data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
  979. data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
  980. data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
  981. data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
  982. data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
  983. data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
  984. data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
  985. data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
  986. data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
  987. data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
  988. data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
  989. data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
  990. data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
  991. data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
  992. data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
  993. data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
  994. data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
  995. data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
  996. data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
  997. data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
  998. data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
  999. data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
  1000. data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
  1001. data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
  1002. data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
  1003. data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
  1004. data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
  1005. data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
  1006. data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
  1007. data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
  1008. data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
  1009. data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
  1010. data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
  1011. data/vendor/ggml/ggml.pc.in +10 -0
  1012. data/vendor/ggml/include/ggml-alloc.h +85 -0
  1013. data/vendor/ggml/include/ggml-backend.h +431 -0
  1014. data/vendor/ggml/include/ggml-blas.h +25 -0
  1015. data/vendor/ggml/include/ggml-cann.h +123 -0
  1016. data/vendor/ggml/include/ggml-cpp.h +39 -0
  1017. data/vendor/ggml/include/ggml-cpu.h +151 -0
  1018. data/vendor/ggml/include/ggml-cuda.h +50 -0
  1019. data/vendor/ggml/include/ggml-hexagon.h +19 -0
  1020. data/vendor/ggml/include/ggml-metal.h +61 -0
  1021. data/vendor/ggml/include/ggml-opencl.h +26 -0
  1022. data/vendor/ggml/include/ggml-openvino.h +37 -0
  1023. data/vendor/ggml/include/ggml-opt.h +256 -0
  1024. data/vendor/ggml/include/ggml-rpc.h +35 -0
  1025. data/vendor/ggml/include/ggml-sycl.h +49 -0
  1026. data/vendor/ggml/include/ggml-virtgpu.h +14 -0
  1027. data/vendor/ggml/include/ggml-vulkan.h +29 -0
  1028. data/vendor/ggml/include/ggml-webgpu.h +19 -0
  1029. data/vendor/ggml/include/ggml-zdnn.h +17 -0
  1030. data/vendor/ggml/include/ggml-zendnn.h +22 -0
  1031. data/vendor/ggml/include/ggml.h +2845 -0
  1032. data/vendor/ggml/include/gguf.h +204 -0
  1033. data/vendor/ggml/requirements.txt +12 -0
  1034. data/vendor/ggml/scripts/gen-authors.sh +9 -0
  1035. data/vendor/ggml/scripts/release.sh +296 -0
  1036. data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
  1037. data/vendor/ggml/scripts/sync-llama.last +1 -0
  1038. data/vendor/ggml/scripts/sync-llama.sh +21 -0
  1039. data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
  1040. data/vendor/ggml/scripts/sync-whisper.last +1 -0
  1041. data/vendor/ggml/scripts/sync-whisper.sh +17 -0
  1042. data/vendor/ggml/src/CMakeLists.txt +493 -0
  1043. data/vendor/ggml/src/ggml-alloc.c +1248 -0
  1044. data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
  1045. data/vendor/ggml/src/ggml-backend-dl.h +45 -0
  1046. data/vendor/ggml/src/ggml-backend-impl.h +275 -0
  1047. data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
  1048. data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
  1049. data/vendor/ggml/src/ggml-backend.cpp +2371 -0
  1050. data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  1051. data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
  1052. data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
  1053. data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
  1054. data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
  1055. data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
  1056. data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
  1057. data/vendor/ggml/src/ggml-cann/common.h +651 -0
  1058. data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
  1059. data/vendor/ggml/src/ggml-common.h +1900 -0
  1060. data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
  1061. data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
  1062. data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
  1063. data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
  1064. data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  1065. data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  1066. data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  1067. data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
  1068. data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
  1069. data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
  1070. data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  1071. data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
  1072. data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  1073. data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
  1074. data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
  1075. data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  1076. data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
  1077. data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
  1078. data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  1079. data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
  1080. data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
  1081. data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
  1082. data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  1083. data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
  1084. data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  1085. data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
  1086. data/vendor/ggml/src/ggml-cpu/common.h +95 -0
  1087. data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
  1088. data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
  1089. data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
  1090. data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
  1091. data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
  1092. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
  1093. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  1094. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
  1095. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  1096. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
  1097. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  1098. data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
  1099. data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
  1100. data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
  1101. data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
  1102. data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
  1103. data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
  1104. data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
  1105. data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
  1106. data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
  1107. data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
  1108. data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
  1109. data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
  1110. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
  1111. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
  1112. data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
  1113. data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
  1114. data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
  1115. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
  1116. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
  1117. data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
  1118. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
  1119. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
  1120. data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
  1121. data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
  1122. data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
  1123. data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  1124. data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
  1125. data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
  1126. data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
  1127. data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
  1128. data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
  1129. data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
  1130. data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
  1131. data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
  1132. data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
  1133. data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
  1134. data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
  1135. data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
  1136. data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
  1137. data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
  1138. data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
  1139. data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
  1140. data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
  1141. data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
  1142. data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
  1143. data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
  1144. data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
  1145. data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
  1146. data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
  1147. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
  1148. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  1149. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  1150. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  1151. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
  1152. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
  1153. data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
  1154. data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  1155. data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
  1156. data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
  1157. data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
  1158. data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  1159. data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  1160. data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  1161. data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
  1162. data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
  1163. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
  1164. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  1165. data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
  1166. data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  1167. data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
  1168. data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
  1169. data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
  1170. data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
  1171. data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  1172. data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
  1173. data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
  1174. data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
  1175. data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
  1176. data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
  1177. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
  1178. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
  1179. data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
  1180. data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
  1181. data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
  1182. data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
  1183. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
  1184. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  1185. data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
  1186. data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
  1187. data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
  1188. data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
  1189. data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
  1190. data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
  1191. data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
  1192. data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
  1193. data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
  1194. data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
  1195. data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
  1196. data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
  1197. data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
  1198. data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
  1199. data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
  1200. data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
  1201. data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
  1202. data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
  1203. data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
  1204. data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
  1205. data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
  1206. data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
  1207. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  1208. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  1209. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  1210. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  1211. data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
  1212. data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  1213. data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
  1214. data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
  1215. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  1216. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  1217. data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
  1218. data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  1219. data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
  1220. data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
  1221. data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
  1222. data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
  1223. data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
  1224. data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
  1225. data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
  1226. data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
  1227. data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
  1228. data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
  1229. data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  1230. data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
  1231. data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
  1232. data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
  1233. data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
  1234. data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
  1235. data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
  1236. data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
  1237. data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
  1238. data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  1239. data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  1240. data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
  1241. data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  1242. data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
  1243. data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  1244. data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
  1245. data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
  1246. data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
  1247. data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  1248. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
  1249. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
  1250. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
  1251. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  1252. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  1253. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
  1254. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
  1255. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
  1256. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
  1257. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
  1258. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  1259. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  1260. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
  1261. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  1262. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
  1263. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
  1264. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  1265. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  1266. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  1267. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
  1268. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
  1269. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  1270. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  1271. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
  1272. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  1273. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
  1274. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  1275. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
  1276. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  1277. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  1278. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  1279. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  1280. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  1281. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
  1282. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
  1283. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
  1284. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
  1285. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
  1286. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
  1287. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
  1288. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
  1289. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  1290. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  1291. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  1292. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  1293. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  1294. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  1295. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
  1296. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  1297. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  1298. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  1299. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  1300. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  1301. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  1302. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
  1303. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  1304. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  1305. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  1306. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  1307. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  1308. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  1309. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
  1310. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  1311. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  1312. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  1313. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  1314. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  1315. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  1316. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
  1317. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  1318. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  1319. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  1320. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  1321. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  1322. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  1323. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
  1324. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  1325. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  1326. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  1327. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  1328. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  1329. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  1330. data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
  1331. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  1332. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  1333. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  1334. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  1335. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  1336. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  1337. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  1338. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  1339. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  1340. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  1341. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  1342. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  1343. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  1344. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  1345. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  1346. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  1347. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  1348. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  1349. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  1350. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  1351. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  1352. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  1353. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  1354. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  1355. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  1356. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
  1357. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
  1358. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  1359. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  1360. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  1361. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  1362. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  1363. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  1364. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  1365. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  1366. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  1367. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  1368. data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
  1369. data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
  1370. data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
  1371. data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
  1372. data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
  1373. data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
  1374. data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
  1375. data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  1376. data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
  1377. data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
  1378. data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
  1379. data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
  1380. data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
  1381. data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
  1382. data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
  1383. data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
  1384. data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
  1385. data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
  1386. data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
  1387. data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
  1388. data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
  1389. data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
  1390. data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
  1391. data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
  1392. data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  1393. data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
  1394. data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
  1395. data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
  1396. data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
  1397. data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
  1398. data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
  1399. data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
  1400. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  1401. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
  1402. data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
  1403. data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  1404. data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
  1405. data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
  1406. data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
  1407. data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
  1408. data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
  1409. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
  1410. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
  1411. data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
  1412. data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
  1413. data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
  1414. data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
  1415. data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  1416. data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
  1417. data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
  1418. data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
  1419. data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  1420. data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
  1421. data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  1422. data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  1423. data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  1424. data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
  1425. data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  1426. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
  1427. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  1428. data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  1429. data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
  1430. data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
  1431. data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
  1432. data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
  1433. data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
  1434. data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
  1435. data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
  1436. data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
  1437. data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
  1438. data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  1439. data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
  1440. data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
  1441. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  1442. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  1443. data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  1444. data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  1445. data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
  1446. data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
  1447. data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
  1448. data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
  1449. data/vendor/ggml/src/ggml-impl.h +783 -0
  1450. data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  1451. data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  1452. data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  1453. data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  1454. data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
  1455. data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
  1456. data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
  1457. data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
  1458. data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
  1459. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
  1460. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
  1461. data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
  1462. data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
  1463. data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
  1464. data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
  1465. data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
  1466. data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
  1467. data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
  1468. data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  1469. data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  1470. data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  1471. data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  1472. data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  1473. data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  1474. data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  1475. data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
  1476. data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  1477. data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
  1478. data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  1479. data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  1480. data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  1481. data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  1482. data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  1483. data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  1484. data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  1485. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  1486. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  1487. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  1488. data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  1489. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  1490. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
  1491. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
  1492. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
  1493. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
  1494. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
  1495. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
  1496. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
  1497. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  1498. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
  1499. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
  1500. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
  1501. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
  1502. data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
  1503. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  1504. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
  1505. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
  1506. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
  1507. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
  1508. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
  1509. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
  1510. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
  1511. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
  1512. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  1513. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
  1514. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
  1515. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
  1516. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
  1517. data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  1518. data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  1519. data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  1520. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  1521. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  1522. data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  1523. data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  1524. data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
  1525. data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
  1526. data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  1527. data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  1528. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  1529. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  1530. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  1531. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
  1532. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  1533. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  1534. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
  1535. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
  1536. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  1537. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  1538. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  1539. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  1540. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  1541. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  1542. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  1543. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  1544. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  1545. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  1546. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  1547. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  1548. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
  1549. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
  1550. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  1551. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  1552. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  1553. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  1554. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  1555. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  1556. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  1557. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  1558. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  1559. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  1560. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
  1561. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
  1562. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
  1563. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  1564. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  1565. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  1566. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  1567. data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  1568. data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  1569. data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  1570. data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  1571. data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  1572. data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  1573. data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  1574. data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  1575. data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  1576. data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  1577. data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  1578. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  1579. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  1580. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  1581. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  1582. data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  1583. data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  1584. data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  1585. data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  1586. data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  1587. data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  1588. data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  1589. data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  1590. data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
  1591. data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  1592. data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  1593. data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  1594. data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  1595. data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
  1596. data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  1597. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
  1598. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  1599. data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
  1600. data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
  1601. data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  1602. data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  1603. data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  1604. data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  1605. data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  1606. data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  1607. data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  1608. data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  1609. data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  1610. data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  1611. data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  1612. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  1613. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  1614. data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  1615. data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  1616. data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  1617. data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  1618. data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
  1619. data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  1620. data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  1621. data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  1622. data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  1623. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
  1624. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  1625. data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  1626. data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
  1627. data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
  1628. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  1629. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  1630. data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  1631. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  1632. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  1633. data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
  1634. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
  1635. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  1636. data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
  1637. data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
  1638. data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
  1639. data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
  1640. data/vendor/ggml/src/ggml-opt.cpp +1094 -0
  1641. data/vendor/ggml/src/ggml-quants.c +5491 -0
  1642. data/vendor/ggml/src/ggml-quants.h +112 -0
  1643. data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
  1644. data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
  1645. data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
  1646. data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
  1647. data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
  1648. data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
  1649. data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
  1650. data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
  1651. data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
  1652. data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  1653. data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
  1654. data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
  1655. data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
  1656. data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
  1657. data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
  1658. data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
  1659. data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
  1660. data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
  1661. data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  1662. data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  1663. data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
  1664. data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
  1665. data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
  1666. data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
  1667. data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
  1668. data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
  1669. data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
  1670. data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
  1671. data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  1672. data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
  1673. data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
  1674. data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
  1675. data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
  1676. data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
  1677. data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
  1678. data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
  1679. data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
  1680. data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
  1681. data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
  1682. data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
  1683. data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
  1684. data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
  1685. data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
  1686. data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
  1687. data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
  1688. data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
  1689. data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
  1690. data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
  1691. data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
  1692. data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
  1693. data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
  1694. data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
  1695. data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  1696. data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
  1697. data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
  1698. data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
  1699. data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
  1700. data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
  1701. data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
  1702. data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
  1703. data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
  1704. data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
  1705. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  1706. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  1707. data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
  1708. data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
  1709. data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
  1710. data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  1711. data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  1712. data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
  1713. data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
  1714. data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
  1715. data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
  1716. data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
  1717. data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
  1718. data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
  1719. data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  1720. data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
  1721. data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
  1722. data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
  1723. data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
  1724. data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
  1725. data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  1726. data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
  1727. data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
  1728. data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
  1729. data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
  1730. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  1731. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  1732. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  1733. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  1734. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
  1735. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  1736. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  1737. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  1738. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  1739. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  1740. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
  1741. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
  1742. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
  1743. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
  1744. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
  1745. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
  1746. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
  1747. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
  1748. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
  1749. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
  1750. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
  1751. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
  1752. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
  1753. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
  1754. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
  1755. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
  1756. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
  1757. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
  1758. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
  1759. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
  1760. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
  1761. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
  1762. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
  1763. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
  1764. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
  1765. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
  1766. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
  1767. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
  1768. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
  1769. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
  1770. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
  1771. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
  1772. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
  1773. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
  1774. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
  1775. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
  1776. data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  1777. data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  1778. data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
  1779. data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
  1780. data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
  1781. data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
  1782. data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
  1783. data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
  1784. data/vendor/ggml/src/ggml-threading.cpp +12 -0
  1785. data/vendor/ggml/src/ggml-threading.h +14 -0
  1786. data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  1787. data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  1788. data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  1789. data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  1790. data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  1791. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  1792. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  1793. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  1794. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  1795. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  1796. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  1797. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  1798. data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  1799. data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  1800. data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  1801. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  1802. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  1803. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  1804. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  1805. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  1806. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  1807. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
  1808. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
  1809. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  1810. data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
  1811. data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  1812. data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  1813. data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  1814. data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  1815. data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  1816. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  1817. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  1818. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  1819. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  1820. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  1821. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  1822. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
  1823. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  1824. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  1825. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  1826. data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
  1827. data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
  1828. data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
  1829. data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  1830. data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
  1831. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  1832. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  1833. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  1834. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  1835. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  1836. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  1837. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  1838. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  1839. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  1840. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  1841. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  1842. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  1843. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  1844. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  1845. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  1846. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  1847. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  1848. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  1849. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  1850. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
  1851. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  1852. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  1853. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  1854. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  1855. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  1856. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  1857. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  1858. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  1859. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
  1860. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
  1861. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  1862. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  1863. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  1864. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  1865. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  1866. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  1867. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  1868. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  1869. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  1870. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  1871. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  1872. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
  1873. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
  1874. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  1875. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  1876. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  1877. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  1878. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  1879. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  1880. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  1881. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  1882. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  1883. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  1884. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
  1885. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  1886. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  1887. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  1888. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
  1889. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  1890. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  1891. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  1892. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  1893. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  1894. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
  1895. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
  1896. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
  1897. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
  1898. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
  1899. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  1900. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
  1901. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  1902. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  1903. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
  1904. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  1905. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  1906. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  1907. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  1908. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  1909. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  1910. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
  1911. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  1912. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  1913. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  1914. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  1915. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
  1916. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
  1917. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  1918. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  1919. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  1920. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
  1921. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
  1922. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  1923. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  1924. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
  1925. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  1926. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  1927. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  1928. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  1929. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  1930. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  1931. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  1932. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  1933. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  1934. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  1935. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  1936. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  1937. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  1938. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  1939. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  1940. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  1941. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
  1942. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  1943. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  1944. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  1945. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
  1946. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  1947. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  1948. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
  1949. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  1950. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  1951. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  1952. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
  1953. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
  1954. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  1955. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  1956. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  1957. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  1958. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  1959. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  1960. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
  1961. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  1962. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  1963. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  1964. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  1965. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  1966. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  1967. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  1968. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  1969. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  1970. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
  1971. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  1972. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  1973. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  1974. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
  1975. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  1976. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  1977. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  1978. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  1979. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  1980. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  1981. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  1982. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  1983. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  1984. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  1985. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  1986. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  1987. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  1988. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  1989. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  1990. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  1991. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  1992. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  1993. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  1994. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  1995. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  1996. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  1997. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  1998. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  1999. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  2000. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  2001. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  2002. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  2003. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  2004. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  2005. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  2006. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
  2007. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  2008. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
  2009. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  2010. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  2011. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
  2012. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  2013. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  2014. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  2015. data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  2016. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
  2017. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
  2018. data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  2019. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
  2020. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  2021. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  2022. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  2023. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
  2024. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
  2025. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  2026. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
  2027. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
  2028. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  2029. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
  2030. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
  2031. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
  2032. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
  2033. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
  2034. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
  2035. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
  2036. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
  2037. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
  2038. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
  2039. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  2040. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
  2041. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
  2042. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
  2043. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
  2044. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
  2045. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
  2046. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
  2047. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
  2048. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
  2049. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  2050. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  2051. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
  2052. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
  2053. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
  2054. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  2055. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
  2056. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  2057. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
  2058. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
  2059. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
  2060. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
  2061. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  2062. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
  2063. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
  2064. data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  2065. data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
  2066. data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
  2067. data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  2068. data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  2069. data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
  2070. data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
  2071. data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  2072. data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
  2073. data/vendor/ggml/src/ggml.c +7777 -0
  2074. data/vendor/ggml/src/ggml.cpp +26 -0
  2075. data/vendor/ggml/src/gguf.cpp +1556 -0
  2076. data/vendor/ggml/tests/CMakeLists.txt +356 -0
  2077. data/vendor/ggml/tests/test-arange.cpp +100 -0
  2078. data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
  2079. data/vendor/ggml/tests/test-cont.c +170 -0
  2080. data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
  2081. data/vendor/ggml/tests/test-conv-transpose.c +248 -0
  2082. data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
  2083. data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
  2084. data/vendor/ggml/tests/test-conv1d.cpp +289 -0
  2085. data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
  2086. data/vendor/ggml/tests/test-conv2d.cpp +391 -0
  2087. data/vendor/ggml/tests/test-customop.c +300 -0
  2088. data/vendor/ggml/tests/test-dup.c +111 -0
  2089. data/vendor/ggml/tests/test-interpolate.cpp +166 -0
  2090. data/vendor/ggml/tests/test-opt.cpp +1003 -0
  2091. data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
  2092. data/vendor/ggml/tests/test-pool.c +274 -0
  2093. data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
  2094. data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
  2095. data/vendor/ggml/tests/test-rel-pos.c +87 -0
  2096. data/vendor/ggml/tests/test-roll.cpp +128 -0
  2097. data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
  2098. data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
  2099. data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
  2100. data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
  2101. data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
  2102. data/vendor-patches/0005-concat-backward.patch +36 -0
  2103. data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
  2104. data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
  2105. data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
  2106. data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
  2107. metadata +2161 -0
@@ -0,0 +1,2371 @@
1
+ // Note: porting this file to C++ is a work in progress
2
+
3
+ #ifdef _WIN32
4
+ #define WIN32_LEAN_AND_MEAN
5
+ #ifndef NOMINMAX
6
+ # define NOMINMAX
7
+ #endif
8
+ #include <windows.h>
9
+ #endif
10
+
11
+ #include "ggml-backend.h"
12
+ #include "ggml-backend-impl.h"
13
+ #include "ggml-alloc.h"
14
+ #include "ggml-impl.h"
15
+
16
+ #include <assert.h>
17
+ #include <limits.h>
18
+ #include <stdarg.h>
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+ #include <algorithm>
23
+ #include <vector>
24
+
25
+ #ifdef __APPLE__
26
+ #include <sys/types.h>
27
+ #include <sys/sysctl.h>
28
+ #endif
29
+
30
+
31
+ // backend buffer type
32
+
33
+ const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
34
+ GGML_ASSERT(buft);
35
+ return buft->iface.get_name(buft);
36
+ }
37
+
38
+ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
39
+ GGML_ASSERT(buft);
40
+ if (size == 0) {
41
+ // return a dummy buffer for zero-sized allocations
42
+ return ggml_backend_buffer_init(buft, {}, NULL, 0);
43
+ }
44
+ return buft->iface.alloc_buffer(buft, size);
45
+ }
46
+
47
+ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
48
+ GGML_ASSERT(buft);
49
+ return buft->iface.get_alignment(buft);
50
+ }
51
+
52
+ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
53
+ GGML_ASSERT(buft);
54
+ // get_max_size is optional, defaults to SIZE_MAX
55
+ if (buft->iface.get_max_size) {
56
+ return buft->iface.get_max_size(buft);
57
+ }
58
+ return SIZE_MAX;
59
+ }
60
+
61
+ size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
62
+ GGML_ASSERT(buft);
63
+ // get_alloc_size is optional, defaults to ggml_nbytes
64
+ if (buft->iface.get_alloc_size) {
65
+ size_t size = buft->iface.get_alloc_size(buft, tensor);
66
+ assert(size >= ggml_nbytes(tensor));
67
+ return size;
68
+ }
69
+ return ggml_nbytes(tensor);
70
+ }
71
+
72
+ bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
73
+ GGML_ASSERT(buft);
74
+ if (buft->iface.is_host) {
75
+ return buft->iface.is_host(buft);
76
+ }
77
+ return false;
78
+ }
79
+
80
+ ggml_backend_dev_t ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) {
81
+ GGML_ASSERT(buft);
82
+ return buft->device;
83
+ }
84
+
85
+ // backend buffer
86
+
87
+ ggml_backend_buffer_t ggml_backend_buffer_init(
88
+ ggml_backend_buffer_type_t buft,
89
+ struct ggml_backend_buffer_i iface,
90
+ void * context,
91
+ size_t size) {
92
+ ggml_backend_buffer_t buffer = new ggml_backend_buffer {
93
+ /* .interface = */ iface,
94
+ /* .buft = */ buft,
95
+ /* .context = */ context,
96
+ /* .size = */ size,
97
+ /* .usage = */ GGML_BACKEND_BUFFER_USAGE_ANY
98
+ };
99
+
100
+ return buffer;
101
+ }
102
+
103
+ const char * ggml_backend_buffer_name(ggml_backend_buffer_t buffer) {
104
+ return ggml_backend_buft_name(ggml_backend_buffer_get_type(buffer));
105
+ }
106
+
107
+ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
108
+ if (buffer == NULL) {
109
+ return;
110
+ }
111
+
112
+ if (buffer->iface.free_buffer != NULL) {
113
+ buffer->iface.free_buffer(buffer);
114
+ }
115
+ delete buffer;
116
+ }
117
+
118
+ size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
119
+ GGML_ASSERT(buffer);
120
+ return buffer->size;
121
+ }
122
+
123
+ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
124
+ GGML_ASSERT(buffer);
125
+ // get_base is optional if the buffer is zero-sized
126
+ if (!ggml_backend_buffer_is_meta(buffer) && buffer->size == 0) {
127
+ return NULL;
128
+ }
129
+
130
+ // FIXME JG: a multi_buffer has a non-zero size, according to the above comment get_base is not optional,
131
+ // I don't know whether the above comment is correct
132
+ if (!buffer->iface.get_base) {
133
+ return NULL;
134
+ }
135
+
136
+ void * base = buffer->iface.get_base(buffer);
137
+
138
+ GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
139
+
140
+ return base;
141
+ }
142
+
143
+ enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
144
+ GGML_ASSERT(buffer);
145
+ // init_tensor is optional
146
+ if (buffer->iface.init_tensor) {
147
+ return buffer->iface.init_tensor(buffer, tensor);
148
+ }
149
+ return GGML_STATUS_SUCCESS;
150
+ }
151
+
152
+ void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
153
+ GGML_ASSERT(buffer);
154
+ // clear is optional if the buffer is zero-sized
155
+ if (buffer->size == 0) {
156
+ return;
157
+ }
158
+
159
+ buffer->iface.clear(buffer, value);
160
+ }
161
+
162
+ size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) {
163
+ return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer));
164
+ }
165
+
166
+ size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) {
167
+ return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer));
168
+ }
169
+
170
+ size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor) {
171
+ return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
172
+ }
173
+
174
+ bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) {
175
+ return ggml_backend_buft_is_host(ggml_backend_buffer_get_type(buffer));
176
+ }
177
+
178
+ void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
179
+ GGML_ASSERT(buffer);
180
+ buffer->usage = usage;
181
+
182
+ // FIXME: add a generic callback to the buffer interface
183
+ if (ggml_backend_buffer_is_multi_buffer(buffer)) {
184
+ ggml_backend_multi_buffer_set_usage(buffer, usage);
185
+ }
186
+ }
187
+
188
+ enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage(ggml_backend_buffer_t buffer) {
189
+ GGML_ASSERT(buffer);
190
+ return buffer->usage;
191
+ }
192
+
193
+ ggml_backend_buffer_type_t ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) {
194
+ GGML_ASSERT(buffer);
195
+ return buffer->buft;
196
+ }
197
+
198
+ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
199
+ GGML_ASSERT(buffer);
200
+ if (buffer->iface.reset) {
201
+ buffer->iface.reset(buffer);
202
+ }
203
+ }
204
+
205
+ bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) {
206
+ ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer;
207
+ if (dst_buf->iface.cpy_tensor) {
208
+ return dst_buf->iface.cpy_tensor(dst_buf, src, dst);
209
+ }
210
+ return false;
211
+ }
212
+
213
+ // backend
214
+
215
+ ggml_guid_t ggml_backend_guid(ggml_backend_t backend) {
216
+ if (backend == NULL) {
217
+ return NULL;
218
+ }
219
+ return backend->guid;
220
+ }
221
+
222
+ const char * ggml_backend_name(ggml_backend_t backend) {
223
+ if (backend == NULL) {
224
+ return "NULL";
225
+ }
226
+ return backend->iface.get_name(backend);
227
+ }
228
+
229
+ void ggml_backend_free(ggml_backend_t backend) {
230
+ if (backend == NULL) {
231
+ return;
232
+ }
233
+
234
+ backend->iface.free(backend);
235
+ }
236
+
237
+ ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend) {
238
+ GGML_ASSERT(backend);
239
+ return ggml_backend_dev_buffer_type(backend->device);
240
+ }
241
+
242
+ ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) {
243
+ return ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), size);
244
+ }
245
+
246
+ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
247
+ return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend));
248
+ }
249
+
250
+ size_t ggml_backend_get_max_size(ggml_backend_t backend) {
251
+ return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
252
+ }
253
+
254
+ void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
255
+ GGML_ASSERT(backend);
256
+ GGML_ASSERT(tensor);
257
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
258
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
259
+
260
+ if (backend->iface.set_tensor_async == NULL) {
261
+ ggml_backend_synchronize(backend);
262
+ ggml_backend_tensor_set(tensor, data, offset, size);
263
+ } else {
264
+ backend->iface.set_tensor_async(backend, tensor, data, offset, size);
265
+ }
266
+ }
267
+
268
+ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
269
+ GGML_ASSERT(backend);
270
+ GGML_ASSERT(tensor);
271
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
272
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
273
+
274
+ if (backend->iface.get_tensor_async == NULL) {
275
+ ggml_backend_synchronize(backend);
276
+ ggml_backend_tensor_get(tensor, data, offset, size);
277
+ } else {
278
+ backend->iface.get_tensor_async(backend, tensor, data, offset, size);
279
+ }
280
+ }
281
+
282
+ void ggml_backend_tensor_set_2d_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size,
283
+ size_t n_copies, size_t stride_tensor, size_t stride_data) {
284
+ GGML_ASSERT(backend);
285
+ GGML_ASSERT(tensor);
286
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
287
+
288
+ if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
289
+ for (size_t i = 0; i < n_copies; i++) {
290
+ ggml_backend_tensor_set_async(backend, tensor, (const char *) data + i*stride_data, offset + i*stride_tensor, size);
291
+ }
292
+ return;
293
+ }
294
+ if (size == 0) {
295
+ return;
296
+ }
297
+
298
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
299
+ GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
300
+ backend->iface.set_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
301
+ }
302
+
303
+ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size,
304
+ size_t n_copies, size_t stride_tensor, size_t stride_data) {
305
+ GGML_ASSERT(backend);
306
+ GGML_ASSERT(tensor);
307
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
308
+
309
+ if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
310
+ for (size_t i = 0; i < n_copies; i++) {
311
+ ggml_backend_tensor_get_async(backend, tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
312
+ }
313
+ return;
314
+ }
315
+ if (size == 0) {
316
+ return;
317
+ }
318
+
319
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
320
+ GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
321
+ backend->iface.get_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
322
+ }
323
+
324
+ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
325
+ GGML_ASSERT(tensor);
326
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
327
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
328
+
329
+ if (size == 0) {
330
+ return;
331
+ }
332
+
333
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
334
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
335
+
336
+ buf->iface.set_tensor(buf, tensor, data, offset, size);
337
+ }
338
+
339
+ void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
340
+ GGML_ASSERT(tensor);
341
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
342
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
343
+
344
+ if (size == 0) {
345
+ return;
346
+ }
347
+
348
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
349
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
350
+
351
+ buf->iface.get_tensor(buf, tensor, data, offset, size);
352
+ }
353
+
354
+ void ggml_backend_tensor_set_2d(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size,
355
+ size_t n_copies, size_t stride_tensor, size_t stride_data) {
356
+ GGML_ASSERT(tensor);
357
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
358
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
359
+
360
+ if (n_copies <= 1 || buf->iface.set_tensor_2d == NULL) {
361
+ for (size_t i = 0; i < n_copies; i++) {
362
+ ggml_backend_tensor_set(tensor, (const char *) data + i*stride_data, offset + i*stride_tensor, size);
363
+ }
364
+ return;
365
+ }
366
+ if (size == 0) {
367
+ return;
368
+ }
369
+
370
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
371
+ GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
372
+
373
+ buf->iface.set_tensor_2d(buf, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
374
+ }
375
+
376
+ void ggml_backend_tensor_get_2d(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size,
377
+ size_t n_copies, size_t stride_tensor, size_t stride_data) {
378
+ GGML_ASSERT(tensor);
379
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
380
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
381
+
382
+ if (n_copies <= 1 || buf->iface.set_tensor_2d == NULL) {
383
+ for (size_t i = 0; i < n_copies; i++) {
384
+ ggml_backend_tensor_get(tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
385
+ }
386
+ return;
387
+ }
388
+ if (size == 0) {
389
+ return;
390
+ }
391
+
392
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
393
+ GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
394
+
395
+ buf->iface.get_tensor_2d(buf, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
396
+ }
397
+
398
+ void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
399
+ GGML_ASSERT(tensor);
400
+ ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
401
+
402
+ if (size == 0) {
403
+ return;
404
+ }
405
+
406
+ GGML_ASSERT(buf != NULL && "tensor buffer not set");
407
+ GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
408
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
409
+ GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not implemented by backend buffer");
410
+
411
+ buf->iface.memset_tensor(buf, tensor, value, offset, size);
412
+ }
413
+
414
+ void ggml_backend_synchronize(ggml_backend_t backend) {
415
+ GGML_ASSERT(backend);
416
+ if (backend->iface.synchronize == NULL) {
417
+ return;
418
+ }
419
+
420
+ backend->iface.synchronize(backend);
421
+ }
422
+
423
+ ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
424
+ GGML_ASSERT(backend);
425
+ GGML_ASSERT(backend->iface.graph_plan_create != NULL);
426
+
427
+ return backend->iface.graph_plan_create(backend, cgraph);
428
+ }
429
+
430
+ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
431
+ GGML_ASSERT(backend);
432
+ GGML_ASSERT(backend->iface.graph_plan_free != NULL);
433
+
434
+ backend->iface.graph_plan_free(backend, plan);
435
+ }
436
+
437
+ enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
438
+ GGML_ASSERT(backend);
439
+ GGML_ASSERT(backend->iface.graph_plan_compute != NULL);
440
+
441
+ return backend->iface.graph_plan_compute(backend, plan);
442
+ }
443
+
444
+ enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
445
+ enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph);
446
+ ggml_backend_synchronize(backend);
447
+ return err;
448
+ }
449
+
450
+ enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
451
+ GGML_ASSERT(backend);
452
+ return backend->iface.graph_compute(backend, cgraph);
453
+ }
454
+
455
+ bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
456
+ GGML_ASSERT(backend);
457
+ return ggml_backend_dev_supports_op(backend->device, op);
458
+ }
459
+
460
+ bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
461
+ GGML_ASSERT(backend);
462
+ return ggml_backend_dev_supports_buft(backend->device, buft);
463
+ }
464
+
465
+ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
466
+ GGML_ASSERT(backend);
467
+ return ggml_backend_dev_offload_op(backend->device, op);
468
+ }
469
+
470
+ ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {
471
+ GGML_ASSERT(backend);
472
+ return backend->device;
473
+ }
474
+
475
+ // backend copy
476
+
477
+ void ggml_backend_tensor_copy(const struct ggml_tensor * src, struct ggml_tensor * dst) {
478
+ GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
479
+
480
+ if (src == dst) {
481
+ return;
482
+ }
483
+
484
+ if (ggml_backend_buffer_is_host(src->buffer)) {
485
+ ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
486
+ } else if (ggml_backend_buffer_is_host(dst->buffer)) {
487
+ ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
488
+ } else if (!ggml_backend_buffer_copy_tensor(src, dst)) {
489
+ #ifndef NDEBUG
490
+ GGML_LOG_DEBUG("%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer));
491
+ #endif // NDEBUG
492
+ size_t nbytes = ggml_nbytes(src);
493
+ void * data = malloc(nbytes);
494
+ ggml_backend_tensor_get(src, data, 0, nbytes);
495
+ ggml_backend_tensor_set(dst, data, 0, nbytes);
496
+ free(data);
497
+ }
498
+ }
499
+
500
+ void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst) {
501
+ GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
502
+
503
+ if (src == dst) {
504
+ return;
505
+ }
506
+
507
+ GGML_ASSERT(backend_dst);
508
+ if (backend_dst->iface.cpy_tensor_async != NULL) {
509
+ if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) {
510
+ return;
511
+ }
512
+ }
513
+
514
+ // an async copy would normally happen after all the queued operations on both backends are completed
515
+ // to simulate the same behavior, we need to synchronize both backends first, and do a blocking copy
516
+ ggml_backend_synchronize(backend_src);
517
+ ggml_backend_synchronize(backend_dst);
518
+ ggml_backend_tensor_copy(src, dst);
519
+ }
520
+
521
+ // events
522
+
523
+ ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device) {
524
+ // null device is allowed for the transition period to the device interface
525
+ if (device == NULL || device->iface.event_new == NULL) {
526
+ return NULL;
527
+ }
528
+ return device->iface.event_new(device);
529
+ }
530
+
531
+ void ggml_backend_event_free(ggml_backend_event_t event) {
532
+ if (event == NULL) {
533
+ return;
534
+ }
535
+ event->device->iface.event_free(event->device, event);
536
+ }
537
+
538
+ void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend) {
539
+ GGML_ASSERT(backend);
540
+ GGML_ASSERT(backend->iface.event_record != NULL);
541
+
542
+ backend->iface.event_record(backend, event);
543
+ }
544
+
545
+ void ggml_backend_event_synchronize(ggml_backend_event_t event) {
546
+ GGML_ASSERT(event);
547
+ GGML_ASSERT(event->device->iface.event_synchronize);
548
+
549
+ event->device->iface.event_synchronize(event->device, event);
550
+ }
551
+
552
+ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
553
+ GGML_ASSERT(backend);
554
+ GGML_ASSERT(backend->iface.event_wait != NULL);
555
+
556
+ backend->iface.event_wait(backend, event);
557
+ }
558
+
559
+ static void ggml_backend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
560
+ GGML_ASSERT(backend);
561
+ if (backend->iface.graph_optimize != NULL) {
562
+ backend->iface.graph_optimize(backend, cgraph);
563
+ }
564
+ }
565
+
566
+ // Backend device
567
+
568
+ const char * ggml_backend_dev_name(ggml_backend_dev_t device) {
569
+ GGML_ASSERT(device);
570
+ return device->iface.get_name(device);
571
+ }
572
+
573
+ const char * ggml_backend_dev_description(ggml_backend_dev_t device) {
574
+ GGML_ASSERT(device);
575
+ return device->iface.get_description(device);
576
+ }
577
+
578
+ void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
579
+ GGML_ASSERT(device);
580
+ device->iface.get_memory(device, free, total);
581
+ }
582
+
583
+ enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
584
+ GGML_ASSERT(device);
585
+ return device->iface.get_type(device);
586
+ }
587
+
588
+ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
589
+ GGML_ASSERT(device);
590
+ memset(props, 0, sizeof(*props));
591
+ device->iface.get_props(device, props);
592
+ }
593
+
594
+ ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) {
595
+ GGML_ASSERT(device);
596
+ return device->reg;
597
+ }
598
+
599
+ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params) {
600
+ GGML_ASSERT(device);
601
+ return device->iface.init_backend(device, params);
602
+ }
603
+
604
+ ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device) {
605
+ GGML_ASSERT(device);
606
+ return device->iface.get_buffer_type(device);
607
+ }
608
+
609
+ ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
610
+ GGML_ASSERT(device);
611
+ if (device->iface.get_host_buffer_type == NULL) {
612
+ return NULL;
613
+ }
614
+
615
+ return device->iface.get_host_buffer_type(device);
616
+ }
617
+
618
+ ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size) {
619
+ GGML_ASSERT(device);
620
+ return device->iface.buffer_from_host_ptr(device, ptr, size, max_tensor_size);
621
+ }
622
+
623
+ bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
624
+ GGML_ASSERT(device);
625
+ return device->iface.supports_op(device, op);
626
+ }
627
+
628
+ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) {
629
+ GGML_ASSERT(device);
630
+ return device->iface.supports_buft(device, buft);
631
+ }
632
+
633
+ bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
634
+ GGML_ASSERT(device);
635
+ if (device->iface.offload_op != NULL) {
636
+ return device->iface.offload_op(device, op);
637
+ }
638
+
639
+ return false;
640
+ }
641
+
642
+ // Backend (reg)
643
+
644
+ const char * ggml_backend_reg_name(ggml_backend_reg_t reg) {
645
+ GGML_ASSERT(reg);
646
+ return reg->iface.get_name(reg);
647
+ }
648
+
649
+ size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg) {
650
+ GGML_ASSERT(reg);
651
+ return reg->iface.get_device_count(reg);
652
+ }
653
+
654
+ ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index) {
655
+ GGML_ASSERT(reg);
656
+ return reg->iface.get_device(reg, index);
657
+ }
658
+
659
+ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
660
+ GGML_ASSERT(reg);
661
+ if (!reg->iface.get_proc_address) {
662
+ return NULL;
663
+ }
664
+ return reg->iface.get_proc_address(reg, name);
665
+ }
666
+
667
+ // multi-buffer buffer
668
+
669
+ struct ggml_backend_multi_buffer_context {
670
+ ggml_backend_buffer_t * buffers;
671
+ size_t n_buffers;
672
+ };
673
+
674
+ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
675
+ GGML_ASSERT(buffer);
676
+ ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
677
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
678
+ ggml_backend_buffer_free(ctx->buffers[i]);
679
+ }
680
+
681
+ free(ctx->buffers);
682
+ free(ctx);
683
+ }
684
+
685
+ static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
686
+ GGML_ASSERT(buffer);
687
+ ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
688
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
689
+ ggml_backend_buffer_clear(ctx->buffers[i], value);
690
+ }
691
+ }
692
+
693
+ static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
694
+ /* .free_buffer = */ ggml_backend_multi_buffer_free_buffer,
695
+ /* .get_base = */ NULL,
696
+ /* .init_tensor = */ NULL,
697
+ /* .memset_tensor = */ NULL,
698
+ /* .set_tensor = */ NULL,
699
+ /* .get_tensor = */ NULL,
700
+ /* .set_tensor_2d = */ NULL,
701
+ /* .get_tensor_2d = */ NULL,
702
+ /* .cpy_tensor = */ NULL,
703
+ /* .clear = */ ggml_backend_multi_buffer_clear,
704
+ /* .reset = */ NULL,
705
+ };
706
+
707
+ ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) {
708
+ ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) malloc(sizeof(struct ggml_backend_multi_buffer_context));
709
+ ctx->n_buffers = n_buffers;
710
+ ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t));
711
+
712
+ GGML_ASSERT(ctx->buffers != NULL);
713
+
714
+ size_t total_size = 0;
715
+ for (size_t i = 0; i < n_buffers; i++) {
716
+ ctx->buffers[i] = buffers[i];
717
+ total_size += ggml_backend_buffer_get_size(buffers[i]);
718
+ }
719
+
720
+ return ggml_backend_buffer_init(buffers[0]->buft, ggml_backend_multi_buffer_i, ctx, total_size);
721
+ }
722
+
723
+ bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) {
724
+ GGML_ASSERT(buffer);
725
+ return buffer->iface.free_buffer == ggml_backend_multi_buffer_free_buffer;
726
+ }
727
+
728
+ void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
729
+ GGML_ASSERT(buffer);
730
+ GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer));
731
+ ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
732
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
733
+ ggml_backend_buffer_set_usage(ctx->buffers[i], usage);
734
+ }
735
+ }
736
+
737
+ // creates a copy of the tensor with the same memory layout
738
+ static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
739
+ struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
740
+ for (int i = 0; i < GGML_MAX_DIMS; i++) {
741
+ dup->nb[i] = tensor->nb[i];
742
+ }
743
+ return dup;
744
+ }
745
+
746
+ static bool ggml_is_view_op(enum ggml_op op) {
747
+ return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
748
+ }
749
+
750
+ // scheduler
751
+
752
+ #ifndef GGML_SCHED_MAX_BACKENDS
753
+ #define GGML_SCHED_MAX_BACKENDS 16
754
+ #endif
755
+
756
+ #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
757
+ #define GGML_SCHED_MAX_SPLIT_INPUTS 30
758
+ #endif
759
+
760
+ #ifndef GGML_SCHED_MAX_COPIES
761
+ #define GGML_SCHED_MAX_COPIES 4
762
+ #endif
763
+
764
+ struct ggml_backend_sched_split {
765
+ int backend_id;
766
+ int i_start;
767
+ int i_end;
768
+ struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
769
+ int n_inputs;
770
+ // graph view of this split
771
+ struct ggml_cgraph graph;
772
+ };
773
+
774
+ struct ggml_backend_sched {
775
+ bool is_reset; // true if the scheduler has been reset since the last graph split
776
+ bool is_alloc;
777
+
778
+ int n_backends;
779
+
780
+ ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
781
+ ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
782
+ ggml_gallocr_t galloc;
783
+
784
+ // hash map of the nodes in the graph
785
+ struct ggml_hash_set hash_set;
786
+ int * hv_tensor_backend_ids; // [hash_set.size]
787
+ struct ggml_tensor ** hv_tensor_copies; // [hash_set.size][n_backends][n_copies]
788
+
789
+ int * node_backend_ids; // [graph_size]
790
+ int * leaf_backend_ids; // [graph_size]
791
+
792
+ int * prev_node_backend_ids; // [graph_size]
793
+ int * prev_leaf_backend_ids; // [graph_size]
794
+
795
+ // copy of the graph with modified inputs
796
+ struct ggml_cgraph graph;
797
+
798
+ // graph splits
799
+ struct ggml_backend_sched_split * splits;
800
+ int n_splits;
801
+ int splits_capacity;
802
+
803
+ // pipeline parallelism support
804
+ int n_copies;
805
+ int cur_copy;
806
+ int next_copy;
807
+ ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
808
+ struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
809
+ int n_graph_inputs;
810
+
811
+ struct ggml_context * ctx;
812
+
813
+ ggml_backend_sched_eval_callback callback_eval;
814
+ void * callback_eval_user_data;
815
+
816
+ char * context_buffer;
817
+ size_t context_buffer_size;
818
+
819
+ bool op_offload;
820
+
821
+ int debug;
822
+
823
+ // used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
824
+ // ref: https://github.com/ggml-org/llama.cpp/pull/17617
825
+ int debug_realloc;
826
+ int debug_graph_size;
827
+ int debug_prev_graph_size;
828
+ };
829
+
830
+ #define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
831
+ #define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)]
832
+ #define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)]
833
+ #define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id)
834
+
835
+ // returns the priority of the backend, lower id is higher priority
836
+ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
837
+ for (int i = 0; i < sched->n_backends; i++) {
838
+ if (sched->backends[i] == backend) {
839
+ return i;
840
+ }
841
+ }
842
+ return -1;
843
+ }
844
+
845
+ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor, const struct ggml_tensor * op) {
846
+ ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
847
+ if (buffer == NULL) {
848
+ return -1;
849
+ }
850
+
851
+ // find highest prio backend that supports the buffer type and the op
852
+ for (int i = 0; i < sched->n_backends; i++) {
853
+ if (ggml_backend_supports_buft(sched->backends[i], buffer->buft) &&
854
+ ggml_backend_supports_op(sched->backends[i], op)) {
855
+ return i;
856
+ }
857
+ }
858
+
859
+ #ifndef NDEBUG
860
+ GGML_LOG_DEBUG("%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n",
861
+ __func__, ggml_op_desc(tensor), ggml_backend_buffer_name(buffer), tensor->name);
862
+ #endif
863
+
864
+ return -1;
865
+ }
866
+
867
+ #if 0
868
+ #define GGML_SCHED_MAX_SPLITS_DEBUG 4096
869
+ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
870
+ #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
871
+ #define GET_CAUSE(node) causes[hash_id(node)]
872
+ #else
873
+ #define SET_CAUSE(node, ...)
874
+ #define GET_CAUSE(node) ""
875
+ #endif
876
+
877
+ // returns the backend that should be used for the node based on the current locations
878
+ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
879
+ // assign pre-allocated nodes to their backend
880
+ int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor, tensor);
881
+ if (cur_backend_id != -1) {
882
+ SET_CAUSE(tensor, "1.dst");
883
+ return cur_backend_id;
884
+ }
885
+
886
+ // view_src
887
+ if (tensor->view_src != NULL) {
888
+ cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src, tensor);
889
+ if (cur_backend_id != -1) {
890
+ SET_CAUSE(tensor, "1.vsrc");
891
+ return cur_backend_id;
892
+ }
893
+ }
894
+
895
+ if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
896
+ // since the tensor is pre-allocated, it cannot be moved to another backend
897
+ ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
898
+ GGML_ABORT("pre-allocated tensor (%s) in a buffer (%s) that cannot run the operation (%s)", tensor->name, ggml_backend_buffer_name(buffer), ggml_op_name(tensor->op));
899
+ }
900
+
901
+ // graph input
902
+ if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
903
+ cur_backend_id = sched->n_backends - 1; // last backend (assumed CPU)
904
+ SET_CAUSE(tensor, "1.inp");
905
+ return cur_backend_id;
906
+ }
907
+
908
+ // operations with weights are preferably run on the same backend as the weights
909
+ for (int i = 0; i < GGML_MAX_SRC; i++) {
910
+ const struct ggml_tensor * src = tensor->src[i];
911
+ if (src == NULL) {
912
+ continue;
913
+ }
914
+ // skip ROPE since the rope freqs tensor is too small to choose a backend based on it
915
+ // not an ideal solution
916
+ if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
917
+ int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
918
+ // check if a backend with higher prio wants to offload the op
919
+ if (sched->op_offload && src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
920
+ for (int b = 0; b < src_backend_id; b++) {
921
+ if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
922
+ SET_CAUSE(tensor, "1.off");
923
+ return b;
924
+ }
925
+ }
926
+ }
927
+ SET_CAUSE(tensor, "1.wgt%d", i);
928
+ return src_backend_id;
929
+ }
930
+ }
931
+
932
+ return -1;
933
+ }
934
+
935
+ static char * fmt_size(size_t size) {
936
+ static char buffer[128];
937
+ if (size >= 1024*1024) {
938
+ snprintf(buffer, sizeof(buffer), "%zuM", size/1024/1024);
939
+ } else {
940
+ snprintf(buffer, sizeof(buffer), "%zuK", size/1024);
941
+ }
942
+ return buffer;
943
+ }
944
+
945
+ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
946
+ int cur_split = 0;
947
+ for (int i = 0; i < graph->n_nodes; i++) {
948
+ if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
949
+ ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id];
950
+ GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs", cur_split, ggml_backend_name(split_backend),
951
+ sched->splits[cur_split].n_inputs);
952
+ for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
953
+ if (j == 0) {
954
+ GGML_LOG_DEBUG(": ");
955
+ }
956
+ GGML_LOG_DEBUG("[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
957
+ fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
958
+ }
959
+ GGML_LOG_DEBUG("\n");
960
+ cur_split++;
961
+ }
962
+ struct ggml_tensor * node = graph->nodes[i];
963
+ if (ggml_is_view_op(node->op)) {
964
+ continue;
965
+ }
966
+ if (sched->debug > 1) {
967
+ ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
968
+ GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d,c=%d:", i, ggml_op_desc(node), node->name,
969
+ fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
970
+ graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)], node->flags & GGML_TENSOR_FLAG_COMPUTE ? 1 : 0);
971
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
972
+ struct ggml_tensor * src = node->src[j];
973
+ if (src == NULL) {
974
+ continue;
975
+ }
976
+ ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
977
+ GGML_LOG_DEBUG(" %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
978
+ fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
979
+ }
980
+ GGML_LOG_DEBUG("\n");
981
+ }
982
+ }
983
+ }
984
+
985
+ static bool ggml_backend_sched_buffer_supported(ggml_backend_sched_t sched, struct ggml_tensor * t, int backend_id) {
986
+ ggml_backend_buffer_t buf = t->view_src ? t->view_src->buffer : t->buffer;
987
+ ggml_backend_buffer_type_t buft = NULL;
988
+
989
+ if (buf) {
990
+ // the tensor is already allocated
991
+ buft = buf->buft;
992
+ } else {
993
+ // see if the tensor already has a backend assigned, and use the buffer type of that backend
994
+ int tensor_backend_id = tensor_backend_id(t);
995
+ if (tensor_backend_id == -1 && t->view_src) {
996
+ tensor_backend_id = tensor_backend_id(t->view_src);
997
+ }
998
+ if (tensor_backend_id != -1) {
999
+ buft = sched->bufts[tensor_backend_id];
1000
+ }
1001
+ }
1002
+
1003
+ return buft != NULL && ggml_backend_supports_buft(sched->backends[backend_id], buft);
1004
+ }
1005
+
1006
+ static void ggml_backend_sched_set_if_supported(ggml_backend_sched_t sched, struct ggml_tensor * node, int cur_backend_id, int * node_backend_id) {
1007
+ if (ggml_backend_supports_op(sched->backends[cur_backend_id], node)) {
1008
+ *node_backend_id = cur_backend_id;
1009
+ SET_CAUSE(node, "2.sup");
1010
+ }
1011
+ }
1012
+
1013
+ // assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend
1014
+ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
1015
+ // reset splits
1016
+ sched->n_splits = 0;
1017
+ sched->n_graph_inputs = 0;
1018
+ sched->is_reset = false;
1019
+
1020
+ struct ggml_init_params params = {
1021
+ /* .mem_size = */ sched->context_buffer_size,
1022
+ /* .mem_buffer = */ sched->context_buffer,
1023
+ /* .no_alloc = */ true
1024
+ };
1025
+
1026
+ ggml_free(sched->ctx);
1027
+
1028
+ sched->ctx = ggml_init(params);
1029
+ if (sched->ctx == NULL) {
1030
+ GGML_ABORT("%s: failed to initialize context\n", __func__);
1031
+ }
1032
+
1033
+ graph->uid = ggml_graph_next_uid();
1034
+
1035
+ // pass 1: assign backends to ops with pre-allocated inputs
1036
+ for (int i = 0; i < graph->n_leafs; i++) {
1037
+ struct ggml_tensor * leaf = graph->leafs[i];
1038
+ int * leaf_backend_id = &tensor_backend_id(leaf);
1039
+ // do not overwrite user assignments
1040
+ if (*leaf_backend_id == -1) {
1041
+ *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
1042
+ }
1043
+ }
1044
+
1045
+ for (int i = 0; i < graph->n_nodes; i++) {
1046
+ struct ggml_tensor * node = graph->nodes[i];
1047
+ int * node_backend_id = &tensor_backend_id(node);
1048
+ // do not overwrite user assignments
1049
+ if (*node_backend_id == -1) {
1050
+ *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
1051
+
1052
+ #if 0
1053
+ // src
1054
+ if (node->op == GGML_OP_NONE) {
1055
+ continue;
1056
+ }
1057
+
1058
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1059
+ struct ggml_tensor * src = node->src[j];
1060
+ if (src == NULL) {
1061
+ continue;
1062
+ }
1063
+ int * src_backend_id = &tensor_backend_id(src);
1064
+ if (*src_backend_id == -1) {
1065
+ *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
1066
+ }
1067
+ }
1068
+ #endif
1069
+ }
1070
+ }
1071
+
1072
+ // pass 2: expand current backend assignments
1073
+ // assign the same backend to adjacent nodes
1074
+ // expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend)
1075
+ // thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
1076
+ // ops unsupported by the backend being expanded will be left unassigned so that they can be assigned later when the locations of its inputs are known
1077
+ // expand gpu down
1078
+ {
1079
+ int cur_backend_id = -1;
1080
+ for (int i = 0; i < graph->n_nodes; i++) {
1081
+ struct ggml_tensor * node = graph->nodes[i];
1082
+ if (ggml_is_view_op(node->op)) {
1083
+ continue;
1084
+ }
1085
+ int * node_backend_id = &tensor_backend_id(node);
1086
+ if (*node_backend_id != -1) {
1087
+ if (*node_backend_id == sched->n_backends - 1) {
1088
+ // skip cpu (lowest prio backend)
1089
+ cur_backend_id = -1;
1090
+ } else {
1091
+ cur_backend_id = *node_backend_id;
1092
+ }
1093
+ } else if (cur_backend_id != -1) {
1094
+ ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id);
1095
+ }
1096
+ }
1097
+ }
1098
+ // expand gpu up
1099
+ {
1100
+ int cur_backend_id = -1;
1101
+ for (int i = graph->n_nodes - 1; i >= 0; i--) {
1102
+ struct ggml_tensor * node = graph->nodes[i];
1103
+ if (ggml_is_view_op(node->op)) {
1104
+ continue;
1105
+ }
1106
+ int * node_backend_id = &tensor_backend_id(node);
1107
+ if (*node_backend_id != -1) {
1108
+ if (*node_backend_id == sched->n_backends - 1) {
1109
+ // skip cpu (lowest prio backend)
1110
+ cur_backend_id = -1;
1111
+ } else {
1112
+ cur_backend_id = *node_backend_id;
1113
+ }
1114
+ } else if (cur_backend_id != -1) {
1115
+ ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id);
1116
+ }
1117
+ }
1118
+ }
1119
+ // expand rest down
1120
+ {
1121
+ int cur_backend_id = -1;
1122
+ for (int i = 0; i < graph->n_nodes; i++) {
1123
+ struct ggml_tensor * node = graph->nodes[i];
1124
+ if (ggml_is_view_op(node->op)) {
1125
+ continue;
1126
+ }
1127
+ int * node_backend_id = &tensor_backend_id(node);
1128
+ if (*node_backend_id != -1) {
1129
+ cur_backend_id = *node_backend_id;
1130
+ } else if (cur_backend_id != -1) {
1131
+ ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id);
1132
+ }
1133
+ }
1134
+ }
1135
+ // expand rest up
1136
+ {
1137
+ int cur_backend_id = -1;
1138
+ for (int i = graph->n_nodes - 1; i >= 0; i--) {
1139
+ struct ggml_tensor * node = graph->nodes[i];
1140
+ if (ggml_is_view_op(node->op)) {
1141
+ continue;
1142
+ }
1143
+ int * node_backend_id = &tensor_backend_id(node);
1144
+ if (*node_backend_id != -1) {
1145
+ cur_backend_id = *node_backend_id;
1146
+ } else if (cur_backend_id != -1) {
1147
+ ggml_backend_sched_set_if_supported(sched, node, cur_backend_id, node_backend_id);
1148
+ }
1149
+ }
1150
+ }
1151
+
1152
+ // pass 3: upgrade nodes to higher prio backends with compatible buffer types
1153
+ // if the tensor is already in the same buffer type (*) as another higher priority backend, we should move it there
1154
+ // however, we also need to verify that the sources are in compatible buffer types
1155
+ // (*) the actual requirement is more relaxed, the buffer type of the backend should be supported by all the users of this tensor further down the graph
1156
+ // however, this is slow to verify, so we have a more strict requirement that the buffer type is the same
1157
+ // this is not uncommon since multiple backends can use host memory, with the same buffer type (eg. BLAS and CPU)
1158
+ // additionally, set remaining unassigned nodes to the backend with the most supported inputs
1159
+ // only nodes that could not be assigned during expansion due to the backend not supporting the op should be unassigned at this point
1160
+ for (int i = 0; i < graph->n_nodes; i++) {
1161
+ struct ggml_tensor * node = graph->nodes[i];
1162
+ if (ggml_is_view_op(node->op)) {
1163
+ continue;
1164
+ }
1165
+ int * node_backend_id = &tensor_backend_id(node);
1166
+ if (*node_backend_id == -1) {
1167
+ // unassigned node: find the backend with the most supported inputs
1168
+ int n_supported_best = -1;
1169
+ for (int b = 0; b < sched->n_backends; b++) {
1170
+ if (ggml_backend_supports_op(sched->backends[b], node)) {
1171
+ int n_supported = 0;
1172
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1173
+ struct ggml_tensor * src = node->src[j];
1174
+ if (src == NULL) {
1175
+ continue;
1176
+ }
1177
+ if ((tensor_backend_id(src) != -1 || tensor_backend_id(src->view_src) != -1) && ggml_backend_sched_buffer_supported(sched, src, b)) {
1178
+ n_supported++;
1179
+ }
1180
+ }
1181
+ if (n_supported > n_supported_best) {
1182
+ n_supported_best = n_supported;
1183
+ *node_backend_id = b;
1184
+ SET_CAUSE(node, "3.best");
1185
+ }
1186
+ }
1187
+ }
1188
+ } else {
1189
+ // assigned node: upgrade to higher prio backend if possible
1190
+ for (int b = 0; b < *node_backend_id; b++) {
1191
+ if (sched->bufts[b] == sched->bufts[*node_backend_id] && ggml_backend_supports_op(sched->backends[b], node)) {
1192
+ bool supported = true;
1193
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1194
+ struct ggml_tensor * src = node->src[j];
1195
+ if (src == NULL) {
1196
+ continue;
1197
+ }
1198
+ if (!ggml_backend_sched_buffer_supported(sched, src, b)) {
1199
+ supported = false;
1200
+ break;
1201
+ }
1202
+ }
1203
+ if (supported) {
1204
+ *node_backend_id = b;
1205
+ SET_CAUSE(node, "3.upg");
1206
+ break;
1207
+ }
1208
+ }
1209
+ }
1210
+ }
1211
+ }
1212
+
1213
+ // pass 4: assign backends to remaining src from dst and view_src
1214
+ for (int i = 0; i < graph->n_nodes; i++) {
1215
+ struct ggml_tensor * node = graph->nodes[i];
1216
+ int * cur_backend_id = &tensor_backend_id(node);
1217
+ if (node->view_src != NULL && *cur_backend_id == -1) {
1218
+ *cur_backend_id = tensor_backend_id(node->view_src);
1219
+ SET_CAUSE(node, "4.vsrc");
1220
+ }
1221
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1222
+ struct ggml_tensor * src = node->src[j];
1223
+ if (src == NULL) {
1224
+ continue;
1225
+ }
1226
+ int * src_backend_id = &tensor_backend_id(src);
1227
+ if (*src_backend_id == -1) {
1228
+ if (src->view_src != NULL) {
1229
+ // views are always on the same backend as the source
1230
+ *src_backend_id = tensor_backend_id(src->view_src);
1231
+ SET_CAUSE(src, "4.vsrc");
1232
+ } else {
1233
+ *src_backend_id = *cur_backend_id;
1234
+ SET_CAUSE(src, "4.cur");
1235
+ }
1236
+ }
1237
+ }
1238
+ // if the node is still unassigned, assign it to the first backend that supports it
1239
+ for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) {
1240
+ ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id);
1241
+ }
1242
+ GGML_ASSERT(*cur_backend_id != -1);
1243
+ }
1244
+
1245
+ // pass 5: split graph, find tensors that need to be copied
1246
+ {
1247
+ int i_split = 0;
1248
+ struct ggml_backend_sched_split * split = &sched->splits[0];
1249
+ // find the backend of the first split, skipping view ops
1250
+ int i = 0;
1251
+ for (; i < graph->n_nodes; i++) {
1252
+ struct ggml_tensor * node = graph->nodes[i];
1253
+ if (!ggml_is_view_op(node->op)) {
1254
+ split->backend_id = tensor_backend_id(node);
1255
+ break;
1256
+ }
1257
+ }
1258
+ split->i_start = 0;
1259
+ split->n_inputs = 0;
1260
+ int cur_backend_id = split->backend_id;
1261
+ for (; i < graph->n_nodes; i++) {
1262
+ struct ggml_tensor * node = graph->nodes[i];
1263
+
1264
+ if (ggml_is_view_op(node->op)) {
1265
+ continue;
1266
+ }
1267
+
1268
+ const int node_backend_id = tensor_backend_id(node);
1269
+
1270
+ GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
1271
+
1272
+ // check if we should start a new split based on the sources of the current node
1273
+ bool need_new_split = false;
1274
+ if (node_backend_id == cur_backend_id && split->n_inputs > 0) {
1275
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1276
+ struct ggml_tensor * src = node->src[j];
1277
+ if (src == NULL) {
1278
+ continue;
1279
+ }
1280
+ // check if a weight is on a different and incompatible backend
1281
+ // by starting a new split, the memory of the previously offloaded weights can be reused
1282
+ if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
1283
+ int src_backend_id = tensor_backend_id(src);
1284
+ if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
1285
+ need_new_split = true;
1286
+ break;
1287
+ }
1288
+ }
1289
+ // check if the split has too many inputs
1290
+ // FIXME: count the number of inputs instead of only checking when full
1291
+ if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
1292
+ const size_t id = hash_id(src);
1293
+ int src_backend_id = sched->hv_tensor_backend_ids[id];
1294
+ bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
1295
+ if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) {
1296
+ need_new_split = true;
1297
+ break;
1298
+ }
1299
+ }
1300
+ }
1301
+ }
1302
+
1303
+ if (node_backend_id != cur_backend_id || need_new_split) {
1304
+ split->i_end = i;
1305
+ i_split++;
1306
+ if (i_split >= sched->splits_capacity) {
1307
+ sched->splits_capacity *= 2;
1308
+ sched->splits = (ggml_backend_sched_split *)
1309
+ realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
1310
+ GGML_ASSERT(sched->splits != NULL);
1311
+ }
1312
+ split = &sched->splits[i_split];
1313
+ split->backend_id = node_backend_id;
1314
+ split->i_start = i;
1315
+ split->n_inputs = 0;
1316
+ cur_backend_id = node_backend_id;
1317
+ }
1318
+
1319
+ // find inputs that are not on the same backend
1320
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
1321
+ struct ggml_tensor * src = node->src[j];
1322
+ if (src == NULL) {
1323
+ continue;
1324
+ }
1325
+
1326
+ size_t src_id = hash_id(src);
1327
+ const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
1328
+ GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now
1329
+
1330
+ if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
1331
+ if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
1332
+ ggml_backend_t backend = sched->backends[src_backend_id];
1333
+ for (int c = 0; c < sched->n_copies; c++) {
1334
+ struct ggml_tensor * tensor_copy;
1335
+ if (c == sched->cur_copy) {
1336
+ tensor_copy = src; // use the original tensor as the current copy
1337
+ } else {
1338
+ tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
1339
+ ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
1340
+ }
1341
+ ggml_set_input(tensor_copy);
1342
+ ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
1343
+ tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
1344
+ SET_CAUSE(tensor_copy, "4.cpy");
1345
+ }
1346
+ int n_graph_inputs = sched->n_graph_inputs++;
1347
+ GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
1348
+ sched->graph_inputs[n_graph_inputs] = src;
1349
+ }
1350
+ }
1351
+
1352
+ if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
1353
+ // create a copy of the input in the split's backend
1354
+ if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) {
1355
+ ggml_backend_t backend = sched->backends[cur_backend_id];
1356
+ for (int c = 0; c < sched->n_copies; c++) {
1357
+ struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
1358
+ ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
1359
+ if (sched->n_copies > 1) {
1360
+ ggml_set_input(tensor_copy);
1361
+ ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
1362
+ }
1363
+ tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
1364
+ SET_CAUSE(tensor_copy, "4.cpy");
1365
+ }
1366
+ int n_inputs = split->n_inputs++;
1367
+ GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
1368
+ split->inputs[n_inputs] = src;
1369
+ }
1370
+ node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy);
1371
+ }
1372
+ }
1373
+ }
1374
+ split->i_end = graph->n_nodes;
1375
+ sched->n_splits = i_split + 1;
1376
+ }
1377
+
1378
+ if (sched->debug) {
1379
+ ggml_backend_sched_print_assignments(sched, graph);
1380
+ }
1381
+
1382
+ // swap node_backend_ids and leaf _backend_ids with prevs
1383
+ {
1384
+ int * tmp = sched->node_backend_ids;
1385
+ sched->node_backend_ids = sched->prev_node_backend_ids;
1386
+ sched->prev_node_backend_ids = tmp;
1387
+
1388
+ tmp = sched->leaf_backend_ids;
1389
+ sched->leaf_backend_ids = sched->prev_leaf_backend_ids;
1390
+ sched->prev_leaf_backend_ids = tmp;
1391
+ }
1392
+
1393
+ int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
1394
+
1395
+ // remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
1396
+ sched->debug_prev_graph_size = sched->debug_graph_size;
1397
+ sched->debug_graph_size = graph_size;
1398
+
1399
+ if (sched->graph.size < graph_size) {
1400
+ sched->graph.size = graph_size;
1401
+ sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
1402
+ sched->graph.leafs = (ggml_tensor **) realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *));
1403
+ GGML_ASSERT(sched->graph.nodes != NULL);
1404
+ GGML_ASSERT(sched->graph.leafs != NULL);
1405
+ }
1406
+ sched->graph.n_nodes = 0;
1407
+ sched->graph.n_leafs = 0;
1408
+
1409
+ struct ggml_cgraph * graph_copy = &sched->graph;
1410
+
1411
+ for (int i = 0; i < sched->n_splits; i++) {
1412
+ struct ggml_backend_sched_split * split = &sched->splits[i];
1413
+ split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
1414
+
1415
+ // Optimize this split of the graph. This needs to happen before we make graph_copy,
1416
+ // so they are in sync.
1417
+ ggml_backend_graph_optimize(sched->backends[split->backend_id], &split->graph);
1418
+
1419
+ // add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
1420
+ for (int j = 0; j < split->n_inputs; j++) {
1421
+ assert(graph_copy->size > (graph_copy->n_nodes + 1));
1422
+
1423
+ struct ggml_tensor * input = split->inputs[j];
1424
+ const size_t input_id = hash_id(input);
1425
+ struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy);
1426
+
1427
+ // add a dependency to the input source so that it is not freed before the copy is done
1428
+ struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
1429
+ input_dep->src[0] = input;
1430
+ sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id];
1431
+ graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
1432
+
1433
+ // add a dependency to the input copy so that it is allocated at the start of the split
1434
+ sched->node_backend_ids[graph_copy->n_nodes] = split->backend_id;
1435
+ graph_copy->nodes[graph_copy->n_nodes++] = input_cpy;
1436
+ }
1437
+
1438
+ for (int j = split->i_start; j < split->i_end; j++) {
1439
+ assert(graph_copy->size > graph_copy->n_nodes);
1440
+ sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(graph->nodes[j]);
1441
+ graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
1442
+ }
1443
+ }
1444
+
1445
+ if (sched->n_copies > 1) {
1446
+ // add input copies as leafs so that they are allocated first
1447
+ for (int i = 0; i < sched->n_graph_inputs; i++) {
1448
+ struct ggml_tensor * input = sched->graph_inputs[i];
1449
+ size_t id = hash_id(input);
1450
+ int backend_id = tensor_backend_id(input);
1451
+ for (int c = 0; c < sched->n_copies; c++) {
1452
+ struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
1453
+ sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
1454
+ assert(graph_copy->size > graph_copy->n_leafs);
1455
+ graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
1456
+ }
1457
+ }
1458
+
1459
+ for (int i = 0; i < sched->n_splits; i++) {
1460
+ struct ggml_backend_sched_split * split = &sched->splits[i];
1461
+ int backend_id = split->backend_id;
1462
+ for (int j = 0; j < split->n_inputs; j++) {
1463
+ struct ggml_tensor * input = split->inputs[j];
1464
+ size_t id = hash_id(input);
1465
+ for (int c = 0; c < sched->n_copies; c++) {
1466
+ struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
1467
+ sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
1468
+ assert(graph_copy->size > graph_copy->n_leafs);
1469
+ graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
1470
+ }
1471
+ }
1472
+ }
1473
+ }
1474
+
1475
+ // add leafs from the original graph
1476
+ for (int i = 0; i < graph->n_leafs; i++) {
1477
+ struct ggml_tensor * leaf = graph->leafs[i];
1478
+ sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
1479
+ assert(graph_copy->size > graph_copy->n_leafs);
1480
+ graph_copy->leafs[graph_copy->n_leafs++] = leaf;
1481
+ }
1482
+
1483
+ // set ids for all splits
1484
+ for (int i = 0; i < sched->n_splits; ++i) {
1485
+ sched->splits[i].graph.uid = ggml_graph_next_uid();
1486
+ }
1487
+ }
1488
+
1489
+ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
1490
+ bool backend_ids_changed = false;
1491
+ for (int i = 0; i < sched->graph.n_nodes; i++) {
1492
+ if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
1493
+ sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
1494
+ backend_ids_changed = true;
1495
+ break;
1496
+ }
1497
+ }
1498
+ if (!backend_ids_changed) {
1499
+ for (int i = 0; i < sched->graph.n_leafs; i++) {
1500
+ if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
1501
+ sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
1502
+ backend_ids_changed = true;
1503
+ break;
1504
+ }
1505
+ }
1506
+ }
1507
+
1508
+ // allocate graph
1509
+ if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
1510
+ #ifndef NDEBUG
1511
+ GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
1512
+ #endif
1513
+
1514
+ if (sched->debug_realloc > 0) {
1515
+ // we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
1516
+ // example: https://github.com/ggml-org/llama.cpp/pull/17143
1517
+ const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
1518
+
1519
+ if (unexpected || sched->debug_realloc > 1) {
1520
+ GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
1521
+ sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
1522
+ }
1523
+ }
1524
+
1525
+ // the re-allocation may cause the split inputs to be moved to a different address
1526
+ // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
1527
+ for (int i = 0; i < sched->n_backends; i++) {
1528
+ ggml_backend_synchronize(sched->backends[i]);
1529
+ }
1530
+
1531
+ ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
1532
+ if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
1533
+ GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__);
1534
+ return false;
1535
+ }
1536
+ }
1537
+
1538
+ return true;
1539
+ }
1540
+
1541
+ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
1542
+ GGML_ASSERT(sched);
1543
+ struct ggml_backend_sched_split * splits = sched->splits;
1544
+
1545
+ ggml_tensor * prev_ids_tensor = nullptr;
1546
+ std::vector<int32_t> ids;
1547
+ std::vector<ggml_bitset_t> used_ids;
1548
+
1549
+ for (int split_id = 0; split_id < sched->n_splits; split_id++) {
1550
+ struct ggml_backend_sched_split * split = &splits[split_id];
1551
+ int split_backend_id = split->backend_id;
1552
+ ggml_backend_t split_backend = sched->backends[split_backend_id];
1553
+
1554
+ // copy the input tensors to the split backend
1555
+ for (int input_id = 0; input_id < split->n_inputs; input_id++) {
1556
+ ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]);
1557
+ struct ggml_tensor * input = split->inputs[input_id];
1558
+ struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
1559
+
1560
+ if (input->flags & GGML_TENSOR_FLAG_INPUT) {
1561
+ // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
1562
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
1563
+ ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
1564
+ } else {
1565
+ ggml_backend_synchronize(split_backend);
1566
+ }
1567
+ ggml_backend_tensor_copy(input, input_cpy);
1568
+ } else {
1569
+ // wait for the split backend to finish using the input before overwriting it
1570
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
1571
+ ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]);
1572
+ } else {
1573
+ ggml_backend_synchronize(split_backend);
1574
+ }
1575
+
1576
+ // when offloading MoE weights, we can reduce the amount of data copied by copying only the experts that are used
1577
+ ggml_tensor * node = split->graph.nodes[0];
1578
+ if (split->graph.n_nodes > 0 &&
1579
+ ggml_backend_buffer_get_usage(input->buffer) == GGML_BACKEND_BUFFER_USAGE_WEIGHTS &&
1580
+ ggml_backend_buffer_is_host(input->buffer) && (
1581
+ (node->src[0] == input_cpy && node->op == GGML_OP_MUL_MAT_ID)
1582
+ //|| (node->src[1] == input_cpy && node->op == GGML_OP_ADD_ID) /* GGML_OP_ADD_ID weights are small and not worth splitting */
1583
+ )) {
1584
+
1585
+ const int64_t n_expert = node->op == GGML_OP_MUL_MAT_ID ? input->ne[2] : input->ne[1];
1586
+ const size_t expert_size = node->op == GGML_OP_MUL_MAT_ID ? input->nb[2] : input->nb[1];
1587
+
1588
+ ggml_backend_synchronize(input_backend);
1589
+
1590
+ // get the ids
1591
+ ggml_tensor * ids_tensor = node->src[2];
1592
+ ggml_backend_t ids_backend = split_backend;
1593
+
1594
+ // if the ids tensor is also an input of the split, it may not have been copied yet to the split backend
1595
+ // in that case, we use the original ids tensor
1596
+ for (int i = input_id + 1; i < split->n_inputs; i++) {
1597
+ if (ids_tensor == tensor_copy(split->inputs[i], split_backend_id, sched->cur_copy)) {
1598
+ ids_tensor = split->inputs[i];
1599
+ ids_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[i]);
1600
+ break;
1601
+ }
1602
+ }
1603
+
1604
+ if (ids_tensor != prev_ids_tensor) {
1605
+ ids.resize(ggml_nbytes(ids_tensor) / sizeof(int32_t));
1606
+ ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
1607
+ ggml_backend_synchronize(ids_backend);
1608
+
1609
+ // find the used experts
1610
+ used_ids.clear();
1611
+ used_ids.resize(ggml_bitset_size(n_expert));
1612
+ for (int64_t i1 = 0; i1 < ids_tensor->ne[1]; i1++) {
1613
+ for (int64_t i0 = 0; i0 < ids_tensor->ne[0]; i0++) {
1614
+ int32_t id = ids[i1 * ids_tensor->nb[1]/sizeof(int32_t) + i0 * ids_tensor->nb[0]/sizeof(int32_t)];
1615
+ GGML_ASSERT(id >= 0 && id < n_expert);
1616
+ ggml_bitset_set(used_ids.data(), id);
1617
+ }
1618
+ }
1619
+
1620
+ prev_ids_tensor = ids_tensor;
1621
+ }
1622
+
1623
+ // group consecutive experts and copy them together
1624
+ auto copy_experts = [&](int32_t first_id, int32_t last_id) {
1625
+ const size_t expert_offset = first_id * expert_size;
1626
+ const size_t expert_size_copy = (last_id - first_id + 1) * expert_size;
1627
+ const size_t padding = std::min<size_t>(expert_size, 512);
1628
+ const size_t padding_end = last_id < n_expert - 1 ? padding : 0;
1629
+
1630
+ ggml_backend_tensor_set_async(split_backend,
1631
+ input_cpy,
1632
+ (const uint8_t *)input->data + expert_offset, expert_offset,
1633
+ // copy a bit extra at the to ensure there are no NaNs in the padding of the last expert
1634
+ // this is necessary for MMQ in the CUDA backend
1635
+ expert_size_copy + padding_end);
1636
+ };
1637
+
1638
+ int id = 0;
1639
+ while (!ggml_bitset_get(used_ids.data(), id)) {
1640
+ id++;
1641
+ }
1642
+ int32_t first_id = id;
1643
+ int32_t last_id = first_id;
1644
+
1645
+ for (++id; id < n_expert; ++id) {
1646
+ if (!ggml_bitset_get(used_ids.data(), id)) {
1647
+ continue;
1648
+ }
1649
+
1650
+ if (id == last_id + 1) {
1651
+ last_id = id;
1652
+ continue;
1653
+ }
1654
+
1655
+ copy_experts(first_id, last_id);
1656
+
1657
+ first_id = id;
1658
+ last_id = id;
1659
+ }
1660
+ copy_experts(first_id, last_id);
1661
+ } else {
1662
+ // try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1663
+ // TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
1664
+ if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1665
+ ggml_backend_synchronize(input_backend);
1666
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
1667
+ ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
1668
+ } else {
1669
+ ggml_backend_synchronize(split_backend);
1670
+ }
1671
+ ggml_backend_tensor_copy(input, input_cpy);
1672
+ }
1673
+ }
1674
+ }
1675
+ }
1676
+
1677
+ if (!sched->callback_eval) {
1678
+ enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
1679
+ if (ec != GGML_STATUS_SUCCESS) {
1680
+ return ec;
1681
+ }
1682
+ } else {
1683
+ // similar to ggml_backend_compare_graph_backend
1684
+ for (int j0 = 0; j0 < split->graph.n_nodes; j0++) {
1685
+ struct ggml_tensor * t = split->graph.nodes[j0];
1686
+
1687
+ // check if the user needs data from this node
1688
+ bool need = sched->callback_eval(t, true, sched->callback_eval_user_data);
1689
+
1690
+ int j1 = j0;
1691
+
1692
+ // determine the range [j0, j1] of nodes that can be computed together
1693
+ while (!need && j1 < split->graph.n_nodes - 1) {
1694
+ t = split->graph.nodes[++j1];
1695
+ need = sched->callback_eval(t, true, sched->callback_eval_user_data);
1696
+ }
1697
+
1698
+ struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
1699
+
1700
+ enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv);
1701
+ if (ec != GGML_STATUS_SUCCESS) {
1702
+ return ec;
1703
+ }
1704
+
1705
+ // TODO: pass backend to the callback, then the user can decide if they want to synchronize
1706
+ ggml_backend_synchronize(split_backend);
1707
+
1708
+ if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
1709
+ break;
1710
+ }
1711
+
1712
+ j0 = j1;
1713
+ }
1714
+ }
1715
+
1716
+ // record the event of this copy
1717
+ if (split->n_inputs > 0) {
1718
+ if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
1719
+ ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy], split_backend);
1720
+ }
1721
+ }
1722
+ }
1723
+
1724
+ return GGML_STATUS_SUCCESS;
1725
+ }
1726
+
1727
+ ggml_backend_sched_t ggml_backend_sched_new(
1728
+ ggml_backend_t * backends,
1729
+ ggml_backend_buffer_type_t * bufts,
1730
+ int n_backends,
1731
+ size_t graph_size,
1732
+ bool parallel,
1733
+ bool op_offload) {
1734
+ GGML_ASSERT(n_backends > 0);
1735
+ GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
1736
+ GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU);
1737
+
1738
+ struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched));
1739
+
1740
+ const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
1741
+ sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
1742
+
1743
+ sched->debug_realloc = 0;
1744
+ #ifdef GGML_SCHED_NO_REALLOC
1745
+ sched->debug_realloc = 1;
1746
+ #endif
1747
+ const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
1748
+ sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
1749
+
1750
+ sched->n_backends = n_backends;
1751
+ sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
1752
+
1753
+ // initialize hash table
1754
+ // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead)
1755
+ sched->hash_set = ggml_hash_set_new(graph_size);
1756
+ sched->hv_tensor_backend_ids = (int *) malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
1757
+ sched->hv_tensor_copies = (ggml_tensor **) malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
1758
+
1759
+ const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
1760
+ const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
1761
+ sched->node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
1762
+ sched->leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
1763
+ sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
1764
+ sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
1765
+
1766
+ sched->debug_graph_size = 0;
1767
+ sched->debug_prev_graph_size = 0;
1768
+
1769
+ sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
1770
+ sched->context_buffer = (char *) malloc(sched->context_buffer_size);
1771
+
1772
+ const int initial_splits_capacity = 16;
1773
+ sched->splits = (ggml_backend_sched_split *) calloc(initial_splits_capacity, sizeof(sched->splits[0]));
1774
+ sched->splits_capacity = initial_splits_capacity;
1775
+
1776
+ for (int b = 0; b < n_backends; b++) {
1777
+ sched->backends[b] = backends[b];
1778
+ sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
1779
+ GGML_ASSERT(ggml_backend_supports_buft(backends[b], sched->bufts[b]));
1780
+
1781
+ if (sched->n_copies > 1) {
1782
+ for (int c = 0; c < sched->n_copies; c++) {
1783
+ sched->events[b][c] = ggml_backend_event_new(backends[b]->device);
1784
+ }
1785
+ }
1786
+ }
1787
+
1788
+ sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
1789
+ sched->op_offload = op_offload;
1790
+
1791
+ ggml_backend_sched_reset(sched);
1792
+
1793
+ return sched;
1794
+ }
1795
+
1796
+ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
1797
+ if (sched == NULL) {
1798
+ return;
1799
+ }
1800
+ for (int b = 0; b < sched->n_backends; b++) {
1801
+ for (int c = 0; c < sched->n_copies; c++) {
1802
+ ggml_backend_event_free(sched->events[b][c]);
1803
+ }
1804
+ }
1805
+ ggml_gallocr_free(sched->galloc);
1806
+ ggml_free(sched->ctx);
1807
+ ggml_hash_set_free(&sched->hash_set);
1808
+ free(sched->splits);
1809
+ free(sched->hv_tensor_backend_ids);
1810
+ free(sched->hv_tensor_copies);
1811
+ free(sched->node_backend_ids);
1812
+ free(sched->leaf_backend_ids);
1813
+ free(sched->prev_node_backend_ids);
1814
+ free(sched->prev_leaf_backend_ids);
1815
+ free(sched->context_buffer);
1816
+ free(sched->graph.nodes);
1817
+ free(sched->graph.leafs);
1818
+ free(sched);
1819
+ }
1820
+
1821
+ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
1822
+ GGML_ASSERT(sched);
1823
+ // reset state for the next run
1824
+ if (!sched->is_reset) {
1825
+ ggml_hash_set_reset(&sched->hash_set);
1826
+ memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
1827
+ memset(sched->hv_tensor_copies, 0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
1828
+ sched->is_reset = true;
1829
+ }
1830
+ sched->is_alloc = false;
1831
+ }
1832
+
1833
+ void ggml_backend_sched_reserve_size(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph, size_t * sizes) {
1834
+ GGML_ASSERT(sched);
1835
+ GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
1836
+ GGML_ASSERT(sizes);
1837
+
1838
+ ggml_backend_sched_reset(sched);
1839
+
1840
+ ggml_backend_sched_synchronize(sched);
1841
+
1842
+ ggml_backend_sched_split_graph(sched, measure_graph);
1843
+
1844
+ ggml_gallocr_reserve_n_size(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids, sizes);
1845
+ }
1846
+
1847
+ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
1848
+ GGML_ASSERT(sched);
1849
+ GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
1850
+
1851
+ ggml_backend_sched_synchronize(sched);
1852
+
1853
+ ggml_backend_sched_split_graph(sched, measure_graph);
1854
+
1855
+ if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
1856
+ return false;
1857
+ }
1858
+
1859
+ ggml_backend_sched_reset(sched);
1860
+
1861
+ return true;
1862
+ }
1863
+
1864
+ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
1865
+ GGML_ASSERT(sched);
1866
+ GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
1867
+ GGML_ASSERT(!sched->is_alloc);
1868
+
1869
+ sched->cur_copy = sched->next_copy;
1870
+ sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
1871
+
1872
+ ggml_backend_sched_split_graph(sched, graph);
1873
+
1874
+ if (!ggml_backend_sched_alloc_splits(sched)) {
1875
+ return false;
1876
+ }
1877
+
1878
+ sched->is_alloc = true;
1879
+
1880
+ return true;
1881
+ }
1882
+
1883
+ enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
1884
+ enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
1885
+ ggml_backend_sched_synchronize(sched);
1886
+ return err;
1887
+ }
1888
+
1889
+ enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
1890
+ GGML_ASSERT(sched);
1891
+ if (!sched->is_reset && !sched->is_alloc) {
1892
+ ggml_backend_sched_reset(sched);
1893
+ }
1894
+
1895
+ if (!sched->is_alloc) {
1896
+ if (!ggml_backend_sched_alloc_graph(sched, graph)) {
1897
+ return GGML_STATUS_ALLOC_FAILED;
1898
+ }
1899
+ }
1900
+
1901
+ return ggml_backend_sched_compute_splits(sched);
1902
+ }
1903
+
1904
+ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
1905
+ GGML_ASSERT(sched);
1906
+ for (int i = 0; i < sched->n_backends; i++) {
1907
+ ggml_backend_synchronize(sched->backends[i]);
1908
+ }
1909
+ if (!sched->is_alloc) {
1910
+ // if the graph is not already allocated, always use copy 0 after a synchronization
1911
+ // this ensures that during generation the same copy is used every time,
1912
+ // which avoids changes in the graph that could cause CUDA or other graphs to be disabled
1913
+ sched->next_copy = 0;
1914
+ }
1915
+ }
1916
+
1917
+ void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
1918
+ GGML_ASSERT(sched);
1919
+ sched->callback_eval = callback;
1920
+ sched->callback_eval_user_data = user_data;
1921
+ }
1922
+
1923
+ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
1924
+ GGML_ASSERT(sched);
1925
+ return sched->n_splits;
1926
+ }
1927
+
1928
+ int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
1929
+ GGML_ASSERT(sched);
1930
+ return sched->n_copies;
1931
+ }
1932
+
1933
+ int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
1934
+ GGML_ASSERT(sched);
1935
+ return sched->n_backends;
1936
+ }
1937
+
1938
+ ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
1939
+ GGML_ASSERT(sched);
1940
+ GGML_ASSERT(i >= 0 && i < sched->n_backends);
1941
+ return sched->backends[i];
1942
+ }
1943
+
1944
+ ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend) {
1945
+ GGML_ASSERT(sched);
1946
+ int backend_index = ggml_backend_sched_backend_id(sched, backend);
1947
+ GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
1948
+
1949
+ return sched->bufts[backend_index];
1950
+ }
1951
+
1952
+ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
1953
+ GGML_ASSERT(sched);
1954
+ int backend_index = ggml_backend_sched_backend_id(sched, backend);
1955
+ GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
1956
+
1957
+ return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
1958
+ }
1959
+
1960
+ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
1961
+ GGML_ASSERT(sched);
1962
+ int backend_index = ggml_backend_sched_backend_id(sched, backend);
1963
+ GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
1964
+ tensor_backend_id(node) = backend_index;
1965
+ SET_CAUSE(node, "usr");
1966
+ sched->is_reset = false;
1967
+ }
1968
+
1969
+ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
1970
+ GGML_ASSERT(sched);
1971
+ int backend_index = tensor_backend_id(node);
1972
+ if (backend_index == -1) {
1973
+ return NULL;
1974
+ }
1975
+ return sched->backends[backend_index];
1976
+ }
1977
+
1978
+ // utils
1979
+
1980
+ enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
1981
+ GGML_ASSERT(tensor);
1982
+ GGML_ASSERT(tensor->buffer == NULL);
1983
+ GGML_ASSERT(tensor->view_src != NULL);
1984
+ GGML_ASSERT(tensor->view_src->buffer != NULL);
1985
+ GGML_ASSERT(tensor->view_src->data != NULL);
1986
+
1987
+ tensor->buffer = tensor->view_src->buffer;
1988
+ tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
1989
+ return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
1990
+ }
1991
+
1992
+ enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
1993
+ GGML_ASSERT(tensor);
1994
+ GGML_ASSERT(tensor->buffer == NULL);
1995
+ GGML_ASSERT(tensor->data == NULL);
1996
+ GGML_ASSERT(tensor->view_src == NULL);
1997
+ GGML_ASSERT(addr >= ggml_backend_buffer_get_base(buffer));
1998
+ GGML_ASSERT(ggml_backend_buffer_is_meta(buffer) ||
1999
+ (char *) addr + ggml_backend_buffer_get_alloc_size(buffer, tensor) <=
2000
+ (char *) ggml_backend_buffer_get_base(buffer) + ggml_backend_buffer_get_size(buffer));
2001
+
2002
+ tensor->buffer = buffer;
2003
+ tensor->data = addr;
2004
+ return ggml_backend_buffer_init_tensor(buffer, tensor);
2005
+ }
2006
+
2007
+ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
2008
+ struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) {
2009
+
2010
+ GGML_ASSERT(src != NULL);
2011
+ GGML_ASSERT(src->data && "graph must be allocated");
2012
+
2013
+ size_t id = ggml_hash_insert(&hash_set, src);
2014
+ if (id == GGML_HASHSET_ALREADY_EXISTS) {
2015
+ return node_copies[ggml_hash_find(&hash_set, src)];
2016
+ }
2017
+
2018
+ struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
2019
+ if (src->view_src != NULL) {
2020
+ dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src);
2021
+ dst->view_offs = src->view_offs;
2022
+ }
2023
+ dst->op = src->op;
2024
+ dst->flags = src->flags;
2025
+ memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
2026
+ ggml_set_name(dst, src->name);
2027
+
2028
+ // copy src
2029
+ for (int i = 0; i < GGML_MAX_SRC; i++) {
2030
+ struct ggml_tensor * s = src->src[i];
2031
+ if (s == NULL) {
2032
+ continue;
2033
+ }
2034
+ dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s);
2035
+ }
2036
+
2037
+ node_copies[id] = dst;
2038
+ return dst;
2039
+ }
2040
+
2041
+ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
2042
+ size_t id = ggml_hash_find(hash_set, src);
2043
+ if (node_init[id]) {
2044
+ return;
2045
+ }
2046
+ node_init[id] = true;
2047
+
2048
+ struct ggml_tensor * dst = node_copies[id];
2049
+ if (dst->view_src != NULL) {
2050
+ graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
2051
+ enum ggml_status status = ggml_backend_view_init(dst);
2052
+ GGML_ASSERT(status == GGML_STATUS_SUCCESS);
2053
+ }
2054
+ else {
2055
+ ggml_backend_tensor_copy(src, dst);
2056
+ }
2057
+
2058
+ // init src
2059
+ for (int i = 0; i < GGML_MAX_SRC; i++) {
2060
+ struct ggml_tensor * s = src->src[i];
2061
+ if (s == NULL) {
2062
+ continue;
2063
+ }
2064
+ graph_copy_init_tensor(hash_set, node_copies, node_init, s);
2065
+ }
2066
+ }
2067
+
2068
+ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
2069
+ GGML_ASSERT(graph);
2070
+ struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
2071
+ struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
2072
+ bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
2073
+
2074
+ struct ggml_init_params params = {
2075
+ /* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
2076
+ /* .mem_buffer = */ NULL,
2077
+ /* .no_alloc = */ true
2078
+ };
2079
+
2080
+ struct ggml_context * ctx_allocated = ggml_init(params);
2081
+ struct ggml_context * ctx_unallocated = ggml_init(params);
2082
+
2083
+ if (ctx_allocated == NULL || ctx_unallocated == NULL) {
2084
+ GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__);
2085
+ ggml_hash_set_free(&hash_set);
2086
+ free(node_copies);
2087
+ free(node_init);
2088
+ ggml_free(ctx_allocated);
2089
+ ggml_free(ctx_unallocated);
2090
+ return {
2091
+ /* .buffer = */ NULL,
2092
+ /* .ctx_allocated = */ NULL,
2093
+ /* .ctx_unallocated = */ NULL,
2094
+ /* .graph = */ NULL,
2095
+ };
2096
+ }
2097
+
2098
+ // dup nodes
2099
+ for (int i = 0; i < graph->n_nodes; i++) {
2100
+ struct ggml_tensor * node = graph->nodes[i];
2101
+ graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node);
2102
+ }
2103
+
2104
+ // allocate nodes
2105
+ ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
2106
+ if (buffer == NULL) {
2107
+ GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
2108
+ ggml_hash_set_free(&hash_set);
2109
+ free(node_copies);
2110
+ free(node_init);
2111
+ ggml_free(ctx_allocated);
2112
+ ggml_free(ctx_unallocated);
2113
+ return {
2114
+ /* .buffer = */ NULL,
2115
+ /* .ctx_allocated = */ NULL,
2116
+ /* .ctx_unallocated = */ NULL,
2117
+ /* .graph = */ NULL,
2118
+ };
2119
+ }
2120
+
2121
+ //printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024);
2122
+
2123
+ // copy data and init views
2124
+ for (int i = 0; i < graph->n_nodes; i++) {
2125
+ struct ggml_tensor * node = graph->nodes[i];
2126
+ graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
2127
+ }
2128
+
2129
+ // build graph copy
2130
+ struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
2131
+ for (int i = 0; i < graph->n_nodes; i++) {
2132
+ struct ggml_tensor * node = graph->nodes[i];
2133
+ struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
2134
+ graph_copy->nodes[i] = node_copy;
2135
+ }
2136
+ graph_copy->n_nodes = graph->n_nodes;
2137
+
2138
+ ggml_hash_set_free(&hash_set);
2139
+ free(node_copies);
2140
+ free(node_init);
2141
+
2142
+ return {
2143
+ /* .buffer = */ buffer,
2144
+ /* .ctx_allocated = */ ctx_allocated,
2145
+ /* .ctx_unallocated = */ ctx_unallocated,
2146
+ /* .graph = */ graph_copy,
2147
+ };
2148
+ }
2149
+
2150
+ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
2151
+ ggml_backend_buffer_free(copy.buffer);
2152
+ ggml_free(copy.ctx_allocated);
2153
+ ggml_free(copy.ctx_unallocated);
2154
+ }
2155
+
2156
+ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor const * const * test_nodes, size_t num_test_nodes) {
2157
+ struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
2158
+ if (copy.buffer == NULL) {
2159
+ return false;
2160
+ }
2161
+
2162
+ struct ggml_cgraph * g1 = graph;
2163
+ struct ggml_cgraph * g2 = copy.graph;
2164
+
2165
+ assert(g1->n_nodes == g2->n_nodes);
2166
+
2167
+ if (num_test_nodes != 0) {
2168
+ GGML_ASSERT(test_nodes);
2169
+ // Compute the whole graph and only test the output for specific tensors
2170
+ ggml_backend_graph_compute(backend1, g1);
2171
+ ggml_backend_graph_compute(backend2, g2);
2172
+
2173
+ bool verified = false;
2174
+ for (int i = 0; i < g1->n_nodes; i++) {
2175
+ for (size_t j = 0; j < num_test_nodes; ++j) {
2176
+ if (g1->nodes[i] == test_nodes[j]) {
2177
+ callback(i, g1->nodes[i], g2->nodes[i], user_data);
2178
+ verified = true;
2179
+ }
2180
+ }
2181
+ }
2182
+ GGML_ASSERT(verified);
2183
+ } else {
2184
+ for (int i = 0; i < g1->n_nodes; i++) {
2185
+ struct ggml_tensor * t1 = g1->nodes[i];
2186
+ struct ggml_tensor * t2 = g2->nodes[i];
2187
+
2188
+ assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
2189
+
2190
+ struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
2191
+ struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
2192
+
2193
+ ggml_backend_graph_compute(backend1, &g1v);
2194
+ ggml_backend_graph_compute(backend2, &g2v);
2195
+
2196
+ if (ggml_is_view_op(t1->op)) {
2197
+ continue;
2198
+ }
2199
+
2200
+ // compare results, calculate rms etc
2201
+ if (!callback(i, t1, t2, user_data)) {
2202
+ break;
2203
+ }
2204
+ }
2205
+ }
2206
+ ggml_backend_graph_copy_free(copy);
2207
+
2208
+ return true;
2209
+ }
2210
+
2211
+ // CPU backend - buffer
2212
+
2213
+ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
2214
+ GGML_ASSERT(buffer);
2215
+ uintptr_t data = (uintptr_t)buffer->context;
2216
+
2217
+ // align the buffer
2218
+ if (data % TENSOR_ALIGNMENT != 0) {
2219
+ data = GGML_PAD(data, TENSOR_ALIGNMENT);
2220
+ }
2221
+
2222
+ return (void *)data;
2223
+ }
2224
+
2225
+ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
2226
+ GGML_ASSERT(buffer);
2227
+ ggml_aligned_free(buffer->context, buffer->size);
2228
+ }
2229
+
2230
+ static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
2231
+ GGML_ASSERT(tensor);
2232
+ memset((char *)tensor->data + offset, value, size);
2233
+
2234
+ GGML_UNUSED(buffer);
2235
+ }
2236
+
2237
+ static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
2238
+ GGML_ASSERT(tensor);
2239
+ memcpy((char *)tensor->data + offset, data, size);
2240
+
2241
+ GGML_UNUSED(buffer);
2242
+ }
2243
+
2244
+ static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
2245
+ GGML_ASSERT(tensor);
2246
+ memcpy(data, (const char *)tensor->data + offset, size);
2247
+
2248
+ GGML_UNUSED(buffer);
2249
+ }
2250
+
2251
+ static bool ggml_backend_cpu_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
2252
+ GGML_ASSERT(src);
2253
+ if (ggml_backend_buffer_is_host(src->buffer)) {
2254
+ memcpy(dst->data, src->data, ggml_nbytes(src));
2255
+ return true;
2256
+ }
2257
+ return false;
2258
+
2259
+ GGML_UNUSED(buffer);
2260
+ }
2261
+
2262
+ static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
2263
+ GGML_ASSERT(buffer);
2264
+ memset(buffer->context, value, buffer->size);
2265
+ }
2266
+
2267
+ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
2268
+ /* .free_buffer = */ ggml_backend_cpu_buffer_free_buffer,
2269
+ /* .get_base = */ ggml_backend_cpu_buffer_get_base,
2270
+ /* .init_tensor = */ NULL, // no initialization required
2271
+ /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
2272
+ /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor,
2273
+ /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor,
2274
+ /* .set_tensor_2d = */ NULL,
2275
+ /* .get_tensor_2d = */ NULL,
2276
+ /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
2277
+ /* .clear = */ ggml_backend_cpu_buffer_clear,
2278
+ /* .reset = */ NULL,
2279
+ };
2280
+
2281
+ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
2282
+ /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
2283
+ /* .get_base = */ ggml_backend_cpu_buffer_get_base,
2284
+ /* .init_tensor = */ NULL, // no initialization required
2285
+ /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
2286
+ /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor,
2287
+ /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor,
2288
+ /* .set_tensor_2d = */ NULL,
2289
+ /* .get_tensor_2d = */ NULL,
2290
+ /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
2291
+ /* .clear = */ ggml_backend_cpu_buffer_clear,
2292
+ /* .reset = */ NULL,
2293
+ };
2294
+
2295
+ // CPU backend buffer type
2296
+
2297
+ // this buffer type is defined here to make it available to all backends
2298
+
2299
+ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
2300
+ return "CPU";
2301
+
2302
+ GGML_UNUSED(buft);
2303
+ }
2304
+
2305
+ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
2306
+ void * data = ggml_aligned_malloc(size);
2307
+
2308
+ if (data == NULL) {
2309
+ GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
2310
+ return NULL;
2311
+ }
2312
+
2313
+ return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size);
2314
+ }
2315
+
2316
+ static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
2317
+ return TENSOR_ALIGNMENT;
2318
+
2319
+ GGML_UNUSED(buft);
2320
+ }
2321
+
2322
+ static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
2323
+ return true;
2324
+
2325
+ GGML_UNUSED(buft);
2326
+ }
2327
+
2328
+ ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
2329
+ static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
2330
+ /* .iface = */ {
2331
+ /* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
2332
+ /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
2333
+ /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
2334
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
2335
+ /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
2336
+ /* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
2337
+ },
2338
+ /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
2339
+ /* .context = */ NULL,
2340
+ };
2341
+
2342
+ return &ggml_backend_cpu_buffer_type;
2343
+ }
2344
+
2345
+ static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
2346
+ return "CPU_Mapped";
2347
+
2348
+ GGML_UNUSED(buft);
2349
+ }
2350
+
2351
+ static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) {
2352
+ static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
2353
+ /* .iface = */ {
2354
+ /* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
2355
+ /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
2356
+ /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
2357
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
2358
+ /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
2359
+ /* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
2360
+ },
2361
+ /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
2362
+ /* .context = */ NULL,
2363
+ };
2364
+
2365
+ return &ggml_backend_cpu_buffer_type;
2366
+ }
2367
+
2368
+ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
2369
+ GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned");
2370
+ return ggml_backend_buffer_init(ggml_backend_cpu_buffer_from_ptr_type(), ggml_backend_cpu_buffer_from_ptr_i, ptr, size);
2371
+ }