toy 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2107)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +1124 -0
  3. data/LICENSE +21 -0
  4. data/Makefile +2022 -0
  5. data/README.md +154 -0
  6. data/bin/toy +10 -0
  7. data/lib/toy/compute.rb +135 -0
  8. data/lib/toy/compute_cuda.rb +104 -0
  9. data/lib/toy/compute_metal.rb +97 -0
  10. data/lib/toy/core/cli/describe.rb +188 -0
  11. data/lib/toy/core/cli/eval.rb +385 -0
  12. data/lib/toy/core/cli/exit_codes.rb +15 -0
  13. data/lib/toy/core/cli/fetch.rb +238 -0
  14. data/lib/toy/core/cli/infer.rb +268 -0
  15. data/lib/toy/core/cli/install.rb +228 -0
  16. data/lib/toy/core/cli/list.rb +86 -0
  17. data/lib/toy/core/cli/manifest.rb +49 -0
  18. data/lib/toy/core/cli/new.rb +594 -0
  19. data/lib/toy/core/cli/serve.rb +237 -0
  20. data/lib/toy/core/cli/train.rb +471 -0
  21. data/lib/toy/core/cli.rb +165 -0
  22. data/lib/toy/core/config.rb +64 -0
  23. data/lib/toy/core/gguf_meta.rb +161 -0
  24. data/lib/toy/core/model_scan.rb +221 -0
  25. data/lib/toy/core/run_log.rb +94 -0
  26. data/lib/toy/core/toy_root.rb +95 -0
  27. data/lib/toy/dev/toy_card.rb +299 -0
  28. data/lib/toy/dev/toy_describe_flow.rb +412 -0
  29. data/lib/toy/dev/toy_logprobs.rb +86 -0
  30. data/lib/toy/dev/toy_tap.rb +183 -0
  31. data/lib/toy/dev/toy_token_drift.rb +121 -0
  32. data/lib/toy/ffi/tinynn.rb +1491 -0
  33. data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
  34. data/lib/toy/ffi/tinynn_metal.rb +359 -0
  35. data/lib/toy/ffi_manifest.rb +84 -0
  36. data/lib/toy/io/bpe.rb +325 -0
  37. data/lib/toy/io/gguf_kv.rb +35 -0
  38. data/lib/toy/io/gguf_load.rb +331 -0
  39. data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
  40. data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
  41. data/lib/toy/io/model_index.rb +206 -0
  42. data/lib/toy/io/run_bundle.rb +280 -0
  43. data/lib/toy/io/tokenizer.rb +613 -0
  44. data/lib/toy/io/toy_corpus_loader.rb +52 -0
  45. data/lib/toy/io/toy_events.rb +56 -0
  46. data/lib/toy/io/toy_image_loader.rb +48 -0
  47. data/lib/toy/llm/adamw.rb +169 -0
  48. data/lib/toy/llm/archs/llama_arch.rb +233 -0
  49. data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
  50. data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
  51. data/lib/toy/llm/blocks/transformer_block.rb +876 -0
  52. data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
  53. data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
  54. data/lib/toy/llm/classify_batch.rb +88 -0
  55. data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
  56. data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
  57. data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
  58. data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
  59. data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
  60. data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
  61. data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
  62. data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
  63. data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
  64. data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
  65. data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
  66. data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
  67. data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
  68. data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
  69. data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
  70. data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
  71. data/lib/toy/llm/labels.rb +142 -0
  72. data/lib/toy/llm/primitives/gqa.rb +62 -0
  73. data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
  74. data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
  75. data/lib/toy/llm/primitives/rms_norm.rb +39 -0
  76. data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
  77. data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
  78. data/lib/toy/llm/primitives/rope.rb +68 -0
  79. data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
  80. data/lib/toy/llm/primitives/rope_metal.rb +72 -0
  81. data/lib/toy/llm/primitives/swiglu.rb +41 -0
  82. data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
  83. data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
  84. data/lib/toy/llm/recipe_options.rb +71 -0
  85. data/lib/toy/llm/recipes/from_scratch.rb +105 -0
  86. data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
  87. data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
  88. data/lib/toy/llm/recipes/lora.rb +110 -0
  89. data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
  90. data/lib/toy/llm/recipes/lora_metal.rb +114 -0
  91. data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
  92. data/lib/toy/llm/recipes/warm_start.rb +235 -0
  93. data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
  94. data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
  95. data/lib/toy/llm/training_batch.rb +133 -0
  96. data/lib/toy/models/arch.rb +253 -0
  97. data/lib/toy/models/gpt2.rb +311 -0
  98. data/lib/toy/models/toy_gpt2.rb +177 -0
  99. data/lib/toy/models/toy_smollm2.rb +393 -0
  100. data/lib/toy/models/toy_vit.rb +83 -0
  101. data/lib/toy/models/transformer.rb +1494 -0
  102. data/lib/toy/models/transformer_lm.rb +298 -0
  103. data/lib/toy/models/transformer_lm_cuda.rb +159 -0
  104. data/lib/toy/models/transformer_lm_metal.rb +142 -0
  105. data/lib/toy/mri.rb +300 -0
  106. data/lib/toy/run/eval.rb +76 -0
  107. data/lib/toy/run/eval_cuda.rb +66 -0
  108. data/lib/toy/run/eval_lmc.rb +334 -0
  109. data/lib/toy/run/eval_metal.rb +67 -0
  110. data/lib/toy/run/infer.rb +130 -0
  111. data/lib/toy/run/infer_cuda.rb +118 -0
  112. data/lib/toy/run/infer_metal.rb +119 -0
  113. data/lib/toy/run/infer_trace.rb +37 -0
  114. data/lib/toy/run/serve.rb +144 -0
  115. data/lib/toy/run/train.rb +404 -0
  116. data/lib/toy/run/train_cuda.rb +397 -0
  117. data/lib/toy/run/train_gpt2.rb +103 -0
  118. data/lib/toy/run/train_gpt2_cuda.rb +85 -0
  119. data/lib/toy/run/train_gpt2_metal.rb +85 -0
  120. data/lib/toy/run/train_lora.rb +207 -0
  121. data/lib/toy/run/train_lora_cuda.rb +219 -0
  122. data/lib/toy/run/train_metal.rb +227 -0
  123. data/lib/toy/run/train_vit.rb +251 -0
  124. data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
  125. data/lib/toy/serve/openai/handlers.rb +143 -0
  126. data/lib/toy/serve/openai/server.rb +159 -0
  127. data/lib/toy/train/sampler.rb +314 -0
  128. data/lib/toy/train/toy_chat_template.rb +179 -0
  129. data/lib/toy/train/toy_drift_grad.rb +176 -0
  130. data/lib/toy/train/toy_gguf_fuse.rb +428 -0
  131. data/lib/toy/train/toy_gguf_writer.rb +100 -0
  132. data/lib/toy/train/toy_lr_schedule.rb +39 -0
  133. data/lib/toy/train/toy_sample.rb +125 -0
  134. data/lib/toy/train/toy_trainer.rb +86 -0
  135. data/lib/toy/train/training.rb +160 -0
  136. data/lib/toy/version.rb +11 -0
  137. data/lib/toy.rb +902 -0
  138. data/prep/progress +118 -0
  139. data/prep/quietly +64 -0
  140. data/sig/toy.rbs +397 -0
  141. data/sig/toy_compute.rbs +450 -0
  142. data/spinel-ext.json +122 -0
  143. data/tinynn/Makefile +71 -0
  144. data/tinynn/tinynn_backend_cuda.c +99 -0
  145. data/tinynn/tinynn_backend_metal.m +75 -0
  146. data/tinynn/tinynn_events.c +122 -0
  147. data/tinynn/tinynn_events.h +83 -0
  148. data/tinynn/tinynn_ggml.c +2460 -0
  149. data/tinynn/tinynn_ggml.h +545 -0
  150. data/tinynn/tinynn_gguf.c +783 -0
  151. data/tinynn/tinynn_gguf.h +167 -0
  152. data/tinynn/tinynn_trace.c +180 -0
  153. data/tinynn/tinynn_trace.h +85 -0
  154. data/vendor/ggml/AUTHORS +335 -0
  155. data/vendor/ggml/CMakeLists.txt +505 -0
  156. data/vendor/ggml/CONTRIBUTING.md +3 -0
  157. data/vendor/ggml/LICENSE +21 -0
  158. data/vendor/ggml/README.md +50 -0
  159. data/vendor/ggml/ci/run.sh +395 -0
  160. data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
  161. data/vendor/ggml/cmake/GitVars.cmake +22 -0
  162. data/vendor/ggml/cmake/common.cmake +50 -0
  163. data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
  164. data/vendor/ggml/docs/gguf.md +828 -0
  165. data/vendor/ggml/examples/CMakeLists.txt +34 -0
  166. data/vendor/ggml/examples/common-ggml.cpp +244 -0
  167. data/vendor/ggml/examples/common-ggml.h +18 -0
  168. data/vendor/ggml/examples/common.cpp +675 -0
  169. data/vendor/ggml/examples/common.h +322 -0
  170. data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
  171. data/vendor/ggml/examples/gpt-2/README.md +225 -0
  172. data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
  173. data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
  174. data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
  175. data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
  176. data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
  177. data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
  178. data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
  179. data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
  180. data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
  181. data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
  182. data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
  183. data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
  184. data/vendor/ggml/examples/gpt-j/README.md +239 -0
  185. data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
  186. data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
  187. data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
  188. data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
  189. data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
  190. data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
  191. data/vendor/ggml/examples/magika/README.md +23 -0
  192. data/vendor/ggml/examples/magika/convert.py +32 -0
  193. data/vendor/ggml/examples/magika/main.cpp +374 -0
  194. data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
  195. data/vendor/ggml/examples/mnist/README.md +206 -0
  196. data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
  197. data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
  198. data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
  199. data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
  200. data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
  201. data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
  202. data/vendor/ggml/examples/mnist/server.py +36 -0
  203. data/vendor/ggml/examples/mnist/web/index.html +178 -0
  204. data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
  205. data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
  206. data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
  207. data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
  208. data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
  209. data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
  210. data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
  211. data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
  212. data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
  213. data/vendor/ggml/examples/prompts/replit.txt +100 -0
  214. data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
  215. data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
  216. data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
  217. data/vendor/ggml/examples/prompts/whisper.txt +100 -0
  218. data/vendor/ggml/examples/python/README.md +115 -0
  219. data/vendor/ggml/examples/python/api.h +14 -0
  220. data/vendor/ggml/examples/python/example_add_quant.py +25 -0
  221. data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
  222. data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
  223. data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
  224. data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
  225. data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
  226. data/vendor/ggml/examples/python/ggml/utils.py +182 -0
  227. data/vendor/ggml/examples/python/regenerate.py +42 -0
  228. data/vendor/ggml/examples/python/stubs.py +128 -0
  229. data/vendor/ggml/examples/python/test_tensor.py +258 -0
  230. data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
  231. data/vendor/ggml/examples/sam/README.md +95 -0
  232. data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
  233. data/vendor/ggml/examples/sam/example.jpg +0 -0
  234. data/vendor/ggml/examples/sam/sam.cpp +2370 -0
  235. data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
  236. data/vendor/ggml/examples/simple/README.md +61 -0
  237. data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
  238. data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
  239. data/vendor/ggml/examples/stb_image.h +7987 -0
  240. data/vendor/ggml/examples/stb_image_write.h +1724 -0
  241. data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
  242. data/vendor/ggml/examples/test-cmake/README.md +3 -0
  243. data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
  244. data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
  245. data/vendor/ggml/examples/yolo/README.md +59 -0
  246. data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
  247. data/vendor/ggml/examples/yolo/data/coco.names +80 -0
  248. data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
  249. data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
  250. data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
  251. data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
  252. data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
  253. data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
  254. data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
  255. data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
  256. data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
  257. data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
  258. data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
  259. data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
  260. data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
  261. data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
  262. data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
  263. data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
  264. data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
  265. data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
  266. data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
  267. data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
  268. data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
  269. data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
  270. data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
  271. data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
  272. data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
  273. data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
  274. data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
  275. data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
  276. data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
  277. data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
  278. data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
  279. data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
  280. data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
  281. data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
  282. data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
  283. data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
  284. data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
  285. data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
  286. data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
  287. data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
  288. data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
  289. data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
  290. data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
  291. data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
  292. data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
  293. data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
  294. data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
  295. data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
  296. data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
  297. data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
  298. data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
  299. data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
  300. data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
  301. data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
  302. data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
  303. data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
  304. data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
  305. data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
  306. data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
  307. data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
  308. data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
  309. data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
  310. data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
  311. data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
  312. data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
  313. data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
  314. data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
  315. data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
  316. data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
  317. data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
  318. data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
  319. data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
  320. data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
  321. data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
  322. data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
  323. data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
  324. data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
  325. data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
  326. data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
  327. data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
  328. data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
  329. data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
  330. data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
  331. data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
  332. data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
  333. data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
  334. data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
  335. data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
  336. data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
  337. data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
  338. data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
  339. data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
  340. data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
  341. data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
  342. data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
  343. data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
  344. data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
  345. data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
  346. data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
  347. data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
  348. data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
  349. data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
  350. data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
  351. data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
  352. data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
  353. data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
  354. data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
  355. data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
  356. data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
  357. data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
  358. data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
  359. data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
  360. data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
  361. data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
  362. data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
  363. data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
  364. data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
  365. data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
  366. data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
  367. data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
  368. data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
  369. data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
  370. data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
  371. data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
  372. data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
  373. data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
  374. data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
  375. data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
  376. data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
  377. data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
  378. data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
  379. data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
  380. data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
  381. data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
  382. data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
  383. data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
  384. data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
  385. data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
  386. data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
  387. data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
  388. data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
  389. data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
  390. data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
  391. data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
  392. data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
  393. data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
  394. data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
  395. data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
  396. data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
  397. data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
  398. data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
  399. data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
  400. data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
  401. data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
  402. data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
  403. data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
  404. data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
  405. data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
  406. data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
  407. data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
  408. data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
  409. data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
  410. data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
  411. data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
  412. data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
  413. data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
  414. data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
  415. data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
  416. data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
  417. data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
  418. data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
  419. data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
  420. data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
  421. data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
  422. data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
  423. data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
  424. data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
  425. data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
  426. data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
  427. data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
  428. data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
  429. data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
  430. data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
  431. data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
  432. data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
  433. data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
  434. data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
  435. data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
  436. data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
  437. data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
  438. data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
  439. data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
  440. data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
  441. data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
  442. data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
  443. data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
  444. data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
  445. data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
  446. data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
  447. data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
  448. data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
  449. data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
  450. data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
  451. data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
  452. data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
  453. data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
  454. data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
  455. data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
  456. data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
  457. data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
  458. data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
  459. data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
  460. data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
  461. data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
  462. data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
  463. data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
  464. data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
  465. data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
  466. data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
  467. data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
  468. data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
  469. data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
  470. data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
  471. data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
  472. data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
  473. data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
  474. data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
  475. data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
  476. data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
  477. data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
  478. data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
  479. data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
  480. data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
  481. data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
  482. data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
  483. data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
  484. data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
  485. data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
  486. data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
  487. data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
  488. data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
  489. data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
  490. data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
  491. data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
  492. data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
  493. data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
  494. data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
  495. data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
  496. data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
  497. data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
  498. data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
  499. data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
  500. data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
  501. data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
  502. data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
  503. data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
  504. data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
  505. data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
  506. data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
  507. data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
  508. data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
  509. data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
  510. data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
  511. data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
  512. data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
  513. data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
  514. data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
  515. data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
  516. data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
  517. data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
  518. data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
  519. data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
  520. data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
  521. data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
  522. data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
  523. data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
  524. data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
  525. data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
  526. data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
  527. data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
  528. data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
  529. data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
  530. data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
  531. data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
  532. data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
  533. data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
  534. data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
  535. data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
  536. data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
  537. data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
  538. data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
  539. data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
  540. data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
  541. data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
  542. data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
  543. data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
  544. data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
  545. data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
  546. data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
  547. data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
  548. data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
  549. data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
  550. data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
  551. data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
  552. data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
  553. data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
  554. data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
  555. data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
  556. data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
  557. data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
  558. data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
  559. data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
  560. data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
  561. data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
  562. data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
  563. data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
  564. data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
  565. data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
  566. data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
  567. data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
  568. data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
  569. data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
  570. data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
  571. data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
  572. data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
  573. data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
  574. data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
  575. data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
  576. data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
  577. data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
  578. data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
  579. data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
  580. data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
  581. data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
  582. data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
  583. data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
  584. data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
  585. data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
  586. data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
  587. data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
  588. data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
  589. data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
  590. data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
  591. data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
  592. data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
  593. data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
  594. data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
  595. data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
  596. data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
  597. data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
  598. data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
  599. data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
  600. data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
  601. data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
  602. data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
  603. data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
  604. data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
  605. data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
  606. data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
  607. data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
  608. data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
  609. data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
  610. data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
  611. data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
  612. data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
  613. data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
  614. data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
  615. data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
  616. data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
  617. data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
  618. data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
  619. data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
  620. data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
  621. data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
  622. data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
  623. data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
  624. data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
  625. data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
  626. data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
  627. data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
  628. data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
  629. data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
  630. data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
  631. data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
  632. data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
  633. data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
  634. data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
  635. data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
  636. data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
  637. data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
  638. data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
  639. data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
  640. data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
  641. data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
  642. data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
  643. data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
  644. data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
  645. data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
  646. data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
  647. data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
  648. data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
  649. data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
  650. data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
  651. data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
  652. data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
  653. data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
  654. data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
  655. data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
  656. data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
  657. data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
  658. data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
  659. data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
  660. data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
  661. data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
  662. data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
  663. data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
  664. data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
  665. data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
  666. data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
  667. data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
  668. data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
  669. data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
  670. data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
  671. data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
  672. data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
  673. data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
  674. data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
  675. data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
  676. data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
  677. data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
  678. data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
  679. data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
  680. data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
  681. data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
  682. data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
  683. data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
  684. data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
  685. data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
  686. data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
  687. data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
  688. data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
  689. data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
  690. data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
  691. data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
  692. data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
  693. data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
  694. data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
  695. data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
  696. data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
  697. data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
  698. data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
  699. data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
  700. data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
  701. data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
  702. data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
  703. data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
  704. data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
  705. data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
  706. data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
  707. data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
  708. data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
  709. data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
  710. data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
  711. data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
  712. data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
  713. data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
  714. data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
  715. data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
  716. data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
  717. data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
  718. data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
  719. data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
  720. data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
  721. data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
  722. data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
  723. data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
  724. data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
  725. data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
  726. data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
  727. data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
  728. data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
  729. data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
  730. data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
  731. data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
  732. data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
  733. data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
  734. data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
  735. data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
  736. data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
  737. data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
  738. data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
  739. data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
  740. data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
  741. data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
  742. data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
  743. data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
  744. data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
  745. data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
  746. data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
  747. data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
  748. data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
  749. data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
  750. data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
  751. data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
  752. data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
  753. data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
  754. data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
  755. data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
  756. data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
  757. data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
  758. data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
  759. data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
  760. data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
  761. data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
  762. data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
  763. data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
  764. data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
  765. data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
  766. data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
  767. data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
  768. data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
  769. data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
  770. data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
  771. data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
  772. data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
  773. data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
  774. data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
  775. data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
  776. data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
  777. data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
  778. data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
  779. data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
  780. data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
  781. data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
  782. data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
  783. data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
  784. data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
  785. data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
  786. data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
  787. data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
  788. data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
  789. data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
  790. data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
  791. data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
  792. data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
  793. data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
  794. data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
  795. data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
  796. data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
  797. data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
  798. data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
  799. data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
  800. data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
  801. data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
  802. data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
  803. data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
  804. data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
  805. data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
  806. data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
  807. data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
  808. data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
  809. data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
  810. data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
  811. data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
  812. data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
  813. data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
  814. data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
  815. data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
  816. data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
  817. data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
  818. data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
  819. data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
  820. data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
  821. data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
  822. data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
  823. data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
  824. data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
  825. data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
  826. data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
  827. data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
  828. data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
  829. data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
  830. data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
  831. data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
  832. data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
  833. data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
  834. data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
  835. data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
  836. data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
  837. data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
  838. data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
  839. data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
  840. data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
  841. data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
  842. data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
  843. data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
  844. data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
  845. data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
  846. data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
  847. data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
  848. data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
  849. data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
  850. data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
  851. data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
  852. data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
  853. data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
  854. data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
  855. data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
  856. data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
  857. data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
  858. data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
  859. data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
  860. data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
  861. data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
  862. data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
  863. data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
  864. data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
  865. data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
  866. data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
  867. data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
  868. data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
  869. data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
  870. data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
  871. data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
  872. data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
  873. data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
  874. data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
  875. data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
  876. data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
  877. data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
  878. data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
  879. data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
  880. data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
  881. data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
  882. data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
  883. data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
  884. data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
  885. data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
  886. data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
  887. data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
  888. data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
  889. data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
  890. data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
  891. data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
  892. data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
  893. data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
  894. data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
  895. data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
  896. data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
  897. data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
  898. data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
  899. data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
  900. data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
  901. data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
  902. data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
  903. data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
  904. data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
  905. data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
  906. data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
  907. data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
  908. data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
  909. data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
  910. data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
  911. data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
  912. data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
  913. data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
  914. data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
  915. data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
  916. data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
  917. data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
  918. data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
  919. data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
  920. data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
  921. data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
  922. data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
  923. data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
  924. data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
  925. data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
  926. data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
  927. data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
  928. data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
  929. data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
  930. data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
  931. data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
  932. data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
  933. data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
  934. data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
  935. data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
  936. data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
  937. data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
  938. data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
  939. data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
  940. data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
  941. data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
  942. data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
  943. data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
  944. data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
  945. data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
  946. data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
  947. data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
  948. data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
  949. data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
  950. data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
  951. data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
  952. data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
  953. data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
  954. data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
  955. data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
  956. data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
  957. data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
  958. data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
  959. data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
  960. data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
  961. data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
  962. data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
  963. data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
  964. data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
  965. data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
  966. data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
  967. data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
  968. data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
  969. data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
  970. data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
  971. data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
  972. data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
  973. data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
  974. data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
  975. data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
  976. data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
  977. data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
  978. data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
  979. data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
  980. data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
  981. data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
  982. data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
  983. data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
  984. data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
  985. data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
  986. data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
  987. data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
  988. data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
  989. data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
  990. data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
  991. data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
  992. data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
  993. data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
  994. data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
  995. data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
  996. data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
  997. data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
  998. data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
  999. data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
  1000. data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
  1001. data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
  1002. data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
  1003. data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
  1004. data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
  1005. data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
  1006. data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
  1007. data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
  1008. data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
  1009. data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
  1010. data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
  1011. data/vendor/ggml/ggml.pc.in +10 -0
  1012. data/vendor/ggml/include/ggml-alloc.h +85 -0
  1013. data/vendor/ggml/include/ggml-backend.h +431 -0
  1014. data/vendor/ggml/include/ggml-blas.h +25 -0
  1015. data/vendor/ggml/include/ggml-cann.h +123 -0
  1016. data/vendor/ggml/include/ggml-cpp.h +39 -0
  1017. data/vendor/ggml/include/ggml-cpu.h +151 -0
  1018. data/vendor/ggml/include/ggml-cuda.h +50 -0
  1019. data/vendor/ggml/include/ggml-hexagon.h +19 -0
  1020. data/vendor/ggml/include/ggml-metal.h +61 -0
  1021. data/vendor/ggml/include/ggml-opencl.h +26 -0
  1022. data/vendor/ggml/include/ggml-openvino.h +37 -0
  1023. data/vendor/ggml/include/ggml-opt.h +256 -0
  1024. data/vendor/ggml/include/ggml-rpc.h +35 -0
  1025. data/vendor/ggml/include/ggml-sycl.h +49 -0
  1026. data/vendor/ggml/include/ggml-virtgpu.h +14 -0
  1027. data/vendor/ggml/include/ggml-vulkan.h +29 -0
  1028. data/vendor/ggml/include/ggml-webgpu.h +19 -0
  1029. data/vendor/ggml/include/ggml-zdnn.h +17 -0
  1030. data/vendor/ggml/include/ggml-zendnn.h +22 -0
  1031. data/vendor/ggml/include/ggml.h +2845 -0
  1032. data/vendor/ggml/include/gguf.h +204 -0
  1033. data/vendor/ggml/requirements.txt +12 -0
  1034. data/vendor/ggml/scripts/gen-authors.sh +9 -0
  1035. data/vendor/ggml/scripts/release.sh +296 -0
  1036. data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
  1037. data/vendor/ggml/scripts/sync-llama.last +1 -0
  1038. data/vendor/ggml/scripts/sync-llama.sh +21 -0
  1039. data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
  1040. data/vendor/ggml/scripts/sync-whisper.last +1 -0
  1041. data/vendor/ggml/scripts/sync-whisper.sh +17 -0
  1042. data/vendor/ggml/src/CMakeLists.txt +493 -0
  1043. data/vendor/ggml/src/ggml-alloc.c +1248 -0
  1044. data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
  1045. data/vendor/ggml/src/ggml-backend-dl.h +45 -0
  1046. data/vendor/ggml/src/ggml-backend-impl.h +275 -0
  1047. data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
  1048. data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
  1049. data/vendor/ggml/src/ggml-backend.cpp +2371 -0
  1050. data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  1051. data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
  1052. data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
  1053. data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
  1054. data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
  1055. data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
  1056. data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
  1057. data/vendor/ggml/src/ggml-cann/common.h +651 -0
  1058. data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
  1059. data/vendor/ggml/src/ggml-common.h +1900 -0
  1060. data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
  1061. data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
  1062. data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
  1063. data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
  1064. data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  1065. data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  1066. data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  1067. data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
  1068. data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
  1069. data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
  1070. data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  1071. data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
  1072. data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  1073. data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
  1074. data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
  1075. data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  1076. data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
  1077. data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
  1078. data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  1079. data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
  1080. data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
  1081. data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
  1082. data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  1083. data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
  1084. data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  1085. data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
  1086. data/vendor/ggml/src/ggml-cpu/common.h +95 -0
  1087. data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
  1088. data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
  1089. data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
  1090. data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
  1091. data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
  1092. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
  1093. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  1094. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
  1095. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  1096. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
  1097. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  1098. data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
  1099. data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
  1100. data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
  1101. data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
  1102. data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
  1103. data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
  1104. data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
  1105. data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
  1106. data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
  1107. data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
  1108. data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
  1109. data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
  1110. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
  1111. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
  1112. data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
  1113. data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
  1114. data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
  1115. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
  1116. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
  1117. data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
  1118. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
  1119. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
  1120. data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
  1121. data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
  1122. data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
  1123. data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  1124. data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
  1125. data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
  1126. data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
  1127. data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
  1128. data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
  1129. data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
  1130. data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
  1131. data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
  1132. data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
  1133. data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
  1134. data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
  1135. data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
  1136. data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
  1137. data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
  1138. data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
  1139. data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
  1140. data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
  1141. data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
  1142. data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
  1143. data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
  1144. data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
  1145. data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
  1146. data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
  1147. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
  1148. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  1149. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  1150. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  1151. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
  1152. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
  1153. data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
  1154. data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  1155. data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
  1156. data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
  1157. data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
  1158. data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  1159. data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  1160. data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  1161. data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
  1162. data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
  1163. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
  1164. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  1165. data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
  1166. data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  1167. data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
  1168. data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
  1169. data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
  1170. data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
  1171. data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  1172. data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
  1173. data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
  1174. data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
  1175. data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
  1176. data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
  1177. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
  1178. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
  1179. data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
  1180. data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
  1181. data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
  1182. data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
  1183. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
  1184. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  1185. data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
  1186. data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
  1187. data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
  1188. data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
  1189. data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
  1190. data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
  1191. data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
  1192. data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
  1193. data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
  1194. data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
  1195. data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
  1196. data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
  1197. data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
  1198. data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
  1199. data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
  1200. data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
  1201. data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
  1202. data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
  1203. data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
  1204. data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
  1205. data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
  1206. data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
  1207. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  1208. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  1209. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  1210. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  1211. data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
  1212. data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  1213. data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
  1214. data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
  1215. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  1216. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  1217. data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
  1218. data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  1219. data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
  1220. data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
  1221. data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
  1222. data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
  1223. data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
  1224. data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
  1225. data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
  1226. data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
  1227. data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
  1228. data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
  1229. data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  1230. data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
  1231. data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
  1232. data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
  1233. data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
  1234. data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
  1235. data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
  1236. data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
  1237. data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
  1238. data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  1239. data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  1240. data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
  1241. data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  1242. data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
  1243. data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  1244. data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
  1245. data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
  1246. data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
  1247. data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  1248. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
  1249. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
  1250. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
  1251. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  1252. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  1253. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
  1254. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
  1255. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
  1256. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
  1257. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
  1258. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  1259. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  1260. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
  1261. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  1262. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
  1263. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
  1264. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  1265. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  1266. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  1267. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
  1268. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
  1269. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  1270. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  1271. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
  1272. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  1273. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
  1274. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  1275. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
  1276. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  1277. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  1278. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  1279. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  1280. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  1281. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
  1282. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
  1283. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
  1284. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
  1285. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
  1286. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
  1287. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
  1288. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
  1289. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  1290. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  1291. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  1292. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  1293. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  1294. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  1295. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
  1296. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  1297. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  1298. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  1299. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  1300. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  1301. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  1302. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
  1303. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  1304. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  1305. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  1306. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  1307. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  1308. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  1309. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
  1310. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  1311. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  1312. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  1313. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  1314. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  1315. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  1316. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
  1317. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  1318. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  1319. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  1320. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  1321. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  1322. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  1323. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
  1324. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  1325. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  1326. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  1327. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  1328. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  1329. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  1330. data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
  1331. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  1332. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  1333. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  1334. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  1335. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  1336. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  1337. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  1338. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  1339. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  1340. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  1341. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  1342. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  1343. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  1344. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  1345. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  1346. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  1347. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  1348. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  1349. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  1350. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  1351. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  1352. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  1353. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  1354. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  1355. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  1356. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
  1357. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
  1358. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  1359. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  1360. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  1361. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  1362. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  1363. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  1364. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  1365. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  1366. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  1367. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  1368. data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
  1369. data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
  1370. data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
  1371. data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
  1372. data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
  1373. data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
  1374. data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
  1375. data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  1376. data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
  1377. data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
  1378. data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
  1379. data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
  1380. data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
  1381. data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
  1382. data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
  1383. data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
  1384. data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
  1385. data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
  1386. data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
  1387. data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
  1388. data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
  1389. data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
  1390. data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
  1391. data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
  1392. data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  1393. data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
  1394. data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
  1395. data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
  1396. data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
  1397. data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
  1398. data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
  1399. data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
  1400. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  1401. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
  1402. data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
  1403. data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  1404. data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
  1405. data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
  1406. data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
  1407. data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
  1408. data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
  1409. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
  1410. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
  1411. data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
  1412. data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
  1413. data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
  1414. data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
  1415. data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  1416. data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
  1417. data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
  1418. data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
  1419. data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  1420. data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
  1421. data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  1422. data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  1423. data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  1424. data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
  1425. data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  1426. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
  1427. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  1428. data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  1429. data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
  1430. data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
  1431. data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
  1432. data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
  1433. data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
  1434. data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
  1435. data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
  1436. data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
  1437. data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
  1438. data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  1439. data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
  1440. data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
  1441. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  1442. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  1443. data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  1444. data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  1445. data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
  1446. data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
  1447. data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
  1448. data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
  1449. data/vendor/ggml/src/ggml-impl.h +783 -0
  1450. data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  1451. data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  1452. data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  1453. data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  1454. data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
  1455. data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
  1456. data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
  1457. data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
  1458. data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
  1459. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
  1460. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
  1461. data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
  1462. data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
  1463. data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
  1464. data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
  1465. data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
  1466. data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
  1467. data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
  1468. data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  1469. data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  1470. data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  1471. data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  1472. data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  1473. data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  1474. data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  1475. data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
  1476. data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  1477. data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
  1478. data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  1479. data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  1480. data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  1481. data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  1482. data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  1483. data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  1484. data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  1485. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  1486. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  1487. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  1488. data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  1489. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  1490. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
  1491. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
  1492. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
  1493. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
  1494. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
  1495. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
  1496. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
  1497. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  1498. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
  1499. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
  1500. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
  1501. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
  1502. data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
  1503. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  1504. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
  1505. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
  1506. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
  1507. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
  1508. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
  1509. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
  1510. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
  1511. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
  1512. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  1513. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
  1514. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
  1515. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
  1516. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
  1517. data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  1518. data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  1519. data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  1520. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  1521. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  1522. data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  1523. data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  1524. data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
  1525. data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
  1526. data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  1527. data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  1528. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  1529. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  1530. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  1531. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
  1532. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  1533. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  1534. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
  1535. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
  1536. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  1537. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  1538. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  1539. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  1540. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  1541. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  1542. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  1543. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  1544. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  1545. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  1546. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  1547. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  1548. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
  1549. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
  1550. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  1551. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  1552. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  1553. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  1554. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  1555. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  1556. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  1557. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  1558. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  1559. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  1560. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
  1561. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
  1562. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
  1563. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  1564. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  1565. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  1566. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  1567. data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  1568. data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  1569. data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  1570. data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  1571. data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  1572. data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  1573. data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  1574. data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  1575. data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  1576. data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  1577. data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  1578. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  1579. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  1580. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  1581. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  1582. data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  1583. data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  1584. data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  1585. data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  1586. data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  1587. data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  1588. data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  1589. data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  1590. data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
  1591. data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  1592. data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  1593. data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  1594. data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  1595. data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
  1596. data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  1597. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
  1598. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  1599. data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
  1600. data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
  1601. data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  1602. data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  1603. data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  1604. data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  1605. data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  1606. data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  1607. data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  1608. data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  1609. data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  1610. data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  1611. data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  1612. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  1613. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  1614. data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  1615. data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  1616. data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  1617. data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  1618. data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
  1619. data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  1620. data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  1621. data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  1622. data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  1623. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
  1624. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  1625. data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  1626. data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
  1627. data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
  1628. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  1629. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  1630. data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  1631. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  1632. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  1633. data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
  1634. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
  1635. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  1636. data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
  1637. data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
  1638. data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
  1639. data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
  1640. data/vendor/ggml/src/ggml-opt.cpp +1094 -0
  1641. data/vendor/ggml/src/ggml-quants.c +5491 -0
  1642. data/vendor/ggml/src/ggml-quants.h +112 -0
  1643. data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
  1644. data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
  1645. data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
  1646. data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
  1647. data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
  1648. data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
  1649. data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
  1650. data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
  1651. data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
  1652. data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  1653. data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
  1654. data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
  1655. data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
  1656. data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
  1657. data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
  1658. data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
  1659. data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
  1660. data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
  1661. data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  1662. data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  1663. data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
  1664. data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
  1665. data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
  1666. data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
  1667. data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
  1668. data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
  1669. data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
  1670. data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
  1671. data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  1672. data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
  1673. data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
  1674. data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
  1675. data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
  1676. data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
  1677. data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
  1678. data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
  1679. data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
  1680. data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
  1681. data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
  1682. data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
  1683. data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
  1684. data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
  1685. data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
  1686. data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
  1687. data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
  1688. data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
  1689. data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
  1690. data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
  1691. data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
  1692. data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
  1693. data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
  1694. data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
  1695. data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  1696. data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
  1697. data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
  1698. data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
  1699. data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
  1700. data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
  1701. data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
  1702. data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
  1703. data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
  1704. data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
  1705. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  1706. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  1707. data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
  1708. data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
  1709. data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
  1710. data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  1711. data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  1712. data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
  1713. data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
  1714. data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
  1715. data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
  1716. data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
  1717. data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
  1718. data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
  1719. data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  1720. data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
  1721. data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
  1722. data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
  1723. data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
  1724. data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
  1725. data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  1726. data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
  1727. data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
  1728. data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
  1729. data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
  1730. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  1731. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  1732. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  1733. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  1734. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
  1735. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  1736. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  1737. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  1738. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  1739. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  1740. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
  1741. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
  1742. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
  1743. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
  1744. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
  1745. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
  1746. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
  1747. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
  1748. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
  1749. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
  1750. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
  1751. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
  1752. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
  1753. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
  1754. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
  1755. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
  1756. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
  1757. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
  1758. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
  1759. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
  1760. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
  1761. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
  1762. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
  1763. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
  1764. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
  1765. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
  1766. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
  1767. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
  1768. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
  1769. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
  1770. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
  1771. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
  1772. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
  1773. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
  1774. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
  1775. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
  1776. data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  1777. data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  1778. data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
  1779. data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
  1780. data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
  1781. data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
  1782. data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
  1783. data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
  1784. data/vendor/ggml/src/ggml-threading.cpp +12 -0
  1785. data/vendor/ggml/src/ggml-threading.h +14 -0
  1786. data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  1787. data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  1788. data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  1789. data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  1790. data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  1791. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  1792. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  1793. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  1794. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  1795. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  1796. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  1797. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  1798. data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  1799. data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  1800. data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  1801. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  1802. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  1803. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  1804. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  1805. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  1806. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  1807. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
  1808. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
  1809. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  1810. data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
  1811. data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  1812. data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  1813. data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  1814. data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  1815. data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  1816. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  1817. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  1818. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  1819. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  1820. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  1821. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  1822. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
  1823. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  1824. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  1825. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  1826. data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
  1827. data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
  1828. data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
  1829. data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  1830. data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
  1831. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  1832. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  1833. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  1834. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  1835. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  1836. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  1837. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  1838. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  1839. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  1840. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  1841. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  1842. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  1843. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  1844. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  1845. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  1846. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  1847. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  1848. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  1849. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  1850. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
  1851. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  1852. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  1853. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  1854. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  1855. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  1856. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  1857. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  1858. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  1859. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
  1860. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
  1861. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  1862. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  1863. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  1864. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  1865. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  1866. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  1867. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  1868. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  1869. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  1870. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  1871. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  1872. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
  1873. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
  1874. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  1875. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  1876. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  1877. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  1878. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  1879. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  1880. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  1881. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  1882. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  1883. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  1884. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
  1885. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  1886. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  1887. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  1888. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
  1889. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  1890. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  1891. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  1892. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  1893. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  1894. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
  1895. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
  1896. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
  1897. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
  1898. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
  1899. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  1900. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
  1901. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  1902. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  1903. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
  1904. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  1905. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  1906. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  1907. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  1908. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  1909. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  1910. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
  1911. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  1912. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  1913. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  1914. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  1915. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
  1916. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
  1917. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  1918. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  1919. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  1920. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
  1921. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
  1922. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  1923. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  1924. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
  1925. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  1926. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  1927. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  1928. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  1929. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  1930. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  1931. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  1932. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  1933. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  1934. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  1935. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  1936. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  1937. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  1938. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  1939. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  1940. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  1941. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
  1942. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  1943. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  1944. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  1945. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
  1946. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  1947. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  1948. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
  1949. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  1950. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  1951. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  1952. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
  1953. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
  1954. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  1955. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  1956. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  1957. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  1958. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  1959. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  1960. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
  1961. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  1962. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  1963. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  1964. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  1965. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  1966. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  1967. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  1968. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  1969. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  1970. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
  1971. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  1972. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  1973. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  1974. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
  1975. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  1976. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  1977. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  1978. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  1979. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  1980. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  1981. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  1982. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  1983. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  1984. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  1985. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  1986. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  1987. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  1988. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  1989. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  1990. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  1991. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  1992. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  1993. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  1994. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  1995. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  1996. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  1997. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  1998. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  1999. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  2000. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  2001. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  2002. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  2003. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  2004. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  2005. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  2006. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
  2007. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  2008. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
  2009. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  2010. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  2011. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
  2012. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  2013. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  2014. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  2015. data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  2016. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
  2017. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
  2018. data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  2019. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
  2020. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  2021. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  2022. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  2023. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
  2024. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
  2025. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  2026. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
  2027. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
  2028. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  2029. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
  2030. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
  2031. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
  2032. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
  2033. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
  2034. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
  2035. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
  2036. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
  2037. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
  2038. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
  2039. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  2040. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
  2041. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
  2042. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
  2043. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
  2044. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
  2045. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
  2046. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
  2047. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
  2048. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
  2049. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  2050. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  2051. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
  2052. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
  2053. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
  2054. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  2055. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
  2056. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  2057. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
  2058. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
  2059. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
  2060. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
  2061. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  2062. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
  2063. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
  2064. data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  2065. data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
  2066. data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
  2067. data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  2068. data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  2069. data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
  2070. data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
  2071. data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  2072. data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
  2073. data/vendor/ggml/src/ggml.c +7777 -0
  2074. data/vendor/ggml/src/ggml.cpp +26 -0
  2075. data/vendor/ggml/src/gguf.cpp +1556 -0
  2076. data/vendor/ggml/tests/CMakeLists.txt +356 -0
  2077. data/vendor/ggml/tests/test-arange.cpp +100 -0
  2078. data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
  2079. data/vendor/ggml/tests/test-cont.c +170 -0
  2080. data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
  2081. data/vendor/ggml/tests/test-conv-transpose.c +248 -0
  2082. data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
  2083. data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
  2084. data/vendor/ggml/tests/test-conv1d.cpp +289 -0
  2085. data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
  2086. data/vendor/ggml/tests/test-conv2d.cpp +391 -0
  2087. data/vendor/ggml/tests/test-customop.c +300 -0
  2088. data/vendor/ggml/tests/test-dup.c +111 -0
  2089. data/vendor/ggml/tests/test-interpolate.cpp +166 -0
  2090. data/vendor/ggml/tests/test-opt.cpp +1003 -0
  2091. data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
  2092. data/vendor/ggml/tests/test-pool.c +274 -0
  2093. data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
  2094. data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
  2095. data/vendor/ggml/tests/test-rel-pos.c +87 -0
  2096. data/vendor/ggml/tests/test-roll.cpp +128 -0
  2097. data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
  2098. data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
  2099. data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
  2100. data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
  2101. data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
  2102. data/vendor-patches/0005-concat-backward.patch +36 -0
  2103. data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
  2104. data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
  2105. data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
  2106. data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
  2107. metadata +2161 -0
@@ -0,0 +1,1593 @@
1
+ # lib/toy/llm/engine/llama_kv_engine.rb (was lib/toy_smollm2_ffi_kv.rb) — Toy::SmolLM2 KV-cache decode via ggml FFI.
2
+ #
3
+ # Mirror of lib/toy/llm/engine/gpt2_kv_engine.rb (was lib/gpt2_ffi_kv.rb)
4
+ # but for the llama-family architecture:
5
+ # - RMSNorm (no beta) instead of LayerNorm
6
+ # - No biases on Q / K / V / O / FFN projections
7
+ # - SwiGLU FFN: down( silu(gate(x)) * up(x) )
8
+ # - RoPE applied to Q and K before the dot product
9
+ # - GQA: K and V are stored per-`n_kv`-head, not per-`n_heads`-head.
10
+ # Each KV head is shared by group_size = n_heads / n_kv query heads.
11
+ #
12
+ # Per decode step builds a single-position compute graph; K and V at
13
+ # the current position are written into persistent per-layer buffers
14
+ # via cpy-into-view (same pattern as the GPT-2 cache). Cost per step:
15
+ # constant in prompt length.
16
+
17
+ require_relative "../../models/transformer"
18
+ require_relative "../../../toy"
19
+ require_relative "../../models/toy_smollm2"
20
+ require_relative "../../ffi/tinynn"
21
+ # NOTE: not requiring "toy_smollm2_loader" here. Requiring it from
22
+ # inside this file triggers a Spinel GC mark crash in decode_step
23
+ # (sp_gc_mark / sp_PtrArray_new_scan) for reasons we haven't fully
24
+ # isolated — likely something about require-order interaction with
25
+ # Spinel's type inference around GGUFLoad. Callers that use
26
+ # realize_and_load_auto (or any method here that references
27
+ # GGUFLoad) must `require_relative "toy/models/toy_smollm2_loader"` from their
28
+ # top-level driver file BEFORE this file is loaded. The OpenAI API
29
+ # binaries and the realize-mmap demos already do.
30
+
31
+ # Per-block persistent tensors for the SmolLM2 KV cache.
32
+ #
33
+ # Q is split per query head (n_heads of them).
34
+ # K, V, and their persistent buffers are split per KV head (n_kv of them).
35
+ class SmolLM2KVBlockFFI
36
+ attr_accessor :t_rn1_gamma, :t_rn2_gamma,
37
+ :t_w_q, :t_w_k, :t_w_v, :t_w_o,
38
+ :t_b_q, :t_b_k, :t_b_v,
39
+ # M1: per-block QK-norm (Qwen3). RMSNorm on Q and K with
40
+ # a shared [d_head] gamma applied to every head before
41
+ # RoPE. Allocated only when has_qk_norm is set. The
42
+ # null-ptr seed lets graph-builder code branch cleanly.
43
+ :t_q_norm_gamma, :t_k_norm_gamma,
44
+ # I-Gemma (#113): post-attention and post-FFN RMSNorm
45
+ # gammas. Gemma 2 sandwiches each sublayer between a
46
+ # pre-norm (the existing t_rn1_gamma / t_rn2_gamma)
47
+ # and a post-norm (these). Shape [d_model] each.
48
+ # Allocated only when cache.has_post_norms is set.
49
+ :t_post_attn_norm_gamma, :t_post_ffn_norm_gamma,
50
+ :t_w_gate, :t_w_up, :t_w_down,
51
+ # M2.3 MoE. When SmolLM2KVFFICache#is_moe is true, the
52
+ # FFN block is replaced with a Mixtral-style routed FFN:
53
+ # t_w_router : 2D [d_model, n_experts] — gating
54
+ # t_w_gate_exps : 3D [d_model, d_ff, n_experts]
55
+ # t_w_up_exps : 3D [d_model, d_ff, n_experts]
56
+ # t_w_down_exps : 3D [d_ff, d_model, n_experts]
57
+ # Set by realize_for_mmap when GGUF carries
58
+ # blk.0.ffn_gate_inp.weight (the MoE-presence sentinel).
59
+ :t_w_router, :t_w_gate_exps, :t_w_up_exps, :t_w_down_exps,
60
+ :t_K, :t_V,
61
+ # F1.2: optional LoRA adapters on Q projection (one
62
+ # rank-R pair per Q head). t_w_lora_a_q[hq] has shape
63
+ # (r, d_model); t_w_lora_b_q[hq] has shape (d_head, r).
64
+ # Allocated only when cache.lora_q_enabled at realize
65
+ # time. Trainable f32 tensors in ctx_w (not mmap'd from
66
+ # GGUF — adapters are session-local).
67
+ :t_w_lora_a_q, :t_w_lora_b_q,
68
+ # F1.2 step 6b: optional persistent Adam moments paired
69
+ # with the LoRA-A/B tensors above. Allocated in ctx_w
70
+ # (NOT compute ctx) so they survive tnn_reset_for_rebuild
71
+ # between multi-position SFT steps. Same shapes as A/B.
72
+ # Allocated only when cache.lora_q_adamw_enabled. The
73
+ # m/v live next to A/B so a future "save adapter +
74
+ # optimizer state" hook can serialize them together.
75
+ :t_w_lora_a_q_m, :t_w_lora_a_q_v,
76
+ :t_w_lora_b_q_m, :t_w_lora_b_q_v
77
+
78
+ def initialize
79
+ @t_rn1_gamma = TinyNN.tnn_null_ptr
80
+ @t_rn2_gamma = TinyNN.tnn_null_ptr
81
+ @t_q_norm_gamma = TinyNN.tnn_null_ptr
82
+ @t_k_norm_gamma = TinyNN.tnn_null_ptr
83
+ @t_post_attn_norm_gamma = TinyNN.tnn_null_ptr
84
+ @t_post_ffn_norm_gamma = TinyNN.tnn_null_ptr
85
+ @t_w_q = [TinyNN.tnn_null_ptr]
86
+ @t_w_k = [TinyNN.tnn_null_ptr]
87
+ @t_w_v = [TinyNN.tnn_null_ptr]
88
+ @t_b_q = [TinyNN.tnn_null_ptr] # per-Q-head bias (Qwen2.x)
89
+ @t_b_k = [TinyNN.tnn_null_ptr] # per-KV-head bias
90
+ @t_b_v = [TinyNN.tnn_null_ptr] # per-KV-head bias (1-D [d_head])
91
+ @t_K = [TinyNN.tnn_null_ptr]
92
+ @t_V = [TinyNN.tnn_null_ptr]
93
+ @t_w_o = TinyNN.tnn_null_ptr
94
+ @t_w_gate = TinyNN.tnn_null_ptr
95
+ @t_w_up = TinyNN.tnn_null_ptr
96
+ @t_w_down = TinyNN.tnn_null_ptr
97
+ @t_w_router = TinyNN.tnn_null_ptr
98
+ @t_w_gate_exps = TinyNN.tnn_null_ptr
99
+ @t_w_up_exps = TinyNN.tnn_null_ptr
100
+ @t_w_down_exps = TinyNN.tnn_null_ptr
101
+ @t_w_lora_a_q = [TinyNN.tnn_null_ptr]
102
+ @t_w_lora_b_q = [TinyNN.tnn_null_ptr]
103
+ @t_w_lora_a_q_m = [TinyNN.tnn_null_ptr]
104
+ @t_w_lora_a_q_v = [TinyNN.tnn_null_ptr]
105
+ @t_w_lora_b_q_m = [TinyNN.tnn_null_ptr]
106
+ @t_w_lora_b_q_v = [TinyNN.tnn_null_ptr]
107
+ end
108
+ end
109
+
110
+ class SmolLM2KVFFICache
111
+ attr_accessor :sess, :t_token_embed, :t_final_norm_gamma,
112
+ :t_output, :has_untied_output, :has_qkv_bias,
113
+ # M1: Qwen3 added per-block QK-norm. When true, the
114
+ # graph builder applies tnn_rms_norm to Q and K with
115
+ # blk.t_q_norm_gamma / blk.t_k_norm_gamma (shape
116
+ # [d_head], shared across heads) BEFORE tnn_rope_ext.
117
+ # Detect by presence of "blk.0.attn_q_norm.weight" in
118
+ # the GGUF. Always false on Qwen2.5 / Llama-family.
119
+ :has_qk_norm,
120
+ # #110: which QK-norm flavor — 1 = per-head shared
121
+ # gamma (Qwen3, gamma shape [d_head]); 2 = full-Q
122
+ # gamma (OLMoE / Granite, gamma shape [d_model],
123
+ # applied to the concatenated Q before head split).
124
+ # 0 = none. Set by realize_for_mmap from the detected
125
+ # flags. The graph builder branches on this.
126
+ :qk_norm_kind,
127
+ # I-Gemma (#113): Gemma 2-specific knobs. All default
128
+ # to inert values (no-op) for non-Gemma models.
129
+ # has_post_norms: blk.X has post_attention_norm +
130
+ # post_ffw_norm tensors after the residual adds.
131
+ # embed_scale: post-token-embed multiplier
132
+ # (sqrt(d_model) for Gemma 2; 1.0 otherwise).
133
+ # attn_softcap: tanh-softcap on attention logits
134
+ # (50.0 for Gemma 2; 0.0 disables).
135
+ # final_softcap: tanh-softcap on the final output
136
+ # logits (30.0 for Gemma 2; 0.0 disables).
137
+ # swa_alternates: when true, only EVEN layers apply
138
+ # sliding window; odd layers see full attention.
139
+ :has_post_norms, :embed_scale,
140
+ :attn_softcap, :final_softcap, :swa_alternates,
141
+ # M3: SWA window. 0 = no sliding window (full causal).
142
+ # >0 = attend only to the last `swa_window` positions
143
+ # in the K/V cache. Phi-3-mini-4k sets this to 2048;
144
+ # Gemma 2 local layers set it to 4096. Realize-time
145
+ # parameter (set via realize_for_mmap or post-init).
146
+ :swa_window,
147
+ :kv_blocks_ffi,
148
+ :max_T, :d_model, :d_ff, :n_heads, :n_kv, :d_head,
149
+ :group_size, :n_layers, :vocab_size, :rope_base,
150
+ :rope_scaling, :t_rope_freq_factors,
151
+ :rms_eps, :realized,
152
+ # CUDA-MIRROR-SKIP-BEGIN: trace-tap is CPU-only diagnostic
153
+ :trace_on, :trace_names, :trace_tensors,
154
+ # CUDA-MIRROR-SKIP-END
155
+ # Phase 3: ggml type for 2D linear weights. Default
156
+ # 0 = GGML_TYPE_F32 (legacy). 8 = GGML_TYPE_Q8_0. Set
157
+ # via #set_weight_type before #realize_for to keep
158
+ # quantized weights quantized in memory.
159
+ :weight_type,
160
+ # P5.1+P5.2: KV cache dtype. 0 = F32 (legacy), 8 = Q8_0.
161
+ # `enable_kv_q8!` sets both to Q8_0; finer-grained
162
+ # control is reserved for future debugging. Per-position
163
+ # writes go through ggml_cpy which quantizes f32→Q8 at
164
+ # the destination view. P5.2 flipped V's layout to
165
+ # match K (`ne=[d_head, max_T]`, positions on ne1), so
166
+ # both write paths span contiguous d_head-vectors —
167
+ # block-aligned for Q8 at d_head=64 (=2 blocks of 32).
168
+ :kv_type_k, :kv_type_v,
169
+ # P4.1: opt into ggml_flash_attn_ext in the attention
170
+ # step (default false → existing scale→softmax→matmul
171
+ # triplet). When true, each Q head's attention is one
172
+ # fused kernel call. Backward NOT supported (flash_back
173
+ # aborts in vendored ggml), so this is INFERENCE only.
174
+ # Set via enable_flash_attn! BEFORE realize_for_*.
175
+ :use_flash_attn,
176
+ # M2.3: MoE flags. is_moe → replace SwiGLU FFN with the
177
+ # routed expert FFN (router → softmax → top_k → 3× mul_mat_id
178
+ # → silu·up → weighted sum). Set by detect_smollm2_flags
179
+ # when GGUF carries blk.0.ffn_gate_inp.weight.
180
+ :is_moe, :n_experts, :n_experts_used,
181
+ :gguf_handle_keepalive,
182
+ # F1.2: LoRA on Q projection. enable_lora_q!(r) sets
183
+ # both flags BEFORE realize. When enabled, each block
184
+ # gets per-Q-head trainable A/B adapter pairs spliced
185
+ # into the Q matmul: q_eff = w_q[hq]@x + B[hq]@A[hq]@x (hq = Q-head index, x = the projection's input).
186
+ :lora_q_enabled, :lora_q_rank,
187
+ # F1.2 step 6b: when true, realize_for_mmap also
188
+ # allocates persistent AdamW moments (m, v) for every
189
+ # LoRA-A/B pair in ctx_w. Required for multi-position
190
+ # SFT: between graph rebuilds the compute ctx is freed,
191
+ # so moments held there would be lost (NaN on cycle 2+).
192
+ :lora_q_adamw_enabled
193
+
194
+ def initialize
195
+ @realized = false
196
+ @max_T = 0
197
+ @d_model = 0
198
+ @d_ff = 0
199
+ @n_heads = 0
200
+ @n_kv = 0
201
+ @d_head = 0
202
+ @group_size = 0
203
+ @n_layers = 0
204
+ @vocab_size = 0
205
+ @rope_base = 10000.0
206
+ @rope_scaling = Toy::RopeScaling.none
207
+ @t_rope_freq_factors = TinyNN.tnn_null_ptr
208
+ @rms_eps = 1.0e-5
209
+ @sess = TinyNN.tnn_null_ptr
210
+ @t_token_embed = TinyNN.tnn_null_ptr
211
+ @t_final_norm_gamma = TinyNN.tnn_null_ptr
212
+ @t_output = TinyNN.tnn_null_ptr
213
+ @has_untied_output = false
214
+ @has_qkv_bias = false
215
+ @has_qk_norm = false
216
+ @qk_norm_kind = 0
217
+ @swa_window = 0
218
+ @has_post_norms = false
219
+ @embed_scale = 1.0
220
+ @attn_softcap = 0.0
221
+ @final_softcap = 0.0
222
+ @swa_alternates = false
223
+ @kv_blocks_ffi = [SmolLM2KVBlockFFI.new]
224
+ # CUDA-MIRROR-SKIP-BEGIN: trace-tap is CPU-only diagnostic
225
+ # --- trace-tap diagnostics (zero cost when off) ---
226
+ # When @trace_on is true, trace_tap() pushes (name, tensor) onto
227
+ # parallel arrays AND calls tnn_set_output so the scheduler keeps
228
+ # the tensor's buffer alive. After tnn_compute, dump_trace() walks
229
+ # the arrays, downloads each, and prints min/max/|mean|/nan stats.
230
+ # When off, trace_tap() is a single bool branch — the graph is
231
+ # unchanged from production.
232
+ @trace_on = false
233
+ @trace_names = [""]
234
+ @trace_names.pop
235
+ @trace_tensors = [TinyNN.tnn_null_ptr]
236
+ @trace_tensors.pop
237
+ # CUDA-MIRROR-SKIP-END
238
+ @weight_type = 0 # GGML_TYPE_F32; legacy default
239
+ @kv_type_k = 0 # GGML_TYPE_F32; opt in via enable_kv_q8!
240
+ @kv_type_v = 0 # GGML_TYPE_F32; opt in via enable_kv_q8!
241
+ @use_flash_attn = false # opt in via enable_flash_attn!
242
+ @is_moe = false
243
+ @n_experts = 0
244
+ @n_experts_used = 0
245
+ @gguf_handle_keepalive = TinyNN.tnn_null_ptr # set by realize_for_mmap
246
+ @lora_q_enabled = false
247
+ @lora_q_rank = 0
248
+ @lora_q_adamw_enabled = false
249
+ end
250
+
251
+ # P5.1: opt into Q8_0 storage for the K cache. Must be called BEFORE
252
+ # realize_for_mmap. In P5.1 alone V stayed F32 — its pre-P5.2 layout
253
+ # (positions along ne0) made per-position Q8 writes non-block-
254
+ # aligned. K's layout (positions along ne1, d_head along ne0)
255
+ # writes whole d_head-vectors at a time, which for d_head=64
256
+ # spans exactly 2 Q8_0 blocks of 32 elements each → aligned. The
257
+ # write path uses ggml_cpy which quantizes on f32→Q8 destination;
258
+ # the read path (attention matmul) dequantizes block-by-block
259
+ # inside ggml's kernel. Cuts K-cache memory & bandwidth ~4×.
260
+ # P5.1+P5.2: opt into Q8_0 for the K and V caches. Shrinks K and V
261
+ # memory + bandwidth (3.75× smaller at d_head=64).
262
+ #
263
+ # Auto-enables flash attention. Reason: the non-flash V matmul
264
+ # requires a transpose-cont of V_hist, which is structurally
265
+ # impossible for Q8_0 (transposing flips the d_head and hist_count
266
+ # axes; hist_count generally isn't a multiple of 32, so the
267
+ # contiguous Q8 destination can't be allocated). flash_attn_ext
268
+ # consumes V in its natural [d_head, hist_count] orientation,
269
+ # which dodges the transpose entirely — so Q8 V works there.
270
+ #
271
+ # Inference-only. flash_attn's backward aborts in vendored ggml.
272
+ def enable_kv_q8!
273
+ @kv_type_k = 8 # GGML_TYPE_Q8_0
274
+ @kv_type_v = 8
275
+ @use_flash_attn = true
276
+ end
277
+
278
+ # P4.1: opt into ggml_flash_attn_ext for inference. Per-Q-head it
279
+ # replaces the (scale → softmax → matmul) triplet with one fused
280
+ # call. NOTE(review): as written for P4.1, V stayed in a [max_T, d_head]
281
+ # layout and was transpose-materialized per step (one ggml_cont). The
282
+ # P5.2 notes in this file say V's layout has since been flipped to match
283
+ # K, removing the transpose and unlocking V Q8 — confirm against the graph builder.
284
+ #
285
+ # Backward is unsupported in vendored ggml (flash_attn_back aborts),
286
+ # so this path is INFERENCE only. Call BEFORE realize_for_mmap.
287
+ def enable_flash_attn!
288
+ @use_flash_attn = true
289
+ end
290
+
291
+ # M2.3: opt into the MoE FFN graph. Must be called BEFORE realize_for_mmap.
292
+ # n_experts is the total count in the GGUF; n_experts_used is the
293
+ # top-K routed per token. Mixtral-8x7B: enable_moe!(8, 2). Qwen3-30B-
294
+ # A3B: enable_moe!(128, 8) (with optional shared expert — not yet
295
+ # supported in this path).
296
+ def enable_moe!(n_experts, n_experts_used)
297
+ @is_moe = true
298
+ @n_experts = n_experts
299
+ @n_experts_used = n_experts_used
300
+ end
301
+
302
+ # F1.2: enable per-Q-head LoRA on this session's forward graph. Call
303
+ # BEFORE realize_for_mmap. Adapter A is (r, d_model), adapter B is
304
+ # (d_head, r); both trainable F32 tensors in ctx_w (not mmap'd, so
305
+ # writes survive). Standard LoRA init: A = small Gaussian, B = 0,
306
+ # which makes the adapter a no-op at step 0 (forward output ==
307
+ # baseline). Use upload_lora_zero!(seed) to set up that init.
308
+ def enable_lora_q!(r)
309
+ @lora_q_enabled = true
310
+ @lora_q_rank = r
311
+ end
312
+
313
+ # F1.2 step 6b: allocate persistent AdamW moments (m, v) alongside
314
+ # each LoRA-A/B pair, in ctx_w. Requires enable_lora_q!(...) to have
315
+ # been called first (so the rank is known). Call BEFORE
316
+ # realize_for_mmap. Without this, multi-position SFT loses Adam
317
+ # state at every graph rebuild and diverges to NaN.
318
+ def enable_lora_q_adamw!
319
+ @lora_q_adamw_enabled = true
320
+ end
321
+
322
+ # Phase 3 opt-in: set the ggml type used for 2D linear weights when
323
+ # realize_for runs. 0 = F32, 8 = Q8_0. Call BEFORE realize_for —
324
+ # the persistent tensors are allocated there.
325
+ def set_weight_type(t)
326
+ @weight_type = t
327
+ end
328
+
329
+ # Allocate one persistent 2D linear weight tensor at the configured
330
+ # type. Used by realize_for; keeps the Q8/F32 branch in one place.
331
+ # Non-2D-linear tensors (norms, biases, K/V cache, t_output) stay
332
+ # F32 even in Q8 mode — quantizing them costs accuracy with no
333
+ # compute saving.
334
+ def alloc_2d_w(rows, cols)
335
+ if @weight_type == 0
336
+ TinyNN.tnn_input_2d_f32_persistent(@sess, rows, cols)
337
+ else
338
+ TinyNN.tnn_input_2d_persistent_typed(@sess, rows, cols, @weight_type)
339
+ end
340
+ end
341
+
342
+ # Phase 2 BYO-pointer realization. Like realize_for but every
343
+ # GGUF-resident tensor (token_embed, norms, biases, all 2D linears,
344
+ # untied output) is allocated to POINT AT the file's mmap'd pages
345
+ # rather than copied into a backend buffer. Only K/V cache and the
346
+ # compute scratch live in backend-allocated memory. The kv_cache
347
+ # holds the GGUF handle so the mmap stays alive for its lifetime.
348
+ #
349
+ # Caller flow:
350
+ # gguf = TinyNN.tnn_gguf_load(path) # mmap'd, no_alloc
351
+ # flags = GGUFLoad.detect_smollm2_flags(path)
352
+ # wtype = GGUFLoad.detect_weight_type(path)
353
+ # kv = SmolLM2KVFFICache.new
354
+ # kv.realize_for_mmap(gguf, cfg, MAX_T, flags.untied, flags.qkv_bias, qk_norm) # qk_norm: true when the GGUF has blk.0.attn_q_norm.weight
355
+ # # weights are already in place; no load_weights call needed.
356
+ def realize_for_mmap(gguf_handle, cfg, max_T, untied, qkv_bias, qk_norm)
357
+ @max_T = max_T
358
+ @d_model = cfg.d_model
359
+ @d_ff = cfg.d_ff
360
+ @n_heads = cfg.n_heads
361
+ @n_kv = cfg.n_kv
362
+ @d_head = cfg.head_dim
363
+ @group_size = cfg.n_heads / cfg.n_kv
364
+ @n_layers = cfg.n_layers
365
+ @vocab_size = cfg.vocab
366
+ @rope_base = cfg.rope_base
367
+ @rope_scaling = cfg.rope_scaling
368
+ @rms_eps = cfg.rms_eps
369
+
370
+ @gguf_handle_keepalive = gguf_handle # prevent GC; mmap must outlive @sess
371
+ @sess = TinyNN.tnn_session_new(0)
372
+ @has_untied_output = untied
373
+ @has_qkv_bias = qkv_bias
374
+ @has_qk_norm = qk_norm
375
+ # #110: if caller didn't pre-set qk_norm_kind via the
376
+ # attr_accessor, default to 1 (per-head shared) for backward
377
+ # compat with the Qwen3 detection that established the qk_norm
378
+ # path. Models that want full-Q (OLMoE / Granite) must set
379
+ # kv.qk_norm_kind = 2 BEFORE calling realize_for_mmap.
380
+ if @has_qk_norm && @qk_norm_kind == 0
381
+ @qk_norm_kind = 1
382
+ end
383
+
384
+ # llama3 / LongRoPE: allocate the (d_head/2)-elem freq_factors
385
+ # tensor in ctx_w before finalize_weights. We compute and upload
386
+ # the values after finalize (see below). For all other rope_scaling
387
+ # kinds the FFI call still needs a pointer — pass tnn_null_ptr.
388
+ if @rope_scaling.kind == :llama3
389
+ @t_rope_freq_factors = TinyNN.tnn_rope_freq_factors_alloc(@sess, @d_head)
390
+ else
391
+ @t_rope_freq_factors = TinyNN.tnn_null_ptr
392
+ end
393
+
394
+ # Wire the GGUF's mmap region into the session as the source of
395
+ # weight bytes. Subsequent tnn_input_*_persistent_mmap calls
396
+ # allocate tensors with .data inside this region — no copy.
397
+ map_base = TinyNN.tnn_gguf_mmap_base(gguf_handle)
398
+ map_size = TinyNN.tnn_gguf_mmap_size(gguf_handle)
399
+ TinyNN.tnn_session_attach_weight_mmap(@sess, map_base, map_size)
400
+
401
+ # toy#gguf-checkpoint-reload (#153) — from-scratch checkpoints
402
+ # written by ToyGGUFWriter store one tensor per head
403
+ # (blk.N.attn_q.head_H.weight) rather than the fused llama.cpp
404
+ # shape. Detect via the head_0 sentinel; the per-Q-head/K/V
405
+ # loaders below branch on it.
406
+ @per_head_attn = TinyNN.tnn_gguf_find_index(gguf_handle, "blk.0.attn_q.head_0.weight") >= 0
407
+ if @per_head_attn
408
+ puts " per-head tensors detected (toy from-scratch checkpoint)"
409
+ end
410
+
411
+ # Globals — embeddings + final norm + optional untied output.
412
+ eidx = TinyNN.tnn_gguf_find_index(gguf_handle, "token_embd.weight")
413
+ eoff = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, eidx)
414
+ etyp = TinyNN.tnn_gguf_tensor_type(gguf_handle, eidx)
415
+ @t_token_embed = TinyNN.tnn_input_2d_persistent_mmap(@sess,
416
+ @vocab_size, @d_model, etyp, eoff)
417
+
418
+ fnidx = TinyNN.tnn_gguf_find_index(gguf_handle, "output_norm.weight")
419
+ fnoff = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, fnidx)
420
+ @t_final_norm_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess,
421
+ @d_model, 0, fnoff) # 0 = GGML_TYPE_F32
422
+
423
+ if untied
424
+ oidx = TinyNN.tnn_gguf_find_index(gguf_handle, "output.weight")
425
+ ooff = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, oidx)
426
+ otyp = TinyNN.tnn_gguf_tensor_type(gguf_handle, oidx)
427
+ @t_output = TinyNN.tnn_input_2d_persistent_mmap(@sess,
428
+ @vocab_size, @d_model, otyp, ooff)
429
+ end
430
+
431
+ @kv_blocks_ffi = [SmolLM2KVBlockFFI.new]
432
+ li = 1
433
+ while li < @n_layers
434
+ @kv_blocks_ffi.push(SmolLM2KVBlockFFI.new)
435
+ li = li + 1
436
+ end
437
+
438
+ li = 0
439
+ while li < @n_layers
440
+ blk = @kv_blocks_ffi[li]
441
+ prefix = "blk." + li.to_s
442
+
443
+ # Norms — 1D F32 mmap'd directly.
444
+ rn1_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_norm.weight")
445
+ rn2_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_norm.weight")
446
+ blk.t_rn1_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
447
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, rn1_idx))
448
+ blk.t_rn2_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
449
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, rn2_idx))
450
+
451
+ # I-Gemma (#113): post-attention and post-FFN RMSNorm gammas
452
+ # (Gemma 2 sandwiches each sublayer between pre+post norms).
453
+ # Tensor names: blk.X.post_attention_norm.weight, blk.X.post_ffw_norm.weight.
454
+ if @has_post_norms
455
+ pa_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".post_attention_norm.weight")
456
+ pf_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".post_ffw_norm.weight")
457
+ blk.t_post_attn_norm_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
458
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, pa_idx))
459
+ blk.t_post_ffn_norm_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
460
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, pf_idx))
461
+ end
462
+
463
+ # M1 + #110: QK-norm gammas. Two flavors detected via shape:
464
+ # kind=1: Qwen3 — gamma shape [d_head], shared across heads.
465
+ # kind=2: OLMoE / Granite — gamma shape [d_model], applied to
466
+ # the full Q before head split. Allocate the full
467
+ # [d_model] tensor; the graph builder either does a
468
+ # full-Q rms_norm OR views per-head d_head slices.
469
+ gamma_nelems = (@qk_norm_kind == 2) ? @d_model : @d_head
470
+ if @has_qk_norm
471
+ qn_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q_norm.weight")
472
+ kn_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k_norm.weight")
473
+ blk.t_q_norm_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, gamma_nelems, 0,
474
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, qn_idx))
475
+ # K norm follows the same flavor as Q.
476
+ k_gamma_nelems = (@qk_norm_kind == 2) ? (@n_kv * @d_head) : @d_head
477
+ blk.t_k_norm_gamma = TinyNN.tnn_input_1d_persistent_mmap(@sess, k_gamma_nelems, 0,
478
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, kn_idx))
479
+ end
480
+
481
+ # Q per-head — two layouts:
482
+ # 1) Fused (llama.cpp): single attn_q.weight tensor; each head
483
+ # is a contiguous slice at offset q_base + h * head_nbytes.
484
+ # 2) Per-head (toy from-scratch ckpt, #153): each head has its
485
+ # own attn_q.head_H.weight tensor with its own file offset.
486
+ if @per_head_attn
487
+ q0_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.head_0.weight")
488
+ q0_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, q0_idx)
489
+ blk.t_w_q = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
490
+ @d_head, @d_model, q0_type,
491
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, q0_idx))]
492
+ hq = 1
493
+ while hq < @n_heads
494
+ qh_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.head_" + hq.to_s + ".weight")
495
+ blk.t_w_q.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
496
+ @d_head, @d_model, q0_type,
497
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, qh_idx)))
498
+ hq = hq + 1
499
+ end
500
+ else
501
+ q_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.weight")
502
+ q_off_base = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, q_idx)
503
+ q_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, q_idx)
504
+ q_stride = head_nbytes(q_type, @d_head, @d_model)
505
+ blk.t_w_q = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
506
+ @d_head, @d_model, q_type, q_off_base)]
507
+ hq = 1
508
+ while hq < @n_heads
509
+ blk.t_w_q.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
510
+ @d_head, @d_model, q_type,
511
+ q_off_base + hq * q_stride))
512
+ hq = hq + 1
513
+ end
514
+ end
515
+
516
+ # F1.2: per-Q-head LoRA adapter slots. F32-only, allocated in
517
+ # ctx_w (trainable, not mmap'd). A: (r, d_model). B: (d_head, r).
518
+ # Standard init (A small Gaussian + B zero) makes the adapter
519
+ # equal to zero at step 0 → forward output matches the base
520
+ # model exactly. Caller seeds via upload_lora_q_init!(seed).
521
+ if @lora_q_enabled
522
+ blk.t_w_lora_a_q = [TinyNN.tnn_input_2d_f32_persistent(@sess,
523
+ @lora_q_rank, @d_model)]
524
+ blk.t_w_lora_b_q = [TinyNN.tnn_input_2d_f32_persistent(@sess,
525
+ @d_head, @lora_q_rank)]
526
+ hq = 1
527
+ while hq < @n_heads
528
+ blk.t_w_lora_a_q.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
529
+ @lora_q_rank, @d_model))
530
+ blk.t_w_lora_b_q.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
531
+ @d_head, @lora_q_rank))
532
+ hq = hq + 1
533
+ end
534
+
535
+ # F1.2 step 6b: persistent AdamW moments paired with the LoRA
536
+ # adapter tensors above. Same shapes. Live in ctx_w so they
537
+ # survive tnn_reset_for_rebuild across multi-position SFT.
538
+ if @lora_q_adamw_enabled
539
+ blk.t_w_lora_a_q_m = [TinyNN.tnn_input_2d_f32_persistent(@sess,
540
+ @lora_q_rank, @d_model)]
541
+ blk.t_w_lora_a_q_v = [TinyNN.tnn_input_2d_f32_persistent(@sess,
542
+ @lora_q_rank, @d_model)]
543
+ blk.t_w_lora_b_q_m = [TinyNN.tnn_input_2d_f32_persistent(@sess,
544
+ @d_head, @lora_q_rank)]
545
+ blk.t_w_lora_b_q_v = [TinyNN.tnn_input_2d_f32_persistent(@sess,
546
+ @d_head, @lora_q_rank)]
547
+ hqm = 1
548
+ while hqm < @n_heads
549
+ blk.t_w_lora_a_q_m.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
550
+ @lora_q_rank, @d_model))
551
+ blk.t_w_lora_a_q_v.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
552
+ @lora_q_rank, @d_model))
553
+ blk.t_w_lora_b_q_m.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
554
+ @d_head, @lora_q_rank))
555
+ blk.t_w_lora_b_q_v.push(TinyNN.tnn_input_2d_f32_persistent(@sess,
556
+ @d_head, @lora_q_rank))
557
+ hqm = hqm + 1
558
+ end
559
+ end
560
+ end
561
+
562
+ # K, V per-kv-head — same dual-layout split (#153).
563
+ if @per_head_attn
564
+ k0_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.head_0.weight")
565
+ v0_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.head_0.weight")
566
+ k_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, k0_idx)
567
+ v_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, v0_idx)
568
+ blk.t_w_k = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
569
+ @d_head, @d_model, k_type,
570
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, k0_idx))]
571
+ blk.t_w_v = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
572
+ @d_head, @d_model, v_type,
573
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, v0_idx))]
574
+ k_stride = 0 # unused in per-head branch but referenced later
575
+ v_stride = 0
576
+ else
577
+ k_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.weight")
578
+ v_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.weight")
579
+ k_off_base = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, k_idx)
580
+ v_off_base = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, v_idx)
581
+ k_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, k_idx)
582
+ v_type = TinyNN.tnn_gguf_tensor_type(gguf_handle, v_idx)
583
+ k_stride = head_nbytes(k_type, @d_head, @d_model)
584
+ v_stride = head_nbytes(v_type, @d_head, @d_model)
585
+ blk.t_w_k = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
586
+ @d_head, @d_model, k_type, k_off_base)]
587
+ blk.t_w_v = [TinyNN.tnn_input_2d_persistent_mmap(@sess,
588
+ @d_head, @d_model, v_type, v_off_base)]
589
+ end
590
+ # P5.1+P5.2: K and V allocs both follow @kv_type_*. Layout is
591
+ # `ne=[d_head, max_T]` for both — positions on ne1, d_head on
592
+ # ne0. Per-position writes span a contiguous d_head-vector
593
+ # which is Q8-block-aligned at d_head=64 (=2 blocks of 32).
594
+ # See the struct comment on :kv_type_k / :kv_type_v.
595
+ if @kv_type_k == 8
596
+ blk.t_K = [TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8)]
597
+ else
598
+ blk.t_K = [TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, @d_head)]
599
+ end
600
+ if @kv_type_v == 8
601
+ blk.t_V = [TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8)]
602
+ else
603
+ blk.t_V = [TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, @d_head)]
604
+ end
605
+ hkv = 1
606
+ while hkv < @n_kv
607
+ if @per_head_attn
608
+ kh_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.head_" + hkv.to_s + ".weight")
609
+ vh_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.head_" + hkv.to_s + ".weight")
610
+ blk.t_w_k.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
611
+ @d_head, @d_model, k_type,
612
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, kh_idx)))
613
+ blk.t_w_v.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
614
+ @d_head, @d_model, v_type,
615
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, vh_idx)))
616
+ else
617
+ blk.t_w_k.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
618
+ @d_head, @d_model, k_type,
619
+ k_off_base + hkv * k_stride))
620
+ blk.t_w_v.push(TinyNN.tnn_input_2d_persistent_mmap(@sess,
621
+ @d_head, @d_model, v_type,
622
+ v_off_base + hkv * v_stride))
623
+ end
624
+ if @kv_type_k == 8
625
+ blk.t_K.push(TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8))
626
+ else
627
+ blk.t_K.push(TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, @d_head))
628
+ end
629
+ if @kv_type_v == 8
630
+ blk.t_V.push(TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8))
631
+ else
632
+ blk.t_V.push(TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, @d_head))
633
+ end
634
+ hkv = hkv + 1
635
+ end
636
+
637
+ # Q/K/V biases — 1D F32 per head, contiguous in the file.
638
+ if qkv_bias
639
+ qb_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.bias")
640
+ kb_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.bias")
641
+ vb_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.bias")
642
+ qb_off = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, qb_idx)
643
+ kb_off = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, kb_idx)
644
+ vb_off = TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, vb_idx)
645
+ bias_stride = @d_head * 4 # f32
646
+
647
+ blk.t_b_q = [TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, qb_off)]
648
+ hq = 1
649
+ while hq < @n_heads
650
+ blk.t_b_q.push(TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
651
+ qb_off + hq * bias_stride))
652
+ hq = hq + 1
653
+ end
654
+
655
+ blk.t_b_k = [TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, kb_off)]
656
+ blk.t_b_v = [TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, vb_off)]
657
+ hkv = 1
658
+ while hkv < @n_kv
659
+ blk.t_b_k.push(TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
660
+ kb_off + hkv * bias_stride))
661
+ blk.t_b_v.push(TinyNN.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
662
+ vb_off + hkv * bias_stride))
663
+ hkv = hkv + 1
664
+ end
665
+ end
666
+
667
+ # O / FFN — full 2D weights, no per-head slicing.
668
+ o_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".attn_output.weight")
669
+ # M1.1: o_proj maps [n_heads * d_head] → [d_model]. For models
670
+ # where d_head = d_model / n_heads (SmolLM2 / Llama / Qwen2.5)
671
+ # these are equal; for Qwen3 with explicit head_dim=128 they
672
+ # differ (n_heads * d_head = 2048, d_model = 1024).
673
+ blk.t_w_o = TinyNN.tnn_input_2d_persistent_mmap(@sess, @d_model, @n_heads * @d_head,
674
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, o_idx),
675
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, o_idx))
676
+
677
+ if @is_moe
678
+ # M2.3: MoE FFN. Per-expert weight matrices are stacked along
679
+ # ne2 in the GGUF (llama.cpp convention):
680
+ # ffn_gate_inp.weight : ne=[d_model, n_experts]
681
+ # ffn_gate_exps.weight: ne=[d_model, d_ff, n_experts]
682
+ # ffn_up_exps.weight : ne=[d_model, d_ff, n_experts]
683
+ # ffn_down_exps.weight: ne=[d_ff, d_model, n_experts]
684
+ # All mmap'd in place — Mixtral-8x7B Q4_K_M (26 GB) loads without
685
+ # any RAM copy.
686
+ router_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate_inp.weight")
687
+ gate_exps_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate_exps.weight")
688
+ up_exps_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_up_exps.weight")
689
+ down_exps_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_down_exps.weight")
690
+ # #112 (RESOLVED): K-quant MoE experts work. The old warning here
691
+ # blamed ggml's mul_mat_id kernel for the OLMoE-Q4_K_M corruption,
692
+ # but the op was always correct for K-quants (verified by op-level
693
+ # and real-bytes reproducers in tinynn/ggml1506_*). The actual bug
694
+ # was head_nbytes() returning 0 for K-quant ATTENTION weights,
695
+ # collapsing every head onto head 0 — fixed there. K-quant expert
696
+ # stacks (gate/up/down, including OLMoE's mixed q4_K+q6_K down_exps)
697
+ # load and run coherently. See docs/notes/mul_mat_id_quants.md.
698
+ blk.t_w_router = TinyNN.tnn_input_2d_persistent_mmap(@sess,
699
+ @n_experts, @d_model,
700
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, router_idx),
701
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, router_idx))
702
+ blk.t_w_gate_exps = TinyNN.tnn_input_3d_persistent_mmap(@sess,
703
+ @d_model, @d_ff, @n_experts,
704
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, gate_exps_idx),
705
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, gate_exps_idx))
706
+ blk.t_w_up_exps = TinyNN.tnn_input_3d_persistent_mmap(@sess,
707
+ @d_model, @d_ff, @n_experts,
708
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, up_exps_idx),
709
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, up_exps_idx))
710
+ blk.t_w_down_exps = TinyNN.tnn_input_3d_persistent_mmap(@sess,
711
+ @d_ff, @d_model, @n_experts,
712
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, down_exps_idx),
713
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, down_exps_idx))
714
+ else
715
+ gate_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate.weight")
716
+ up_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_up.weight")
717
+ down_idx = TinyNN.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_down.weight")
718
+ blk.t_w_gate = TinyNN.tnn_input_2d_persistent_mmap(@sess, @d_ff, @d_model,
719
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, gate_idx),
720
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, gate_idx))
721
+ blk.t_w_up = TinyNN.tnn_input_2d_persistent_mmap(@sess, @d_ff, @d_model,
722
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, up_idx),
723
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, up_idx))
724
+ blk.t_w_down = TinyNN.tnn_input_2d_persistent_mmap(@sess, @d_model, @d_ff,
725
+ TinyNN.tnn_gguf_tensor_type(gguf_handle, down_idx),
726
+ TinyNN.tnn_gguf_tensor_file_offset(gguf_handle, down_idx))
727
+ end
728
+
729
+ li = li + 1
730
+ end
731
+
732
+ # F1.2: mark LoRA tensors as trainable BEFORE finalize_weights.
733
+ # set_param flips a flag on the tensor; the build_backward pass
734
+ # later walks PARAM-flagged nodes to emit grad nodes. Doing it
735
+ # here (rather than in the smoke) keeps the cache class as the
736
+ # single source of truth for what's trainable in a session.
737
+ if @lora_q_enabled
738
+ li2 = 0
739
+ while li2 < @n_layers
740
+ blk2 = @kv_blocks_ffi[li2]
741
+ hq = 0
742
+ while hq < @n_heads
743
+ TinyNN.tnn_set_param(blk2.t_w_lora_a_q[hq])
744
+ TinyNN.tnn_set_param(blk2.t_w_lora_b_q[hq])
745
+ hq = hq + 1
746
+ end
747
+ li2 = li2 + 1
748
+ end
749
+ end
750
+
751
+ # Finalize the regular persistent context (K/V cache buffers).
752
+ # Mmap'd tensors don't need finalization — they were allocated
753
+ # against weights_buf_mmap inline.
754
+ TinyNN.tnn_finalize_weights(@sess)
755
+
756
+ # Upload llama3-style RoPE freq_factors once the backend buffer
757
+ # for @t_rope_freq_factors exists (post-finalize). The values are
758
+ # a per-model constant — never re-uploaded across rebuild cycles.
759
+ if @rope_scaling.kind == :llama3
760
+ ff = Toy::RopeScaling.compute_llama3_freq_factors(
761
+ @d_head, @rope_base,
762
+ @rope_scaling.orig_max_pos, @rope_scaling.factor,
763
+ @rope_scaling.low_freq_factor, @rope_scaling.high_freq_factor)
764
+ TinyNN.tnn_upload_from_float_array(@sess, @t_rope_freq_factors,
765
+ ff, ff.length)
766
+ end
767
+
768
+ # F1.2 step 6b: zero-init persistent Adam moments. AdamW's update
769
+ # rule assumes m = v = 0 at step 0 (otherwise the first step picks
770
+ # up garbage from the buffer). The bias-correction term beta1h/beta2h
771
+ # then ramps in as the moments accumulate.
772
+ if @lora_q_adamw_enabled
773
+ za = Mat.new(@lora_q_rank, @d_model)
774
+ zb = Mat.new(@d_head, @lora_q_rank)
775
+ i = 0
776
+ while i < @lora_q_rank * @d_model; za.flat[i] = 0.0; i = i + 1; end
777
+ j = 0
778
+ while j < @d_head * @lora_q_rank; zb.flat[j] = 0.0; j = j + 1; end
779
+ li_z = 0
780
+ while li_z < @n_layers
781
+ blk_z = @kv_blocks_ffi[li_z]
782
+ hqz = 0
783
+ while hqz < @n_heads
784
+ TinyNN.upload_row_major(@sess, blk_z.t_w_lora_a_q_m[hqz], za)
785
+ TinyNN.upload_row_major(@sess, blk_z.t_w_lora_a_q_v[hqz], za)
786
+ TinyNN.upload_row_major(@sess, blk_z.t_w_lora_b_q_m[hqz], zb)
787
+ TinyNN.upload_row_major(@sess, blk_z.t_w_lora_b_q_v[hqz], zb)
788
+ hqz = hqz + 1
789
+ end
790
+ li_z = li_z + 1
791
+ end
792
+ end
793
+
794
+ # Zero-init K/V cache buffers (same as realize_for + legacy load).
795
+ # P5.1: skip K zero-init when K is Q8_0. upload_row_major writes
796
+ # F32 row-major bytes which would corrupt a Q8 tensor's quantization
797
+ # blocks. The K cache is read only at positions [0, pos+1], and
798
+ # every position is written before it's read, so unset trailing
799
+ # positions are never observed — zero-init is paranoia and safe
800
+ # to skip for Q8. P5.2 flipped V to mirror K's layout, so V's
801
+ # zero-init Mat now has the same shape as K's, and the same Q8
802
+ # skip rule applies.
803
+ kv_zero = Mat.new(max_T, @d_head)
804
+ li = 0
805
+ while li < @n_layers
806
+ blk_f = @kv_blocks_ffi[li]
807
+ hkv = 0
808
+ while hkv < @n_kv
809
+ if @kv_type_k != 8
810
+ TinyNN.upload_row_major(@sess, blk_f.t_K[hkv], kv_zero)
811
+ end
812
+ if @kv_type_v != 8
813
+ TinyNN.upload_row_major(@sess, blk_f.t_V[hkv], kv_zero)
814
+ end
815
+ hkv = hkv + 1
816
+ end
817
+ li = li + 1
818
+ end
819
+
820
+ @realized = true
821
+ end
822
+
823
+ # Auto-dispatch (this comment documents realize_and_load_auto, which is defined after upload_lora_q_init! below): open the GGUF, peek at its `toy.ggml_native` flag,
824
+ # and route to either the BYO-pointer mmap path (Phase 2) or the
825
+ # legacy realize_for + load_weights copy path. Returns the GGUF
826
+ # handle (or null for the legacy path); the kv_cache holds it via
827
+ # @gguf_handle_keepalive so the mmap stays valid for inference.
828
+ #
829
+ # Caller must have `require_relative "toy/models/toy_smollm2_loader"` at the
830
+ # top-level driver — this file deliberately does NOT require it
831
+ # (require-order with GGUFLoad's methods that touch `weight_type`
832
+ # was triggering a Spinel GC crash in decode_step).
833
# F1.2: standard LoRA initialisation for the per-Q-head adapters.
# Every A matrix (rank x d_model) is filled with Gaussian samples
# scaled by init_scale; every B matrix (d_head x rank) is uploaded as
# all zeros. With B = 0 the LoRA contribution is exactly zero, so the
# forward output matches the base model bit-for-bit at step 0.
# Does nothing unless @lora_q_enabled is set. Call AFTER
# realize_for_mmap, which allocates the adapter tensors.
#
# seed       - integer start state of the LCG driving the samples.
# init_scale - multiplier applied to each Gaussian sample.
def upload_lora_q_init!(seed, init_scale)
  if !@lora_q_enabled
    return
  end

  a_count = @lora_q_rank * @d_model
  b_count = @d_head * @lora_q_rank
  a_mat = Mat.new(@lora_q_rank, @d_model)
  b_zero = Mat.new(@d_head, @lora_q_rank)

  # B is identical for every (layer, head): zero it once, upload it
  # many times.
  k = 0
  while k < b_count
    b_zero.flat[k] = 0.0
    k = k + 1
  end

  # One LCG state runs through all layers and heads, so each
  # (layer, head) pair receives a different A.
  state = seed
  layer = 0
  while layer < @n_layers
    cur = @kv_blocks_ffi[layer]
    head = 0
    while head < @n_heads
      # Box-Muller (cosine branch) over two LCG draws per element.
      # The +1.0 keeps u1 strictly positive so Math.log stays finite.
      e = 0
      while e < a_count
        state = (state * 1103515245 + 12345) & 0x7FFFFFFF
        u1 = (state.to_f + 1.0) / 2147483648.0
        state = (state * 1103515245 + 12345) & 0x7FFFFFFF
        u2 = (state.to_f + 1.0) / 2147483648.0
        radius = Math.sqrt(-2.0 * Math.log(u1))
        angle = 2.0 * Math::PI * u2
        a_mat.flat[e] = init_scale * radius * Math.cos(angle)
        e = e + 1
      end
      TinyNN.upload_row_major(@sess, cur.t_w_lora_a_q[head], a_mat)
      TinyNN.upload_row_major(@sess, cur.t_w_lora_b_q[head], b_zero)
      head = head + 1
    end
    layer = layer + 1
  end
end
870
+
871
# Auto-dispatch loader: open the GGUF, read its `toy.ggml_native`
# flag, and route to either the BYO-pointer mmap path
# (realize_for_mmap) or the legacy realize_for + load_weights copy
# path.
#
# gguf_path - path of the GGUF file to load.
# max_T     - maximum number of positions for the K/V cache.
# cfg       - model config; the legacy path reads d_model, d_ff,
#             n_heads, n_kv, n_layers, vocab, rope_base and rms_eps
#             from it, the mmap path hands it to realize_for_mmap.
# flags     - object responding to `untied` and `qkv_bias`.
#
# Returns the GGUF handle on the mmap path (realize_for_mmap also
# stores it in @gguf_handle_keepalive so the mapping stays valid), or
# TinyNN.tnn_null_ptr on the legacy path, where the handle is freed
# before the copy load.
def realize_and_load_auto(gguf_path, max_T, cfg, flags)
  gguf = TinyNN.tnn_gguf_load(gguf_path)
  is_native = TinyNN.tnn_gguf_get_bool(gguf, "toy.ggml_native") == 1
  if is_native
    wtype = GGUFLoad.detect_weight_type(gguf_path)
    set_weight_type(wtype)
    # realize_for_mmap requires a sixth `qk_norm` argument; the old
    # five-argument call raised ArgumentError. Detect QK-norm from the
    # file itself with the same find_index sentinel realize_for_mmap
    # uses for per-head checkpoints: the layer-0 Q-norm gamma is
    # present exactly when the model carries QK-norm weights.
    qk_norm = TinyNN.tnn_gguf_find_index(gguf, "blk.0.attn_q_norm.weight") >= 0
    realize_for_mmap(gguf, cfg, max_T, flags.untied, flags.qkv_bias, qk_norm)
    puts " BYO-pointer mmap (weight_type=" + wtype.to_s + ")"
    gguf
  else
    # The legacy loader reopens the file by path, so this handle is
    # not needed past the flag check.
    TinyNN.tnn_gguf_free(gguf)
    realize_for(max_T, cfg.d_model, cfg.d_ff,
                cfg.n_heads, cfg.n_kv,
                cfg.n_layers, cfg.vocab,
                cfg.rope_base, cfg.rms_eps,
                flags.untied, flags.qkv_bias)
    load_weights(gguf_path)
    puts " legacy copy load"
    TinyNN.tnn_null_ptr
  end
end
892
+
893
# Byte stride between consecutive heads when a fused
# [n_heads*d_head, d_model] tensor is sliced into n_heads contiguous
# Dh×D blocks. One head is d_head rows of d_model elements, so the
# stride is d_head row-sizes.
#
# The row size comes from tnn_row_size, which delegates to
# ggml_row_size and is therefore right for EVERY type — F32, Q8_0 and
# the K-quants (Q4_K/Q5_K/Q6_K). The earlier hand-coded F32/Q8_0-only
# version returned 0 for anything else, turning each per-head offset
# into `off_base + hq*0 == off_base`: every attention head read head
# 0's slice. That collapsed multi-head attention on K-quant MoE models
# (which are forced down the realize_for_mmap path) and compounded
# across layers into degenerate output. It was misdiagnosed as a ggml
# mul_mat_id K-quant bug (ggml#1506); it was ours. Block alignment
# holds because each row is a whole number of quant blocks (requires
# d_model % block == 0, which the per-head
# tnn_input_2d_persistent_mmap also enforces via ne0).
#
# Prints a FATAL line and exits with status 1 when the row size is
# not positive.
def head_nbytes(ggml_type, d_head, d_model)
  row_bytes = TinyNN.tnn_row_size(ggml_type, d_model)
  if row_bytes <= 0
    # Never-mask rule: a zero stride would silently fold all heads
    # onto head 0, so stop here. tnn_row_size only returns 0 on a bad
    # type/shape.
    msg = "FATAL: head_nbytes got row_size<=0 for ggml_type=" + ggml_type.to_s
    msg = msg + " d_model=" + d_model.to_s
    msg = msg + " — per-head attention stride would collapse. Aborting."
    puts msg
    exit 1
  end
  d_head * row_bytes
end
920
+
921
+ # CUDA-MIRROR-SKIP-BEGIN: trace-tap diagnostic ivars are CPU-only;
922
+ # the CUDA mirror gets a no-op stub for trace_tap and an empty
923
+ # dump_trace + enable_trace! so callers don't have to backend-switch.
924
+ # CUDA-MIRROR-STUB: def enable_trace!; end
925
+ # CUDA-MIRROR-STUB: def trace_tap(_name, t); t; end
926
+ # CUDA-MIRROR-STUB: def dump_trace; end
927
def enable_trace!
  # Switch trace capture on: from now on trace_tap records its taps
  # and dump_trace prints them. Nothing in this section switches it
  # back off.
  @trace_on = true
end
930
+
931
# Tap a named point in the graph. Always returns `t` unchanged so it
# can be used inline: `t = trace_tap("L0.rn1", t)`. While tracing is
# off that is all it does; while tracing is on it also records the
# (name, tensor) pair and marks the tensor as a scheduler output.
def trace_tap(name_, t)
  if !@trace_on
    return t
  end
  @trace_names.push(name_)
  @trace_tensors.push(t)
  TinyNN.tnn_set_output(t)
  t
end
943
+
944
# Print one summary line per captured tap (element count, min, max,
# mean absolute value, NaN count), then empty both capture arrays so
# the next decode_step starts fresh. Does nothing while tracing is
# off. Intended to run after compute.
def dump_trace
  if !@trace_on; return; end

  count = @trace_names.length
  idx = 0
  while idx < count
    label = @trace_names[idx]
    tensor = @trace_tensors[idx]
    nelem = TinyNN.tnn_tensor_nelements(tensor)
    # The tensor is downloaded first; the scratch_* reductions below
    # are then queried on the session with the element count.
    TinyNN.tnn_download(@sess, tensor)
    lo = TinyNN.tnn_scratch_min_f32(@sess, nelem)
    hi = TinyNN.tnn_scratch_max_f32(@sess, nelem)
    abs_sum = TinyNN.tnn_scratch_sum_abs_f32(@sess, nelem)
    nan_count = TinyNN.tnn_scratch_nan_count_f32(@sess, nelem)
    mean_abs = abs_sum / nelem.to_f

    line = " " + label.ljust(24) + " n=" + nelem.to_s.rjust(6)
    line = line + " min=" + lo.to_s
    line = line + " max=" + hi.to_s
    line = line + " |mean|=" + mean_abs.to_s
    line = line + " nan=" + nan_count.to_s
    puts line
    idx = idx + 1
  end

  # Spinel-friendly reset: drain the arrays with pop instead of
  # reassigning the ivars.
  while @trace_names.length > 0
    @trace_names.pop
  end
  while @trace_tensors.length > 0
    @trace_tensors.pop
  end
end
978
+ # CUDA-MIRROR-SKIP-END
979
+
980
# Ruby-OO entry point for loading weights into this realized cache;
# forwards to GGUFLoad.load_kv_cache_auto. Layout is auto-detected
# there: GGUFs carrying the `toy.ggml_native` metadata key take the
# memcpy path (no transpose), legacy GGUFs take the transposing path,
# so callers stay layout-agnostic.
def load_weights(path)
  cache = self
  GGUFLoad.load_kv_cache_auto(cache, path)
end
987
+
988
# Read a persistent FFI tensor back into a Ruby Mat (chunked download,
# so weight-sized tensors work). Required by the design rule that the
# direct-loader path must keep Mat-roundtrip open — see
# docs/loader-api.md.
#
# t          - any tensor handle exposed on this cache or its blocks
#              (e.g. `kv.t_token_embed`, `kv.kv_blocks_ffi[3].t_w_o`).
# rows, cols - the logical shape; trusted as given, not validated
#              here.
def read_persistent_mat(t, rows, cols)
  mat = TinyNN.download_to_mat(@sess, t, rows, cols)
  mat
end
999
+
1000
# Legacy (copy-load) realization: declare every persistent tensor —
# weights plus the K/V cache buffers — in a fresh session, then
# finalize it.
#
# `untied` is true for TinyLlama-shape models that carry a separate
# `output.weight` (lm_head) and false for SmolLM2 / Qwen2.5 with tied
# embeddings; when false the (vocab × d_model) t_output tensor is not
# allocated at all. `qkv_bias` is true for Qwen2.x; when false the
# b_q / b_k / b_v tensors are not allocated and the Q/K/V matmuls land
# without an add.
#
# NOTE(review): bare `d_head` below is neither a parameter nor a local
# of this method — presumably it resolves to a reader for @d_head
# (assigned at the top); confirm against the class definition.
def realize_for(max_T, d_model, d_ff, n_heads, n_kv, n_layers,
                vocab_size, rope_base, rms_eps, untied, qkv_bias)
  @has_untied_output = untied
  @has_qkv_bias = qkv_bias

  @max_T = max_T
  @d_model = d_model
  @d_ff = d_ff
  @n_heads = n_heads
  @n_kv = n_kv
  @d_head = d_model / n_heads
  @group_size = n_heads / n_kv
  @n_layers = n_layers
  @vocab_size = vocab_size
  @rope_base = rope_base
  @rms_eps = rms_eps

  # Globals: token embedding and final norm use the F32 allocators;
  # the optional untied output goes through alloc_2d_w.
  @sess = TinyNN.tnn_session_new(0)
  @t_token_embed = TinyNN.tnn_input_2d_f32_persistent(@sess, vocab_size, d_model)
  @t_final_norm_gamma = TinyNN.tnn_input_1d_f32_persistent(@sess, d_model)
  if untied
    @t_output = alloc_2d_w(vocab_size, d_model)
  end

  # One block per layer. The array is seeded with a literal
  # one-element array and grown with push.
  @kv_blocks_ffi = [SmolLM2KVBlockFFI.new]
  made = 1
  while made < n_layers
    @kv_blocks_ffi.push(SmolLM2KVBlockFFI.new)
    made = made + 1
  end

  layer = 0
  while layer < n_layers
    cur = @kv_blocks_ffi[layer]
    cur.t_rn1_gamma = TinyNN.tnn_input_1d_f32_persistent(@sess, d_model)
    cur.t_rn2_gamma = TinyNN.tnn_input_1d_f32_persistent(@sess, d_model)

    # Q: n_heads per-head (d_head, d_model) matrices via alloc_2d_w
    # (quantizable); the optional biases use the F32 allocator.
    cur.t_w_q = [alloc_2d_w(d_head, d_model)]
    if qkv_bias
      cur.t_b_q = [TinyNN.tnn_input_1d_f32_persistent(@sess, d_head)]
    end
    q_head = 1
    while q_head < n_heads
      cur.t_w_q.push(alloc_2d_w(d_head, d_model))
      if qkv_bias
        cur.t_b_q.push(TinyNN.tnn_input_1d_f32_persistent(@sess, d_head))
      end
      q_head = q_head + 1
    end

    # K, V and their persistent cache buffers: n_kv of each. Linear
    # weights are quantizable; the cache buffers follow @kv_type_k
    # (P5.1) / @kv_type_v (P5.2), where 8 selects the typed
    # allocation; biases stay F32. V mirrors K's layout
    # (ne=[d_head, max_T]).
    cur.t_w_k = [alloc_2d_w(d_head, d_model)]
    cur.t_w_v = [alloc_2d_w(d_head, d_model)]
    k_buf = @kv_type_k == 8 ?
      TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8) :
      TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, d_head)
    cur.t_K = [k_buf]
    v_buf = @kv_type_v == 8 ?
      TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8) :
      TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, d_head)
    cur.t_V = [v_buf]
    if qkv_bias
      # Both biases are 1-D: the K matmul result is [d_head, 1], and
      # the V matmul is ordered weight-first so its result has the
      # same shape.
      cur.t_b_k = [TinyNN.tnn_input_1d_f32_persistent(@sess, d_head)]
      cur.t_b_v = [TinyNN.tnn_input_1d_f32_persistent(@sess, d_head)]
    end
    kv_head = 1
    while kv_head < n_kv
      cur.t_w_k.push(alloc_2d_w(d_head, d_model))
      cur.t_w_v.push(alloc_2d_w(d_head, d_model))
      k_next = @kv_type_k == 8 ?
        TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8) :
        TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, d_head)
      cur.t_K.push(k_next)
      v_next = @kv_type_v == 8 ?
        TinyNN.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8) :
        TinyNN.tnn_input_2d_f32_persistent(@sess, max_T, d_head)
      cur.t_V.push(v_next)
      if qkv_bias
        cur.t_b_k.push(TinyNN.tnn_input_1d_f32_persistent(@sess, d_head))
        cur.t_b_v.push(TinyNN.tnn_input_1d_f32_persistent(@sess, d_head))
      end
      kv_head = kv_head + 1
    end

    # Output projection and FFN: full 2-D weights, no per-head split.
    cur.t_w_o = alloc_2d_w(d_model, @n_heads * @d_head)
    cur.t_w_gate = alloc_2d_w(d_ff, d_model)
    cur.t_w_up = alloc_2d_w(d_ff, d_model)
    cur.t_w_down = alloc_2d_w(d_model, d_ff)
    layer = layer + 1
  end

  TinyNN.tnn_finalize_weights(@sess)
  @realized = true
end
1112
+
1113
# Assemble the compute graph that decodes one token at cache position
# `pos`, and hand back the tensors the caller needs to drive it.
#
# pos - integer cache position for this step. It is forwarded to
#       build_block_step, which turns it into byte offsets into the
#       per-head K/V buffers. No range check is done here.
#
# Returns a SmolLM2KVStepResult holding the token-id input tensor, the
# RoPE-position input tensor and the logits tensor (in that order).
def build_decode_step(pos)
  norm_eps = @rms_eps
  attn_scale = 1.0 / Math.sqrt(@d_head.to_f)

  # Row strides in bytes. An F32 row of d_head values is d_head * 4.
  # P5.1+P5.2: a cache whose type code is 8 (Q8_0 per the P5 notes)
  # asks tnn_row_size for its type-aware row size instead. K and V are
  # sized separately because each cache carries its own type code.
  f32_row_bytes = @d_head * 4
  max_t_bytes = @max_T * 4
  k_row_bytes = f32_row_bytes
  if @kv_type_k == 8
    k_row_bytes = TinyNN.tnn_row_size(8, @d_head)
  end
  v_row_bytes = f32_row_bytes
  if @kv_type_v == 8
    v_row_bytes = TinyNN.tnn_row_size(8, @d_head)
  end

  # Graph inputs, each of length 1: the token id and the RoPE position.
  t_tok_in = TinyNN.tnn_input_1d_i32(@sess, 1)
  t_pos_in = TinyNN.tnn_input_1d_i32_ctx(@sess, 1)

  # Embedding lookup; ne=[d_model, 1].
  t_hidden = TinyNN.tnn_get_rows(@sess, @t_token_embed, t_tok_in)
  # I-Gemma (#113): Gemma 2 multiplies the looked-up embedding by a
  # scalar (sqrt(d_model)). Other archs keep @embed_scale at 1.0, in
  # which case no scale node is added to the graph at all.
  if @embed_scale != 1.0
    t_hidden = TinyNN.tnn_scale(@sess, t_hidden, @embed_scale)
  end
  t_hidden = trace_tap("embed", t_hidden)

  # Thread the hidden state through every block in layer order.
  layer = 0
  while layer < @n_layers
    t_hidden = build_block_step(t_hidden, @kv_blocks_ffi[layer], t_pos_in, pos,
                                attn_scale, norm_eps, f32_row_bytes, k_row_bytes,
                                v_row_bytes, max_t_bytes, layer)
    layer = layer + 1
  end

  t_normed = TinyNN.tnn_rms_norm(@sess, t_hidden, @t_final_norm_gamma, norm_eps)
  t_normed = trace_tap("final_norm", t_normed)

  # Logits projection. The untied path multiplies by the separate
  # lm_head tensor (@t_output); the tied path reuses the token
  # embedding. Both are [vocab, d_model], so the matmul is the same.
  t_lm_head = @t_token_embed
  if @has_untied_output
    t_lm_head = @t_output
  end
  t_logits = TinyNN.tnn_matmul(@sess, t_lm_head, t_normed)

  # I-Gemma (#113): final logit soft-cap,
  #   logits = final_softcap * tanh(logits / final_softcap),
  # built from scale -> tanh -> scale. Skipped when @final_softcap is
  # not positive.
  if @final_softcap > 0.0
    t_logits = TinyNN.tnn_scale(@sess, t_logits, 1.0 / @final_softcap)
    t_logits = TinyNN.tnn_tanh(@sess, t_logits)
    t_logits = TinyNN.tnn_scale(@sess, t_logits, @final_softcap)
  end

  TinyNN.tnn_set_output(t_logits)
  SmolLM2KVStepResult.new(t_tok_in, t_pos_in, t_logits)
end
1171
+
1172
# Append one transformer block to the decode graph and return its
# output hidden state.
#
# t_x            - block input hidden state.
# blk            - per-layer tensor bundle (weights, biases, K/V buffers).
# t_pos          - RoPE position input tensor.
# pos            - integer cache position; selects the K/V row written here.
# scale          - attention score scale, forwarded to the Q-head builder.
# eps            - RMSNorm epsilon for the block-level norms.
# bytes_d_head   - F32 row size; only forwarded to the Q-head builder.
# bytes_d_head_k - byte size of one K cache row.
# bytes_d_head_v - byte size of one V cache row.
# bytes_max_T    - only forwarded to the Q-head builder.
# layer_idx      - layer number; used for tap names and forwarded on.
#
# Returns the tensor produced by the final "post_ffn" trace tap.
def build_block_step(t_x, blk, t_pos, pos, scale, eps,
                     bytes_d_head, bytes_d_head_k, bytes_d_head_v,
                     bytes_max_T, layer_idx)
  # Tap-name prefix for this layer ("L0." for layer 0). The integer
  # needs an explicit .to_s for string concatenation; the index is not
  # zero-padded.
  prefix = "L" + layer_idx.to_s + "."

  t_normed = TinyNN.tnn_rms_norm(@sess, t_x, blk.t_rn1_gamma, eps)
  t_normed = trace_tap(prefix + "rn1", t_normed)

  # --- K and V for every KV head: project, (bias), (norm), RoPE on K,
  # --- then copy both into row `pos` of the persistent buffers.
  kv_head = 0
  while kv_head < @n_kv
    # Only KV head 0 is tapped, to keep trace output small.
    tap_head = (kv_head == 0)

    # K projection; ne=[d_head, 1].
    t_k = TinyNN.tnn_matmul(@sess, blk.t_w_k[kv_head], t_normed)
    if @has_qkv_bias
      t_k = TinyNN.tnn_add(@sess, t_k, blk.t_b_k[kv_head])
    end
    # Tap is post-bias, pre-RoPE.
    if tap_head
      t_k = trace_tap(prefix + "k_pre", t_k)
    end

    # M1 + #110: QK-norm on K, in two flavors.
    #   kind=1 (Qwen3): gamma is [d_head], shared by all KV heads, and
    #     is passed as-is.
    #   kind=2 (OLMoE / Granite, per-head approximation): gamma is
    #     [n_kv * d_head]; this head's [d_head] slice is viewed at byte
    #     offset kv_head * d_head * 4. The norm then uses per-head
    #     variance rather than the variance of the full vector, while
    #     still applying the right per-element gamma.
    if @has_qk_norm
      t_k_gamma = blk.t_k_norm_gamma
      if @qk_norm_kind == 2
        t_k_gamma = TinyNN.tnn_view_1d(@sess, blk.t_k_norm_gamma,
                                       @d_head, kv_head * @d_head * 4)
      end
      t_k = TinyNN.tnn_rms_norm(@sess, t_k, t_k_gamma, @rms_eps)
    end

    t_k = TinyNN.tnn_rope_ext(@sess, t_k, t_pos, @d_head,
                              @rope_base, @rope_scaling.freq_scale,
                              @rope_scaling.ext_factor,
                              @rope_scaling.attn_factor,
                              @rope_scaling.beta_fast,
                              @rope_scaling.beta_slow,
                              @t_rope_freq_factors)
    if tap_head
      t_k = trace_tap(prefix + "k_rot", t_k)
    end

    # V projection with the weight as the first matmul operand, so
    # quantized matmul kernels can be selected (per the original
    # notes). Result is ne=[d_head, 1]; the bias, when present, is a
    # 1-D [d_head] tensor.
    t_v = TinyNN.tnn_matmul(@sess, blk.t_w_v[kv_head], t_normed)
    if @has_qkv_bias
      t_v = TinyNN.tnn_add(@sess, t_v, blk.t_b_v[kv_head])
    end
    if tap_head
      t_v = trace_tap(prefix + "v_new", t_v)
    end

    # P5.1+P5.2: K and V use the same write pattern. Each slot is a
    # [d_head, 1] view of the cache at byte offset pos * row_bytes,
    # where row_bytes reflects that cache's type. Per the original
    # notes, cpy converts an F32 source into a Q8 destination when the
    # types differ.
    t_k_slot = TinyNN.tnn_view_2d(@sess, blk.t_K[kv_head],
                                  @d_head, 1, bytes_d_head_k, pos * bytes_d_head_k)
    t_k_write = TinyNN.tnn_cpy(@sess, t_k, t_k_slot)
    t_v_slot = TinyNN.tnn_view_2d(@sess, blk.t_V[kv_head],
                                  @d_head, 1, bytes_d_head_v, pos * bytes_d_head_v)
    t_v_write = TinyNN.tnn_cpy(@sess, t_v, t_v_slot)
    # The copies are registered with the graph explicitly.
    TinyNN.tnn_add_to_graph(@sess, t_k_write)
    TinyNN.tnn_add_to_graph(@sess, t_v_write)

    kv_head = kv_head + 1
  end

  # --- Per-Q-head attention. Head 0 is built first with tapping on and
  # --- seeds the array; the remaining heads are pushed with tapping off.
  t_first_head = build_attention_qhead_step(t_normed, blk, 0, t_pos, pos,
                                            scale, bytes_d_head, bytes_d_head_k,
                                            bytes_d_head_v, bytes_max_T, prefix, true,
                                            layer_idx)
  t_heads = [t_first_head]
  q_head = 1
  while q_head < @n_heads
    t_heads.push(build_attention_qhead_step(t_normed, blk, q_head, t_pos, pos,
                                            scale, bytes_d_head, bytes_d_head_k,
                                            bytes_d_head_v, bytes_max_T, prefix, false,
                                            layer_idx))
    q_head = q_head + 1
  end

  # Concatenate the head outputs along axis 0, in head order.
  t_joined = t_heads[0]
  q_head = 1
  while q_head < @n_heads
    t_joined = TinyNN.tnn_concat(@sess, t_joined, t_heads[q_head], 0)
    q_head = q_head + 1
  end
  t_joined = trace_tap(prefix + "concat", t_joined)

  t_attn_out = TinyNN.tnn_matmul(@sess, blk.t_w_o, t_joined)
  t_attn_out = trace_tap(prefix + "attn_out", t_attn_out)
  # I-Gemma (#113): post-attention RMSNorm, applied to the attention
  # output BEFORE the residual add (Gemma 2's sandwich:
  # pre_norm -> attention -> post_norm -> residual add). Skipped
  # entirely when @has_post_norms is false.
  if @has_post_norms
    t_attn_out = TinyNN.tnn_rms_norm(@sess, t_attn_out, blk.t_post_attn_norm_gamma, eps)
    t_attn_out = trace_tap(prefix + "post_attn_norm", t_attn_out)
  end
  t_resid = TinyNN.tnn_add(@sess, t_x, t_attn_out)
  t_resid = trace_tap(prefix + "post_attn", t_resid)

  # --- FFN ---
  t_ffn_in = TinyNN.tnn_rms_norm(@sess, t_resid, blk.t_rn2_gamma, eps)
  t_ffn_in = trace_tap(prefix + "rn2", t_ffn_in)

  if @is_moe
    t_ffn_out = build_moe_ffn(blk, t_ffn_in, prefix)
  else
    # Dense SwiGLU: down(silu(gate(h)) * up(h)).
    t_gate = TinyNN.tnn_matmul(@sess, blk.t_w_gate, t_ffn_in) # ne=[d_ff, 1]
    t_gate = trace_tap(prefix + "gate", t_gate)
    t_up = TinyNN.tnn_matmul(@sess, blk.t_w_up, t_ffn_in) # ne=[d_ff, 1]
    t_up = trace_tap(prefix + "up", t_up)
    t_act = TinyNN.tnn_silu(@sess, t_gate)
    t_act = trace_tap(prefix + "silu_gate", t_act)
    t_prod = TinyNN.tnn_mul(@sess, t_act, t_up)
    t_prod = trace_tap(prefix + "gated", t_prod)
    t_ffn_out = TinyNN.tnn_matmul(@sess, blk.t_w_down, t_prod) # ne=[d_model, 1]
    t_ffn_out = trace_tap(prefix + "dn", t_ffn_out)
  end

  # I-Gemma (#113): post-FFN RMSNorm on the FFN output before the
  # residual add; same pattern as the post-attention norm.
  if @has_post_norms
    t_ffn_out = TinyNN.tnn_rms_norm(@sess, t_ffn_out, blk.t_post_ffn_norm_gamma, eps)
    t_ffn_out = trace_tap(prefix + "post_ffn_norm", t_ffn_out)
  end
  t_block_out = TinyNN.tnn_add(@sess, t_resid, t_ffn_out)
  trace_tap(prefix + "post_ffn", t_block_out)
end
1320
+
1321
# M2.3: routed mixture-of-experts FFN (Mixtral / Qwen-MoE), ported from
# the tinynn/ab_smoke_moe_ffn graph into the decode path.
#
# blk  - per-layer tensor bundle; reads t_w_router and the stacked
#        expert weights t_w_gate_exps / t_w_up_exps / t_w_down_exps.
# t_h2 - post-norm FFN input, ne=[d_model, 1].
# tag  - layer prefix for trace-tap names.
#
# Returns the tensor produced by the "moe_out" trace tap, ne=[d_model, 1].
#
# Shapes along the way, with K = @n_experts_used:
#   router logits / probs   [n_experts, 1]
#   selected expert indices [K, 1]
#   routing weights         [1, K, 1]
#   expert gate / up        [d_ff, K, 1]
#   expert down             [d_model, K, 1]
#
# NOTE(review): the gathered routing weights are used as-is; nothing
# here renormalizes them to sum to 1 over the selected experts. Confirm
# that matches every router this path is meant to serve.
def build_moe_ffn(blk, t_h2, tag)
  # Router: score every expert, softmax, then pick the top K.
  t_router = TinyNN.tnn_matmul(@sess, blk.t_w_router, t_h2) # ne=[n_exp, 1]
  t_router = trace_tap(tag + "moe_logits", t_router)
  t_gate_probs = TinyNN.tnn_softmax(@sess, t_router) # ne=[n_exp, 1]
  t_chosen = TinyNN.tnn_top_k(@sess, t_gate_probs, @n_experts_used) # ne=[K, 1]

  # Gather the chosen experts' probabilities. The probabilities are
  # reshaped to [1, n_experts, 1] so get_rows yields ne=[1, K, 1].
  t_gate_probs_3d = TinyNN.tnn_reshape_3d(@sess, t_gate_probs, 1, @n_experts, 1)
  t_route_w = TinyNN.tnn_get_rows(@sess, t_gate_probs_3d, t_chosen)

  # SwiGLU evaluated only for the chosen experts.
  t_exp_gate = TinyNN.tnn_mul_mat_id(@sess, blk.t_w_gate_exps, t_h2, t_chosen)
  t_exp_up = TinyNN.tnn_mul_mat_id(@sess, blk.t_w_up_exps, t_h2, t_chosen)
  t_exp_act = TinyNN.tnn_silu(@sess, t_exp_gate)
  t_exp_prod = TinyNN.tnn_mul(@sess, t_exp_act, t_exp_up) # ne=[d_ff, K, 1]
  t_exp_down = TinyNN.tnn_mul_mat_id(@sess, blk.t_w_down_exps, t_exp_prod, t_chosen)
  t_exp_down = trace_tap(tag + "moe_e_down", t_exp_down) # ne=[d_model, K, 1]

  # Weight each expert output; the [1, K, 1] weights broadcast over
  # d_model: [d_model, K, 1] x [1, K, 1] -> [d_model, K, 1].
  t_scaled = TinyNN.tnn_mul(@sess, t_exp_down, t_route_w)

  # Sum over the K experts (axis 1). Per the original notes ggml has no
  # axis-1 reduce, so: reshape to 2D [d_model, K], transpose to
  # [K, d_model], sum_rows along ne0 -> [1, d_model], then reshape back
  # to [d_model, 1].
  t_scaled_2d = TinyNN.tnn_reshape_2d(@sess, t_scaled, @d_model, @n_experts_used)
  t_scaled_t = TinyNN.tnn_transpose(@sess, t_scaled_2d)
  t_reduced = TinyNN.tnn_sum_rows(@sess, t_scaled_t) # ne=[1, d_model]
  t_moe_out = TinyNN.tnn_reshape_2d(@sess, t_reduced, @d_model, 1)
  trace_tap(tag + "moe_out", t_moe_out)
end
1360
+
1361
# Build attention for a single query head and return its output,
# ne=[d_head, 1].
#
# The head reads the K and V rows already written for its KV head,
# whose index is hq / @group_size (integer division).
#
# t_h            - normalized block input shared by all heads.
# blk            - per-layer tensor bundle.
# hq             - query-head index.
# t_pos          - RoPE position input tensor.
# pos            - integer cache position of the current token.
# scale          - multiplier applied to the attention scores.
# bytes_d_head   - accepted for signature parity; not read here.
# bytes_d_head_k - byte size of one K cache row.
# bytes_d_head_v - byte size of one V cache row.
# bytes_max_T    - accepted for signature parity; not read here.
# tag            - "L<i>." layer prefix for tap names.
# tap_this_head  - when true, trace taps are inserted for this head. The
#                  caller sets it for head 0 only, so taps are not
#                  multiplied by n_heads.
# layer_idx      - layer number; decides the sliding-window toggle.
def build_attention_qhead_step(t_h, blk, hq, t_pos, pos, scale,
                               bytes_d_head, bytes_d_head_k, bytes_d_head_v,
                               bytes_max_T, tag, tap_this_head,
                               layer_idx)
  kv_head = hq / @group_size

  # Q projection; ne=[d_head, 1].
  t_q = TinyNN.tnn_matmul(@sess, blk.t_w_q[hq], t_h)
  # F1.2: optional LoRA on Q, added BEFORE the bias so the bias stays
  # part of the base projection:
  #   q := q + w_lora_b[hq] @ (w_lora_a[hq] @ t_h)
  # Per the original notes, B initialized to zero leaves q unchanged.
  if @lora_q_enabled
    t_lora_down = TinyNN.tnn_matmul(@sess, blk.t_w_lora_a_q[hq], t_h) # ne=[r, 1]
    t_lora_up = TinyNN.tnn_matmul(@sess, blk.t_w_lora_b_q[hq], t_lora_down) # ne=[d_head, 1]
    t_q = TinyNN.tnn_add(@sess, t_q, t_lora_up)
  end
  if @has_qkv_bias
    t_q = TinyNN.tnn_add(@sess, t_q, blk.t_b_q[hq])
  end
  if tap_this_head
    t_q = trace_tap(tag + "q_pre", t_q)
  end

  # QK-norm on Q. kind=2 (OLMoE / Granite) views this head's [d_head]
  # slice of the gamma tensor at byte offset hq * d_head * 4; any other
  # kind passes the gamma tensor through unchanged.
  if @has_qk_norm
    t_q_gamma = blk.t_q_norm_gamma
    if @qk_norm_kind == 2
      t_q_gamma = TinyNN.tnn_view_1d(@sess, blk.t_q_norm_gamma,
                                     @d_head, hq * @d_head * 4)
    end
    t_q = TinyNN.tnn_rms_norm(@sess, t_q, t_q_gamma, @rms_eps)
  end

  t_q = TinyNN.tnn_rope_ext(@sess, t_q, t_pos, @d_head,
                            @rope_base, @rope_scaling.freq_scale,
                            @rope_scaling.ext_factor,
                            @rope_scaling.attn_factor,
                            @rope_scaling.beta_fast,
                            @rope_scaling.beta_slow,
                            @t_rope_freq_factors)
  if tap_this_head
    t_q = trace_tap(tag + "q_rot", t_q)
  end

  # M3 + I-Gemma (#113): sliding-window attention. The window is
  # @swa_window, except that with @swa_alternates set (Gemma 2's
  # even/odd pattern) odd layers use 0, i.e. full attention. A positive
  # window smaller than pos + 1 limits the history to the last `window`
  # positions; otherwise all pos + 1 positions are visible.
  window = @swa_window
  if @swa_alternates && layer_idx.odd?
    window = 0
  end
  n_visible = pos + 1
  first_visible = 0
  if window > 0 && n_visible > window
    first_visible = n_visible - window
    n_visible = window
  end

  # P5.1+P5.2: K and V history views share the same stride math: a
  # [d_head, n_visible] window starting first_visible rows into the
  # cache, with each cache using its own row size.
  t_k_hist = TinyNN.tnn_view_2d(@sess, blk.t_K[kv_head],
                                @d_head, n_visible, bytes_d_head_k,
                                first_visible * bytes_d_head_k)
  t_v_hist = TinyNN.tnn_view_2d(@sess, blk.t_V[kv_head],
                                @d_head, n_visible, bytes_d_head_v,
                                first_visible * bytes_d_head_v)

  if @use_flash_attn
    # P4.1+P5.2: fused softmax(Q.K^T * scale).V via flash_attn_ext.
    # Q/K/V are reshaped to the 3D shapes it expects; V needs no
    # transpose in this layout. No mask tensor is passed (nil).
    t_q3 = TinyNN.tnn_reshape_3d(@sess, t_q, @d_head, 1, 1)
    t_k3 = TinyNN.tnn_reshape_3d(@sess, t_k_hist, @d_head, n_visible, 1)
    t_v3 = TinyNN.tnn_reshape_3d(@sess, t_v_hist, @d_head, n_visible, 1)
    # I-Gemma (#113): the last argument is the attention logit
    # soft-cap, with 0.0 meaning disabled. The 0.0 before it is
    # presumably ggml's max_bias (TODO confirm against the binding).
    t_flash = TinyNN.tnn_flash_attn_ext(@sess, t_q3, t_k3, t_v3, nil,
                                        scale, 0.0, @attn_softcap)
    # Collapse the 4D kernel output to ne=[d_head, 1].
    t_head = TinyNN.tnn_reshape_2d(@sess, t_flash, @d_head, 1)
    if tap_this_head
      t_head = trace_tap(tag + "head0_flash", t_head)
    end
  else
    t_scores = TinyNN.tnn_matmul(@sess, t_k_hist, t_q)
    if tap_this_head
      t_scores = trace_tap(tag + "scores", t_scores)
    end
    t_scores = TinyNN.tnn_scale(@sess, t_scores, scale)
    # I-Gemma (#113): logit soft-cap for the non-flash path,
    #   y = softcap * tanh(x / softcap),
    # built from scale -> tanh -> scale. Skipped when @attn_softcap is
    # not positive.
    if @attn_softcap > 0.0
      t_scores = TinyNN.tnn_scale(@sess, t_scores, 1.0 / @attn_softcap)
      t_scores = TinyNN.tnn_tanh(@sess, t_scores)
      t_scores = TinyNN.tnn_scale(@sess, t_scores, @attn_softcap)
    end
    t_weights = TinyNN.tnn_softmax(@sess, t_scores)
    if tap_this_head
      t_weights = trace_tap(tag + "softmax", t_weights)
    end
    # P5.2: V history is [d_head, n_visible]; the matmul needs
    # n_visible as the shared axis, so V is transposed first. Per the
    # original notes tnn_transpose materializes the result (one copy
    # per Q head per layer).
    t_v_t = TinyNN.tnn_transpose(@sess, t_v_hist)
    t_head = TinyNN.tnn_matmul(@sess, t_v_t, t_weights)
    if tap_this_head
      t_head = trace_tap(tag + "head0", t_head)
    end
  end

  t_head
end
1500
+ end
1501
+
1502
# Bundle of the three tensors produced by one decode-step graph build:
# the token-id input, the RoPE-position input, and the logits output.
#
# The constructor's parameter names are intentionally different from
# the instance-variable names (same defensive pattern as
# GPT2KVStepResult); keep them distinct when editing.
class SmolLM2KVStepResult
  attr_accessor :t_token_id
  attr_accessor :t_pos
  attr_accessor :kv_step_logits

  # tok_ptr    - token-id input tensor.
  # pos_ptr    - RoPE-position input tensor.
  # logits_ptr - logits output tensor.
  def initialize(tok_ptr, pos_ptr, logits_ptr)
    @t_token_id = tok_ptr
    @t_pos = pos_ptr
    @kv_step_logits = logits_ptr
  end
end
1512
+
1513
module SmolLM2KV
  # Copy every weight of a Toy::SmolLM2 `model` into the tensors of an
  # already-realized `kv_cache`, and zero the non-Q8 K/V buffers.
  #
  # kv_cache - realized cache object; supplies the session, the model
  #            dimensions, the destination tensors and the bias /
  #            untied-output / K-V type flags.
  # model    - source model; reads token_embed, final_norm, output_proj
  #            (untied case only) and stack[i].{rn1, rn2, attn, ffn}.
  def self.upload_from(kv_cache, model)
    sess = kv_cache.sess
    layer_count = kv_cache.n_layers
    q_head_count = kv_cache.n_heads
    kv_head_count = kv_cache.n_kv
    model_dim = kv_cache.d_model
    head_dim = kv_cache.d_head
    cache_len = kv_cache.max_T

    # Model-level tensors: embedding, final norm, optional lm_head.
    TinyNN.upload_row_major(sess, kv_cache.t_token_embed, model.token_embed.weight)
    TinyNN.tnn_upload_from_float_array(sess, kv_cache.t_final_norm_gamma,
                                       model.final_norm.gamma, model_dim)
    if kv_cache.has_untied_output
      TinyNN.upload_row_major(sess, kv_cache.t_output, model.output_proj)
    end

    # P5.2: K and V share the ne=[d_head, max_T] layout, so a single
    # (max_T, d_head) Mat serves as the zero-init source for both.
    # (Assumes a fresh Mat is zero-filled, as the original note implies.)
    zero_rows = Mat.new(cache_len, head_dim)

    layer = 0
    while layer < layer_count
      src = model.stack[layer]
      dst = kv_cache.kv_blocks_ffi[layer]

      # The two pre-norm gammas.
      TinyNN.tnn_upload_from_float_array(sess, dst.t_rn1_gamma, src.rn1.gamma, model_dim)
      TinyNN.tnn_upload_from_float_array(sess, dst.t_rn2_gamma, src.rn2.gamma, model_dim)

      # Per-Q-head projection weights (and biases when enabled).
      q_head = 0
      while q_head < q_head_count
        TinyNN.stage_transposed_and_upload(sess, dst.t_w_q[q_head], src.attn.w_q[q_head])
        if kv_cache.has_qkv_bias
          TinyNN.tnn_upload_from_float_array(sess, dst.t_b_q[q_head], src.attn.b_q[q_head], head_dim)
        end
        q_head = q_head + 1
      end

      # Per-KV-head projection weights, biases, and cache zeroing.
      kv_head = 0
      while kv_head < kv_head_count
        TinyNN.stage_transposed_and_upload(sess, dst.t_w_k[kv_head], src.attn.w_k[kv_head])
        TinyNN.stage_transposed_and_upload(sess, dst.t_w_v[kv_head], src.attn.w_v[kv_head])
        if kv_cache.has_qkv_bias
          TinyNN.tnn_upload_from_float_array(sess, dst.t_b_k[kv_head], src.attn.b_k[kv_head], head_dim)
          TinyNN.tnn_upload_from_float_array(sess, dst.t_b_v[kv_head], src.attn.b_v[kv_head], head_dim)
        end
        # P5.1+P5.2: a cache with type code 8 (Q8) is not zero-filled
        # here; this is the same skip rule realize_for_mmap uses.
        if kv_cache.kv_type_k != 8
          TinyNN.upload_row_major(sess, dst.t_K[kv_head], zero_rows)
        end
        if kv_cache.kv_type_v != 8
          TinyNN.upload_row_major(sess, dst.t_V[kv_head], zero_rows)
        end
        kv_head = kv_head + 1
      end

      # Attention output projection and the dense FFN weights.
      TinyNN.stage_transposed_and_upload(sess, dst.t_w_o, src.attn.w_o)
      TinyNN.stage_transposed_and_upload(sess, dst.t_w_gate, src.ffn.w_gate)
      TinyNN.stage_transposed_and_upload(sess, dst.t_w_up, src.ffn.w_up)
      TinyNN.stage_transposed_and_upload(sess, dst.t_w_down, src.ffn.w_down)

      layer = layer + 1
    end
  end

  # Decode one new token at cache position `pos`.
  #
  # The graph is rebuilt on every call: reset the session, build the
  # step, realize it, upload the two length-1 inputs, compute. The
  # cache's dump_trace is then called before the logits are read
  # (per the original notes, taps are inserted and stats dumped when
  # `kv_cache.trace_on` is set).
  #
  # Returns the (1, vocab_size) logits for the new position, as
  # produced by TinyNN.download_row_major.
  def self.decode_step(kv_cache, token_id, pos)
    TinyNN.tnn_reset_for_rebuild(kv_cache.sess)
    built = kv_cache.build_decode_step(pos)
    t_logits = built.kv_step_logits

    TinyNN.tnn_realize(kv_cache.sess, t_logits)
    TinyNN.upload_int_array(kv_cache.sess, built.t_token_id, [token_id])
    TinyNN.upload_int_array(kv_cache.sess, built.t_pos, [pos])
    TinyNN.tnn_compute(kv_cache.sess)

    kv_cache.dump_trace
    TinyNN.download_row_major(kv_cache.sess, t_logits, 1, kv_cache.vocab_size)
  end
end