toy 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2107)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +1124 -0
  3. data/LICENSE +21 -0
  4. data/Makefile +2022 -0
  5. data/README.md +154 -0
  6. data/bin/toy +10 -0
  7. data/lib/toy/compute.rb +135 -0
  8. data/lib/toy/compute_cuda.rb +104 -0
  9. data/lib/toy/compute_metal.rb +97 -0
  10. data/lib/toy/core/cli/describe.rb +188 -0
  11. data/lib/toy/core/cli/eval.rb +385 -0
  12. data/lib/toy/core/cli/exit_codes.rb +15 -0
  13. data/lib/toy/core/cli/fetch.rb +238 -0
  14. data/lib/toy/core/cli/infer.rb +268 -0
  15. data/lib/toy/core/cli/install.rb +228 -0
  16. data/lib/toy/core/cli/list.rb +86 -0
  17. data/lib/toy/core/cli/manifest.rb +49 -0
  18. data/lib/toy/core/cli/new.rb +594 -0
  19. data/lib/toy/core/cli/serve.rb +237 -0
  20. data/lib/toy/core/cli/train.rb +471 -0
  21. data/lib/toy/core/cli.rb +165 -0
  22. data/lib/toy/core/config.rb +64 -0
  23. data/lib/toy/core/gguf_meta.rb +161 -0
  24. data/lib/toy/core/model_scan.rb +221 -0
  25. data/lib/toy/core/run_log.rb +94 -0
  26. data/lib/toy/core/toy_root.rb +95 -0
  27. data/lib/toy/dev/toy_card.rb +299 -0
  28. data/lib/toy/dev/toy_describe_flow.rb +412 -0
  29. data/lib/toy/dev/toy_logprobs.rb +86 -0
  30. data/lib/toy/dev/toy_tap.rb +183 -0
  31. data/lib/toy/dev/toy_token_drift.rb +121 -0
  32. data/lib/toy/ffi/tinynn.rb +1491 -0
  33. data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
  34. data/lib/toy/ffi/tinynn_metal.rb +359 -0
  35. data/lib/toy/ffi_manifest.rb +84 -0
  36. data/lib/toy/io/bpe.rb +325 -0
  37. data/lib/toy/io/gguf_kv.rb +35 -0
  38. data/lib/toy/io/gguf_load.rb +331 -0
  39. data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
  40. data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
  41. data/lib/toy/io/model_index.rb +206 -0
  42. data/lib/toy/io/run_bundle.rb +280 -0
  43. data/lib/toy/io/tokenizer.rb +613 -0
  44. data/lib/toy/io/toy_corpus_loader.rb +52 -0
  45. data/lib/toy/io/toy_events.rb +56 -0
  46. data/lib/toy/io/toy_image_loader.rb +48 -0
  47. data/lib/toy/llm/adamw.rb +169 -0
  48. data/lib/toy/llm/archs/llama_arch.rb +233 -0
  49. data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
  50. data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
  51. data/lib/toy/llm/blocks/transformer_block.rb +876 -0
  52. data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
  53. data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
  54. data/lib/toy/llm/classify_batch.rb +88 -0
  55. data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
  56. data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
  57. data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
  58. data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
  59. data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
  60. data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
  61. data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
  62. data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
  63. data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
  64. data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
  65. data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
  66. data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
  67. data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
  68. data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
  69. data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
  70. data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
  71. data/lib/toy/llm/labels.rb +142 -0
  72. data/lib/toy/llm/primitives/gqa.rb +62 -0
  73. data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
  74. data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
  75. data/lib/toy/llm/primitives/rms_norm.rb +39 -0
  76. data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
  77. data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
  78. data/lib/toy/llm/primitives/rope.rb +68 -0
  79. data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
  80. data/lib/toy/llm/primitives/rope_metal.rb +72 -0
  81. data/lib/toy/llm/primitives/swiglu.rb +41 -0
  82. data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
  83. data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
  84. data/lib/toy/llm/recipe_options.rb +71 -0
  85. data/lib/toy/llm/recipes/from_scratch.rb +105 -0
  86. data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
  87. data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
  88. data/lib/toy/llm/recipes/lora.rb +110 -0
  89. data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
  90. data/lib/toy/llm/recipes/lora_metal.rb +114 -0
  91. data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
  92. data/lib/toy/llm/recipes/warm_start.rb +235 -0
  93. data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
  94. data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
  95. data/lib/toy/llm/training_batch.rb +133 -0
  96. data/lib/toy/models/arch.rb +253 -0
  97. data/lib/toy/models/gpt2.rb +311 -0
  98. data/lib/toy/models/toy_gpt2.rb +177 -0
  99. data/lib/toy/models/toy_smollm2.rb +393 -0
  100. data/lib/toy/models/toy_vit.rb +83 -0
  101. data/lib/toy/models/transformer.rb +1494 -0
  102. data/lib/toy/models/transformer_lm.rb +298 -0
  103. data/lib/toy/models/transformer_lm_cuda.rb +159 -0
  104. data/lib/toy/models/transformer_lm_metal.rb +142 -0
  105. data/lib/toy/mri.rb +300 -0
  106. data/lib/toy/run/eval.rb +76 -0
  107. data/lib/toy/run/eval_cuda.rb +66 -0
  108. data/lib/toy/run/eval_lmc.rb +334 -0
  109. data/lib/toy/run/eval_metal.rb +67 -0
  110. data/lib/toy/run/infer.rb +130 -0
  111. data/lib/toy/run/infer_cuda.rb +118 -0
  112. data/lib/toy/run/infer_metal.rb +119 -0
  113. data/lib/toy/run/infer_trace.rb +37 -0
  114. data/lib/toy/run/serve.rb +144 -0
  115. data/lib/toy/run/train.rb +404 -0
  116. data/lib/toy/run/train_cuda.rb +397 -0
  117. data/lib/toy/run/train_gpt2.rb +103 -0
  118. data/lib/toy/run/train_gpt2_cuda.rb +85 -0
  119. data/lib/toy/run/train_gpt2_metal.rb +85 -0
  120. data/lib/toy/run/train_lora.rb +207 -0
  121. data/lib/toy/run/train_lora_cuda.rb +219 -0
  122. data/lib/toy/run/train_metal.rb +227 -0
  123. data/lib/toy/run/train_vit.rb +251 -0
  124. data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
  125. data/lib/toy/serve/openai/handlers.rb +143 -0
  126. data/lib/toy/serve/openai/server.rb +159 -0
  127. data/lib/toy/train/sampler.rb +314 -0
  128. data/lib/toy/train/toy_chat_template.rb +179 -0
  129. data/lib/toy/train/toy_drift_grad.rb +176 -0
  130. data/lib/toy/train/toy_gguf_fuse.rb +428 -0
  131. data/lib/toy/train/toy_gguf_writer.rb +100 -0
  132. data/lib/toy/train/toy_lr_schedule.rb +39 -0
  133. data/lib/toy/train/toy_sample.rb +125 -0
  134. data/lib/toy/train/toy_trainer.rb +86 -0
  135. data/lib/toy/train/training.rb +160 -0
  136. data/lib/toy/version.rb +11 -0
  137. data/lib/toy.rb +902 -0
  138. data/prep/progress +118 -0
  139. data/prep/quietly +64 -0
  140. data/sig/toy.rbs +397 -0
  141. data/sig/toy_compute.rbs +450 -0
  142. data/spinel-ext.json +122 -0
  143. data/tinynn/Makefile +71 -0
  144. data/tinynn/tinynn_backend_cuda.c +99 -0
  145. data/tinynn/tinynn_backend_metal.m +75 -0
  146. data/tinynn/tinynn_events.c +122 -0
  147. data/tinynn/tinynn_events.h +83 -0
  148. data/tinynn/tinynn_ggml.c +2460 -0
  149. data/tinynn/tinynn_ggml.h +545 -0
  150. data/tinynn/tinynn_gguf.c +783 -0
  151. data/tinynn/tinynn_gguf.h +167 -0
  152. data/tinynn/tinynn_trace.c +180 -0
  153. data/tinynn/tinynn_trace.h +85 -0
  154. data/vendor/ggml/AUTHORS +335 -0
  155. data/vendor/ggml/CMakeLists.txt +505 -0
  156. data/vendor/ggml/CONTRIBUTING.md +3 -0
  157. data/vendor/ggml/LICENSE +21 -0
  158. data/vendor/ggml/README.md +50 -0
  159. data/vendor/ggml/ci/run.sh +395 -0
  160. data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
  161. data/vendor/ggml/cmake/GitVars.cmake +22 -0
  162. data/vendor/ggml/cmake/common.cmake +50 -0
  163. data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
  164. data/vendor/ggml/docs/gguf.md +828 -0
  165. data/vendor/ggml/examples/CMakeLists.txt +34 -0
  166. data/vendor/ggml/examples/common-ggml.cpp +244 -0
  167. data/vendor/ggml/examples/common-ggml.h +18 -0
  168. data/vendor/ggml/examples/common.cpp +675 -0
  169. data/vendor/ggml/examples/common.h +322 -0
  170. data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
  171. data/vendor/ggml/examples/gpt-2/README.md +225 -0
  172. data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
  173. data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
  174. data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
  175. data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
  176. data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
  177. data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
  178. data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
  179. data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
  180. data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
  181. data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
  182. data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
  183. data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
  184. data/vendor/ggml/examples/gpt-j/README.md +239 -0
  185. data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
  186. data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
  187. data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
  188. data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
  189. data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
  190. data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
  191. data/vendor/ggml/examples/magika/README.md +23 -0
  192. data/vendor/ggml/examples/magika/convert.py +32 -0
  193. data/vendor/ggml/examples/magika/main.cpp +374 -0
  194. data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
  195. data/vendor/ggml/examples/mnist/README.md +206 -0
  196. data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
  197. data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
  198. data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
  199. data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
  200. data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
  201. data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
  202. data/vendor/ggml/examples/mnist/server.py +36 -0
  203. data/vendor/ggml/examples/mnist/web/index.html +178 -0
  204. data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
  205. data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
  206. data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
  207. data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
  208. data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
  209. data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
  210. data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
  211. data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
  212. data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
  213. data/vendor/ggml/examples/prompts/replit.txt +100 -0
  214. data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
  215. data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
  216. data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
  217. data/vendor/ggml/examples/prompts/whisper.txt +100 -0
  218. data/vendor/ggml/examples/python/README.md +115 -0
  219. data/vendor/ggml/examples/python/api.h +14 -0
  220. data/vendor/ggml/examples/python/example_add_quant.py +25 -0
  221. data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
  222. data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
  223. data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
  224. data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
  225. data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
  226. data/vendor/ggml/examples/python/ggml/utils.py +182 -0
  227. data/vendor/ggml/examples/python/regenerate.py +42 -0
  228. data/vendor/ggml/examples/python/stubs.py +128 -0
  229. data/vendor/ggml/examples/python/test_tensor.py +258 -0
  230. data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
  231. data/vendor/ggml/examples/sam/README.md +95 -0
  232. data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
  233. data/vendor/ggml/examples/sam/example.jpg +0 -0
  234. data/vendor/ggml/examples/sam/sam.cpp +2370 -0
  235. data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
  236. data/vendor/ggml/examples/simple/README.md +61 -0
  237. data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
  238. data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
  239. data/vendor/ggml/examples/stb_image.h +7987 -0
  240. data/vendor/ggml/examples/stb_image_write.h +1724 -0
  241. data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
  242. data/vendor/ggml/examples/test-cmake/README.md +3 -0
  243. data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
  244. data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
  245. data/vendor/ggml/examples/yolo/README.md +59 -0
  246. data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
  247. data/vendor/ggml/examples/yolo/data/coco.names +80 -0
  248. data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
  249. data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
  250. data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
  251. data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
  252. data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
  253. data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
  254. data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
  255. data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
  256. data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
  257. data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
  258. data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
  259. data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
  260. data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
  261. data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
  262. data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
  263. data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
  264. data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
  265. data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
  266. data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
  267. data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
  268. data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
  269. data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
  270. data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
  271. data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
  272. data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
  273. data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
  274. data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
  275. data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
  276. data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
  277. data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
  278. data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
  279. data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
  280. data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
  281. data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
  282. data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
  283. data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
  284. data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
  285. data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
  286. data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
  287. data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
  288. data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
  289. data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
  290. data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
  291. data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
  292. data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
  293. data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
  294. data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
  295. data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
  296. data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
  297. data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
  298. data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
  299. data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
  300. data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
  301. data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
  302. data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
  303. data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
  304. data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
  305. data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
  306. data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
  307. data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
  308. data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
  309. data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
  310. data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
  311. data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
  312. data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
  313. data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
  314. data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
  315. data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
  316. data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
  317. data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
  318. data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
  319. data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
  320. data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
  321. data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
  322. data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
  323. data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
  324. data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
  325. data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
  326. data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
  327. data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
  328. data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
  329. data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
  330. data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
  331. data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
  332. data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
  333. data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
  334. data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
  335. data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
  336. data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
  337. data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
  338. data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
  339. data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
  340. data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
  341. data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
  342. data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
  343. data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
  344. data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
  345. data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
  346. data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
  347. data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
  348. data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
  349. data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
  350. data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
  351. data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
  352. data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
  353. data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
  354. data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
  355. data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
  356. data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
  357. data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
  358. data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
  359. data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
  360. data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
  361. data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
  362. data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
  363. data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
  364. data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
  365. data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
  366. data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
  367. data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
  368. data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
  369. data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
  370. data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
  371. data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
  372. data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
  373. data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
  374. data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
  375. data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
  376. data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
  377. data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
  378. data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
  379. data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
  380. data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
  381. data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
  382. data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
  383. data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
  384. data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
  385. data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
  386. data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
  387. data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
  388. data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
  389. data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
  390. data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
  391. data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
  392. data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
  393. data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
  394. data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
  395. data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
  396. data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
  397. data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
  398. data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
  399. data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
  400. data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
  401. data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
  402. data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
  403. data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
  404. data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
  405. data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
  406. data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
  407. data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
  408. data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
  409. data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
  410. data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
  411. data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
  412. data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
  413. data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
  414. data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
  415. data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
  416. data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
  417. data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
  418. data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
  419. data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
  420. data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
  421. data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
  422. data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
  423. data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
  424. data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
  425. data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
  426. data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
  427. data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
  428. data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
  429. data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
  430. data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
  431. data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
  432. data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
  433. data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
  434. data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
  435. data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
  436. data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
  437. data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
  438. data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
  439. data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
  440. data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
  441. data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
  442. data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
  443. data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
  444. data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
  445. data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
  446. data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
  447. data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
  448. data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
  449. data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
  450. data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
  451. data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
  452. data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
  453. data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
  454. data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
  455. data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
  456. data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
  457. data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
  458. data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
  459. data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
  460. data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
  461. data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
  462. data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
  463. data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
  464. data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
  465. data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
  466. data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
  467. data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
  468. data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
  469. data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
  470. data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
  471. data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
  472. data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
  473. data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
  474. data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
  475. data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
  476. data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
  477. data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
  478. data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
  479. data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
  480. data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
  481. data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
  482. data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
  483. data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
  484. data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
  485. data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
  486. data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
  487. data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
  488. data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
  489. data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
  490. data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
  491. data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
  492. data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
  493. data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
  494. data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
  495. data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
  496. data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
  497. data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
  498. data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
  499. data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
  500. data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
  501. data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
  502. data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
  503. data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
  504. data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
  505. data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
  506. data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
  507. data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
  508. data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
  509. data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
  510. data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
  511. data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
  512. data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
  513. data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
  514. data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
  515. data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
  516. data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
  517. data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
  518. data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
  519. data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
  520. data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
  521. data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
  522. data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
  523. data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
  524. data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
  525. data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
  526. data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
  527. data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
  528. data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
  529. data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
  530. data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
  531. data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
  532. data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
  533. data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
  534. data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
  535. data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
  536. data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
  537. data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
  538. data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
  539. data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
  540. data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
  541. data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
  542. data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
  543. data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
  544. data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
  545. data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
  546. data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
  547. data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
  548. data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
  549. data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
  550. data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
  551. data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
  552. data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
  553. data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
  554. data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
  555. data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
  556. data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
  557. data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
  558. data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
  559. data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
  560. data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
  561. data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
  562. data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
  563. data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
  564. data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
  565. data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
  566. data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
  567. data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
  568. data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
  569. data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
  570. data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
  571. data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
  572. data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
  573. data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
  574. data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
  575. data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
  576. data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
  577. data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
  578. data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
  579. data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
  580. data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
  581. data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
  582. data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
  583. data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
  584. data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
  585. data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
  586. data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
  587. data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
  588. data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
  589. data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
  590. data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
  591. data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
  592. data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
  593. data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
  594. data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
  595. data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
  596. data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
  597. data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
  598. data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
  599. data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
  600. data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
  601. data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
  602. data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
  603. data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
  604. data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
  605. data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
  606. data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
  607. data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
  608. data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
  609. data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
  610. data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
  611. data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
  612. data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
  613. data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
  614. data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
  615. data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
  616. data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
  617. data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
  618. data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
  619. data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
  620. data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
  621. data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
  622. data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
  623. data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
  624. data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
  625. data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
  626. data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
  627. data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
  628. data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
  629. data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
  630. data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
  631. data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
  632. data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
  633. data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
  634. data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
  635. data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
  636. data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
  637. data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
  638. data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
  639. data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
  640. data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
  641. data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
  642. data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
  643. data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
  644. data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
  645. data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
  646. data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
  647. data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
  648. data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
  649. data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
  650. data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
  651. data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
  652. data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
  653. data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
  654. data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
  655. data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
  656. data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
  657. data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
  658. data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
  659. data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
  660. data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
  661. data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
  662. data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
  663. data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
  664. data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
  665. data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
  666. data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
  667. data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
  668. data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
  669. data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
  670. data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
  671. data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
  672. data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
  673. data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
  674. data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
  675. data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
  676. data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
  677. data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
  678. data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
  679. data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
  680. data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
  681. data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
  682. data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
  683. data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
  684. data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
  685. data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
  686. data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
  687. data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
  688. data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
  689. data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
  690. data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
  691. data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
  692. data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
  693. data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
  694. data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
  695. data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
  696. data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
  697. data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
  698. data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
  699. data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
  700. data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
  701. data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
  702. data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
  703. data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
  704. data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
  705. data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
  706. data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
  707. data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
  708. data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
  709. data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
  710. data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
  711. data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
  712. data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
  713. data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
  714. data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
  715. data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
  716. data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
  717. data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
  718. data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
  719. data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
  720. data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
  721. data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
  722. data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
  723. data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
  724. data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
  725. data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
  726. data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
  727. data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
  728. data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
  729. data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
  730. data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
  731. data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
  732. data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
  733. data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
  734. data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
  735. data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
  736. data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
  737. data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
  738. data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
  739. data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
  740. data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
  741. data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
  742. data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
  743. data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
  744. data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
  745. data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
  746. data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
  747. data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
  748. data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
  749. data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
  750. data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
  751. data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
  752. data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
  753. data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
  754. data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
  755. data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
  756. data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
  757. data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
  758. data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
  759. data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
  760. data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
  761. data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
  762. data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
  763. data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
  764. data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
  765. data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
  766. data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
  767. data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
  768. data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
  769. data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
  770. data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
  771. data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
  772. data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
  773. data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
  774. data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
  775. data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
  776. data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
  777. data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
  778. data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
  779. data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
  780. data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
  781. data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
  782. data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
  783. data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
  784. data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
  785. data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
  786. data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
  787. data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
  788. data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
  789. data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
  790. data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
  791. data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
  792. data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
  793. data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
  794. data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
  795. data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
  796. data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
  797. data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
  798. data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
  799. data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
  800. data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
  801. data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
  802. data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
  803. data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
  804. data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
  805. data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
  806. data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
  807. data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
  808. data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
  809. data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
  810. data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
  811. data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
  812. data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
  813. data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
  814. data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
  815. data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
  816. data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
  817. data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
  818. data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
  819. data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
  820. data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
  821. data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
  822. data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
  823. data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
  824. data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
  825. data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
  826. data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
  827. data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
  828. data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
  829. data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
  830. data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
  831. data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
  832. data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
  833. data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
  834. data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
  835. data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
  836. data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
  837. data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
  838. data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
  839. data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
  840. data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
  841. data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
  842. data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
  843. data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
  844. data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
  845. data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
  846. data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
  847. data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
  848. data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
  849. data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
  850. data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
  851. data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
  852. data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
  853. data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
  854. data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
  855. data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
  856. data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
  857. data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
  858. data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
  859. data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
  860. data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
  861. data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
  862. data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
  863. data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
  864. data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
  865. data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
  866. data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
  867. data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
  868. data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
  869. data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
  870. data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
  871. data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
  872. data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
  873. data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
  874. data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
  875. data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
  876. data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
  877. data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
  878. data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
  879. data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
  880. data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
  881. data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
  882. data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
  883. data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
  884. data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
  885. data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
  886. data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
  887. data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
  888. data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
  889. data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
  890. data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
  891. data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
  892. data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
  893. data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
  894. data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
  895. data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
  896. data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
  897. data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
  898. data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
  899. data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
  900. data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
  901. data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
  902. data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
  903. data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
  904. data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
  905. data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
  906. data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
  907. data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
  908. data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
  909. data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
  910. data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
  911. data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
  912. data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
  913. data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
  914. data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
  915. data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
  916. data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
  917. data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
  918. data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
  919. data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
  920. data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
  921. data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
  922. data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
  923. data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
  924. data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
  925. data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
  926. data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
  927. data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
  928. data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
  929. data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
  930. data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
  931. data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
  932. data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
  933. data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
  934. data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
  935. data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
  936. data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
  937. data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
  938. data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
  939. data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
  940. data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
  941. data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
  942. data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
  943. data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
  944. data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
  945. data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
  946. data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
  947. data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
  948. data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
  949. data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
  950. data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
  951. data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
  952. data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
  953. data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
  954. data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
  955. data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
  956. data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
  957. data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
  958. data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
  959. data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
  960. data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
  961. data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
  962. data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
  963. data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
  964. data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
  965. data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
  966. data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
  967. data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
  968. data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
  969. data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
  970. data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
  971. data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
  972. data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
  973. data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
  974. data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
  975. data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
  976. data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
  977. data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
  978. data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
  979. data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
  980. data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
  981. data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
  982. data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
  983. data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
  984. data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
  985. data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
  986. data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
  987. data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
  988. data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
  989. data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
  990. data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
  991. data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
  992. data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
  993. data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
  994. data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
  995. data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
  996. data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
  997. data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
  998. data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
  999. data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
  1000. data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
  1001. data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
  1002. data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
  1003. data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
  1004. data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
  1005. data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
  1006. data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
  1007. data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
  1008. data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
  1009. data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
  1010. data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
  1011. data/vendor/ggml/ggml.pc.in +10 -0
  1012. data/vendor/ggml/include/ggml-alloc.h +85 -0
  1013. data/vendor/ggml/include/ggml-backend.h +431 -0
  1014. data/vendor/ggml/include/ggml-blas.h +25 -0
  1015. data/vendor/ggml/include/ggml-cann.h +123 -0
  1016. data/vendor/ggml/include/ggml-cpp.h +39 -0
  1017. data/vendor/ggml/include/ggml-cpu.h +151 -0
  1018. data/vendor/ggml/include/ggml-cuda.h +50 -0
  1019. data/vendor/ggml/include/ggml-hexagon.h +19 -0
  1020. data/vendor/ggml/include/ggml-metal.h +61 -0
  1021. data/vendor/ggml/include/ggml-opencl.h +26 -0
  1022. data/vendor/ggml/include/ggml-openvino.h +37 -0
  1023. data/vendor/ggml/include/ggml-opt.h +256 -0
  1024. data/vendor/ggml/include/ggml-rpc.h +35 -0
  1025. data/vendor/ggml/include/ggml-sycl.h +49 -0
  1026. data/vendor/ggml/include/ggml-virtgpu.h +14 -0
  1027. data/vendor/ggml/include/ggml-vulkan.h +29 -0
  1028. data/vendor/ggml/include/ggml-webgpu.h +19 -0
  1029. data/vendor/ggml/include/ggml-zdnn.h +17 -0
  1030. data/vendor/ggml/include/ggml-zendnn.h +22 -0
  1031. data/vendor/ggml/include/ggml.h +2845 -0
  1032. data/vendor/ggml/include/gguf.h +204 -0
  1033. data/vendor/ggml/requirements.txt +12 -0
  1034. data/vendor/ggml/scripts/gen-authors.sh +9 -0
  1035. data/vendor/ggml/scripts/release.sh +296 -0
  1036. data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
  1037. data/vendor/ggml/scripts/sync-llama.last +1 -0
  1038. data/vendor/ggml/scripts/sync-llama.sh +21 -0
  1039. data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
  1040. data/vendor/ggml/scripts/sync-whisper.last +1 -0
  1041. data/vendor/ggml/scripts/sync-whisper.sh +17 -0
  1042. data/vendor/ggml/src/CMakeLists.txt +493 -0
  1043. data/vendor/ggml/src/ggml-alloc.c +1248 -0
  1044. data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
  1045. data/vendor/ggml/src/ggml-backend-dl.h +45 -0
  1046. data/vendor/ggml/src/ggml-backend-impl.h +275 -0
  1047. data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
  1048. data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
  1049. data/vendor/ggml/src/ggml-backend.cpp +2371 -0
  1050. data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  1051. data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
  1052. data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
  1053. data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
  1054. data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
  1055. data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
  1056. data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
  1057. data/vendor/ggml/src/ggml-cann/common.h +651 -0
  1058. data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
  1059. data/vendor/ggml/src/ggml-common.h +1900 -0
  1060. data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
  1061. data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
  1062. data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
  1063. data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
  1064. data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  1065. data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  1066. data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  1067. data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
  1068. data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
  1069. data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
  1070. data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  1071. data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
  1072. data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  1073. data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
  1074. data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
  1075. data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  1076. data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
  1077. data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
  1078. data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  1079. data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
  1080. data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
  1081. data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
  1082. data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  1083. data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
  1084. data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  1085. data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
  1086. data/vendor/ggml/src/ggml-cpu/common.h +95 -0
  1087. data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
  1088. data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
  1089. data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
  1090. data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
  1091. data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
  1092. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
  1093. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  1094. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
  1095. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  1096. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
  1097. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  1098. data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
  1099. data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
  1100. data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
  1101. data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
  1102. data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
  1103. data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
  1104. data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
  1105. data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
  1106. data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
  1107. data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
  1108. data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
  1109. data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
  1110. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
  1111. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
  1112. data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
  1113. data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
  1114. data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
  1115. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
  1116. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
  1117. data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
  1118. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
  1119. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
  1120. data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
  1121. data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
  1122. data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
  1123. data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  1124. data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
  1125. data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
  1126. data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
  1127. data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
  1128. data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
  1129. data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
  1130. data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
  1131. data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
  1132. data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
  1133. data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
  1134. data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
  1135. data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
  1136. data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
  1137. data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
  1138. data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
  1139. data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
  1140. data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
  1141. data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
  1142. data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
  1143. data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
  1144. data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
  1145. data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
  1146. data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
  1147. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
  1148. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  1149. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  1150. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  1151. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
  1152. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
  1153. data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
  1154. data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  1155. data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
  1156. data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
  1157. data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
  1158. data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  1159. data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  1160. data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  1161. data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
  1162. data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
  1163. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
  1164. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  1165. data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
  1166. data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  1167. data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
  1168. data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
  1169. data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
  1170. data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
  1171. data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  1172. data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
  1173. data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
  1174. data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
  1175. data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
  1176. data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
  1177. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
  1178. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
  1179. data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
  1180. data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
  1181. data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
  1182. data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
  1183. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
  1184. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  1185. data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
  1186. data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
  1187. data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
  1188. data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
  1189. data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
  1190. data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
  1191. data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
  1192. data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
  1193. data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
  1194. data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
  1195. data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
  1196. data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
  1197. data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
  1198. data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
  1199. data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
  1200. data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
  1201. data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
  1202. data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
  1203. data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
  1204. data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
  1205. data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
  1206. data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
  1207. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  1208. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  1209. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  1210. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  1211. data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
  1212. data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  1213. data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
  1214. data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
  1215. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  1216. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  1217. data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
  1218. data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  1219. data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
  1220. data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
  1221. data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
  1222. data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
  1223. data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
  1224. data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
  1225. data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
  1226. data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
  1227. data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
  1228. data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
  1229. data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  1230. data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
  1231. data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
  1232. data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
  1233. data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
  1234. data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
  1235. data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
  1236. data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
  1237. data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
  1238. data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  1239. data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  1240. data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
  1241. data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  1242. data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
  1243. data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  1244. data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
  1245. data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
  1246. data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
  1247. data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  1248. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
  1249. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
  1250. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
  1251. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  1252. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  1253. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
  1254. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
  1255. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
  1256. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
  1257. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
  1258. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  1259. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  1260. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
  1261. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  1262. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
  1263. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
  1264. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  1265. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  1266. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  1267. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
  1268. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
  1269. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  1270. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  1271. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
  1272. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  1273. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
  1274. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  1275. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
  1276. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  1277. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  1278. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  1279. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  1280. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  1281. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
  1282. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
  1283. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
  1284. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
  1285. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
  1286. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
  1287. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
  1288. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
  1289. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  1290. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  1291. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  1292. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  1293. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  1294. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  1295. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
  1296. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  1297. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  1298. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  1299. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  1300. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  1301. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  1302. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
  1303. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  1304. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  1305. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  1306. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  1307. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  1308. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  1309. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
  1310. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  1311. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  1312. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  1313. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  1314. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  1315. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  1316. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
  1317. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  1318. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  1319. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  1320. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  1321. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  1322. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  1323. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
  1324. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  1325. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  1326. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  1327. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  1328. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  1329. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  1330. data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
  1331. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  1332. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  1333. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  1334. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  1335. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  1336. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  1337. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  1338. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  1339. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  1340. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  1341. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  1342. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  1343. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  1344. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  1345. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  1346. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  1347. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  1348. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  1349. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  1350. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  1351. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  1352. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  1353. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  1354. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  1355. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  1356. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
  1357. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
  1358. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  1359. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  1360. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  1361. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  1362. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  1363. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  1364. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  1365. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  1366. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  1367. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  1368. data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
  1369. data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
  1370. data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
  1371. data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
  1372. data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
  1373. data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
  1374. data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
  1375. data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  1376. data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
  1377. data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
  1378. data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
  1379. data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
  1380. data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
  1381. data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
  1382. data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
  1383. data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
  1384. data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
  1385. data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
  1386. data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
  1387. data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
  1388. data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
  1389. data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
  1390. data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
  1391. data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
  1392. data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  1393. data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
  1394. data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
  1395. data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
  1396. data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
  1397. data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
  1398. data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
  1399. data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
  1400. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  1401. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
  1402. data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
  1403. data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  1404. data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
  1405. data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
  1406. data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
  1407. data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
  1408. data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
  1409. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
  1410. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
  1411. data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
  1412. data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
  1413. data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
  1414. data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
  1415. data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  1416. data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
  1417. data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
  1418. data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
  1419. data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  1420. data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
  1421. data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  1422. data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  1423. data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  1424. data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
  1425. data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  1426. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
  1427. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  1428. data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  1429. data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
  1430. data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
  1431. data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
  1432. data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
  1433. data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
  1434. data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
  1435. data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
  1436. data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
  1437. data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
  1438. data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  1439. data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
  1440. data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
  1441. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  1442. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  1443. data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  1444. data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  1445. data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
  1446. data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
  1447. data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
  1448. data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
  1449. data/vendor/ggml/src/ggml-impl.h +783 -0
  1450. data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  1451. data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  1452. data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  1453. data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  1454. data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
  1455. data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
  1456. data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
  1457. data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
  1458. data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
  1459. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
  1460. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
  1461. data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
  1462. data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
  1463. data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
  1464. data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
  1465. data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
  1466. data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
  1467. data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
  1468. data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  1469. data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  1470. data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  1471. data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  1472. data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  1473. data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  1474. data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  1475. data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
  1476. data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  1477. data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
  1478. data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  1479. data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  1480. data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  1481. data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  1482. data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  1483. data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  1484. data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  1485. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  1486. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  1487. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  1488. data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  1489. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  1490. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
  1491. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
  1492. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
  1493. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
  1494. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
  1495. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
  1496. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
  1497. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  1498. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
  1499. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
  1500. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
  1501. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
  1502. data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
  1503. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  1504. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
  1505. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
  1506. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
  1507. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
  1508. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
  1509. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
  1510. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
  1511. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
  1512. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  1513. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
  1514. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
  1515. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
  1516. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
  1517. data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  1518. data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  1519. data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  1520. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  1521. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  1522. data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  1523. data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  1524. data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
  1525. data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
  1526. data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  1527. data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  1528. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  1529. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  1530. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  1531. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
  1532. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  1533. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  1534. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
  1535. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
  1536. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  1537. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  1538. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  1539. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  1540. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  1541. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  1542. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  1543. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  1544. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  1545. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  1546. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  1547. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  1548. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
  1549. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
  1550. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  1551. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  1552. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  1553. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  1554. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  1555. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  1556. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  1557. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  1558. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  1559. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  1560. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
  1561. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
  1562. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
  1563. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  1564. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  1565. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  1566. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  1567. data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  1568. data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  1569. data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  1570. data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  1571. data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  1572. data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  1573. data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  1574. data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  1575. data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  1576. data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  1577. data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  1578. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  1579. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  1580. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  1581. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  1582. data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  1583. data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  1584. data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  1585. data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  1586. data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  1587. data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  1588. data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  1589. data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  1590. data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
  1591. data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  1592. data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  1593. data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  1594. data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  1595. data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
  1596. data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  1597. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
  1598. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  1599. data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
  1600. data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
  1601. data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  1602. data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  1603. data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  1604. data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  1605. data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  1606. data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  1607. data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  1608. data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  1609. data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  1610. data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  1611. data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  1612. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  1613. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  1614. data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  1615. data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  1616. data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  1617. data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  1618. data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
  1619. data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  1620. data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  1621. data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  1622. data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  1623. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
  1624. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  1625. data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  1626. data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
  1627. data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
  1628. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  1629. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  1630. data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  1631. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  1632. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  1633. data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
  1634. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
  1635. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  1636. data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
  1637. data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
  1638. data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
  1639. data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
  1640. data/vendor/ggml/src/ggml-opt.cpp +1094 -0
  1641. data/vendor/ggml/src/ggml-quants.c +5491 -0
  1642. data/vendor/ggml/src/ggml-quants.h +112 -0
  1643. data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
  1644. data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
  1645. data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
  1646. data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
  1647. data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
  1648. data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
  1649. data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
  1650. data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
  1651. data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
  1652. data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  1653. data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
  1654. data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
  1655. data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
  1656. data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
  1657. data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
  1658. data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
  1659. data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
  1660. data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
  1661. data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  1662. data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  1663. data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
  1664. data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
  1665. data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
  1666. data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
  1667. data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
  1668. data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
  1669. data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
  1670. data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
  1671. data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  1672. data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
  1673. data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
  1674. data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
  1675. data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
  1676. data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
  1677. data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
  1678. data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
  1679. data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
  1680. data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
  1681. data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
  1682. data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
  1683. data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
  1684. data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
  1685. data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
  1686. data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
  1687. data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
  1688. data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
  1689. data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
  1690. data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
  1691. data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
  1692. data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
  1693. data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
  1694. data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
  1695. data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  1696. data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
  1697. data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
  1698. data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
  1699. data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
  1700. data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
  1701. data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
  1702. data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
  1703. data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
  1704. data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
  1705. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  1706. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  1707. data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
  1708. data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
  1709. data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
  1710. data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  1711. data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  1712. data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
  1713. data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
  1714. data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
  1715. data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
  1716. data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
  1717. data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
  1718. data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
  1719. data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  1720. data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
  1721. data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
  1722. data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
  1723. data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
  1724. data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
  1725. data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  1726. data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
  1727. data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
  1728. data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
  1729. data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
  1730. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  1731. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  1732. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  1733. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  1734. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
  1735. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  1736. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  1737. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  1738. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  1739. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  1740. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
  1741. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
  1742. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
  1743. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
  1744. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
  1745. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
  1746. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
  1747. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
  1748. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
  1749. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
  1750. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
  1751. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
  1752. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
  1753. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
  1754. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
  1755. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
  1756. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
  1757. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
  1758. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
  1759. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
  1760. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
  1761. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
  1762. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
  1763. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
  1764. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
  1765. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
  1766. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
  1767. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
  1768. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
  1769. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
  1770. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
  1771. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
  1772. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
  1773. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
  1774. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
  1775. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
  1776. data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  1777. data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  1778. data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
  1779. data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
  1780. data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
  1781. data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
  1782. data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
  1783. data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
  1784. data/vendor/ggml/src/ggml-threading.cpp +12 -0
  1785. data/vendor/ggml/src/ggml-threading.h +14 -0
  1786. data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  1787. data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  1788. data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  1789. data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  1790. data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  1791. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  1792. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  1793. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  1794. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  1795. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  1796. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  1797. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  1798. data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  1799. data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  1800. data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  1801. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  1802. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  1803. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  1804. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  1805. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  1806. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  1807. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
  1808. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
  1809. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  1810. data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
  1811. data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  1812. data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  1813. data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  1814. data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  1815. data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  1816. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  1817. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  1818. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  1819. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  1820. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  1821. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  1822. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
  1823. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  1824. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  1825. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  1826. data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
  1827. data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
  1828. data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
  1829. data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  1830. data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
  1831. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  1832. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  1833. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  1834. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  1835. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  1836. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  1837. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  1838. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  1839. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  1840. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  1841. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  1842. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  1843. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  1844. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  1845. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  1846. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  1847. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  1848. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  1849. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  1850. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
  1851. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  1852. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  1853. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  1854. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  1855. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  1856. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  1857. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  1858. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  1859. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
  1860. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
  1861. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  1862. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  1863. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  1864. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  1865. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  1866. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  1867. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  1868. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  1869. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  1870. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  1871. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  1872. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
  1873. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
  1874. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  1875. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  1876. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  1877. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  1878. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  1879. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  1880. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  1881. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  1882. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  1883. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  1884. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
  1885. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  1886. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  1887. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  1888. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
  1889. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  1890. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  1891. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  1892. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  1893. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  1894. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
  1895. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
  1896. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
  1897. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
  1898. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
  1899. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  1900. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
  1901. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  1902. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  1903. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
  1904. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  1905. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  1906. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  1907. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  1908. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  1909. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  1910. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
  1911. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  1912. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  1913. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  1914. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  1915. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
  1916. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
  1917. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  1918. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  1919. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  1920. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
  1921. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
  1922. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  1923. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  1924. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
  1925. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  1926. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  1927. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  1928. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  1929. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  1930. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  1931. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  1932. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  1933. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  1934. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  1935. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  1936. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  1937. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  1938. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  1939. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  1940. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  1941. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
  1942. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  1943. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  1944. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  1945. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
  1946. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  1947. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  1948. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
  1949. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  1950. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  1951. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  1952. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
  1953. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
  1954. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  1955. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  1956. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  1957. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  1958. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  1959. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  1960. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
  1961. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  1962. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  1963. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  1964. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  1965. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  1966. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  1967. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  1968. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  1969. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  1970. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
  1971. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  1972. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  1973. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  1974. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
  1975. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  1976. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  1977. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  1978. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  1979. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  1980. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  1981. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  1982. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  1983. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  1984. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  1985. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  1986. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  1987. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  1988. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  1989. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  1990. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  1991. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  1992. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  1993. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  1994. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  1995. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  1996. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  1997. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  1998. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  1999. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  2000. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  2001. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  2002. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  2003. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  2004. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  2005. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  2006. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
  2007. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  2008. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
  2009. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  2010. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  2011. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
  2012. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  2013. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  2014. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  2015. data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  2016. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
  2017. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
  2018. data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  2019. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
  2020. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  2021. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  2022. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  2023. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
  2024. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
  2025. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  2026. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
  2027. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
  2028. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  2029. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
  2030. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
  2031. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
  2032. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
  2033. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
  2034. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
  2035. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
  2036. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
  2037. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
  2038. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
  2039. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  2040. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
  2041. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
  2042. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
  2043. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
  2044. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
  2045. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
  2046. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
  2047. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
  2048. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
  2049. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  2050. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  2051. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
  2052. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
  2053. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
  2054. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  2055. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
  2056. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  2057. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
  2058. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
  2059. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
  2060. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
  2061. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  2062. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
  2063. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
  2064. data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  2065. data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
  2066. data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
  2067. data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  2068. data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  2069. data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
  2070. data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
  2071. data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  2072. data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
  2073. data/vendor/ggml/src/ggml.c +7777 -0
  2074. data/vendor/ggml/src/ggml.cpp +26 -0
  2075. data/vendor/ggml/src/gguf.cpp +1556 -0
  2076. data/vendor/ggml/tests/CMakeLists.txt +356 -0
  2077. data/vendor/ggml/tests/test-arange.cpp +100 -0
  2078. data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
  2079. data/vendor/ggml/tests/test-cont.c +170 -0
  2080. data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
  2081. data/vendor/ggml/tests/test-conv-transpose.c +248 -0
  2082. data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
  2083. data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
  2084. data/vendor/ggml/tests/test-conv1d.cpp +289 -0
  2085. data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
  2086. data/vendor/ggml/tests/test-conv2d.cpp +391 -0
  2087. data/vendor/ggml/tests/test-customop.c +300 -0
  2088. data/vendor/ggml/tests/test-dup.c +111 -0
  2089. data/vendor/ggml/tests/test-interpolate.cpp +166 -0
  2090. data/vendor/ggml/tests/test-opt.cpp +1003 -0
  2091. data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
  2092. data/vendor/ggml/tests/test-pool.c +274 -0
  2093. data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
  2094. data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
  2095. data/vendor/ggml/tests/test-rel-pos.c +87 -0
  2096. data/vendor/ggml/tests/test-roll.cpp +128 -0
  2097. data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
  2098. data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
  2099. data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
  2100. data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
  2101. data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
  2102. data/vendor-patches/0005-concat-backward.patch +36 -0
  2103. data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
  2104. data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
  2105. data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
  2106. data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
  2107. metadata +2161 -0
@@ -0,0 +1,1526 @@
1
+ # lib/toy/llm/engine/llama_kv_engine_cuda.rb — CUDA mirror of lib/toy/llm/engine/llama_kv_engine.rb.
2
+ #
3
+ # AUTO-GENERATED by prep/gen_cuda_mirror.rb. Do not edit by hand;
4
+ # edit the CPU source and re-run the generator. The CPU file's
5
+ # header explains the architecture; this mirror keeps the same
6
+ # contract on the GPU backend via TinyNNCuda.
7
+ #
8
+ # Mirror of lib/toy/llm/engine/gpt2_kv_engine.rb (was lib/gpt2_ffi_kv.rb)
9
+ # but for the llama-family architecture:
10
+ # - RMSNorm (no beta) instead of LayerNorm
11
+ # - No biases on Q / K / V / O / FFN projections by default (optional per-head Q / K / V biases exist for Qwen2.x: t_b_q / t_b_k / t_b_v)
12
+ # - SwiGLU FFN: down( silu(gate(x)) * up(x) )
13
+ # - RoPE applied to Q and K before the dot product
14
+ # - GQA: K and V are stored per-`n_kv`-head, not per-`n_heads`-head.
15
+ # Each KV head is shared by group_size = n_heads / n_kv query heads.
16
+ #
17
+ # Per decode step builds a single-position compute graph; K and V at
18
+ # the current position are written into persistent per-layer buffers
19
+ # via cpy-into-view (same pattern as the GPT-2 cache). Cost per step:
20
+ # constant in prompt length.
21
+
22
+ require_relative "../../models/transformer"
23
+ require_relative "../../../toy"
24
+ require_relative "../../models/toy_smollm2"
25
+ require_relative "../../ffi/tinynn_cuda"
26
+ # NOTE: not requiring "toy_smollm2_loader" here. Requiring it from
27
+ # inside this file triggers a Spinel GC mark crash in decode_step
28
+ # (sp_gc_mark / sp_PtrArray_new_scan) for reasons we haven't fully
29
+ # isolated — likely something about require-order interaction with
30
+ # Spinel's type inference around GGUFLoad. Callers that use
31
+ # realize_and_load_auto (or any method here that references
32
+ # GGUFLoad) must `require_relative "toy/models/toy_smollm2_loader"` from their
33
+ # top-level driver file BEFORE this file is loaded. The OpenAI API
34
+ # binaries and the realize-mmap demos already do.
35
+
36
+ # Per-block persistent tensors for the SmolLM2 KV cache.
37
+ #
38
+ # Q is split per query head (n_heads of them).
39
+ # K, V, and their persistent buffers are split per KV head (n_kv of them).
40
+ class SmolLM2KVBlockFFICuda
41
+ attr_accessor :t_rn1_gamma, :t_rn2_gamma,
42
+ :t_w_q, :t_w_k, :t_w_v, :t_w_o,
43
+ :t_b_q, :t_b_k, :t_b_v,
44
+ # M1: per-block QK-norm (Qwen3). RMSNorm on Q and K with
45
+ # a shared [d_head] gamma applied to every head before
46
+ # RoPE. Allocated only when has_qk_norm is set. The
47
+ # null-ptr seed lets graph-builder code branch cleanly.
48
+ :t_q_norm_gamma, :t_k_norm_gamma,
49
+ # I-Gemma (#113): post-attention and post-FFN RMSNorm
50
+ # gammas. Gemma 2 sandwiches each sublayer between a
51
+ # pre-norm (the existing t_rn1_gamma / t_rn2_gamma)
52
+ # and a post-norm (these). Shape [d_model] each.
53
+ # Allocated only when cache.has_post_norms is set.
54
+ :t_post_attn_norm_gamma, :t_post_ffn_norm_gamma,
55
+ :t_w_gate, :t_w_up, :t_w_down,
56
+ # M2.3 MoE. When SmolLM2KVFFICacheCuda#is_moe is true, the
57
+ # FFN block is replaced with a Mixtral-style routed FFN:
58
+ # t_w_router : 2D [d_model, n_experts] — gating
59
+ # t_w_gate_exps : 3D [d_model, d_ff, n_experts]
60
+ # t_w_up_exps : 3D [d_model, d_ff, n_experts]
61
+ # t_w_down_exps : 3D [d_ff, d_model, n_experts]
62
+ # Set by realize_for_mmap when GGUF carries
63
+ # blk.0.ffn_gate_inp.weight (the MoE-presence sentinel).
64
+ :t_w_router, :t_w_gate_exps, :t_w_up_exps, :t_w_down_exps,
65
+ :t_K, :t_V,
66
+ # F1.2: optional LoRA adapters on Q projection (one
67
+ # rank-R pair per Q head). t_w_lora_a_q[hq] has shape
68
+ # (r, d_model); t_w_lora_b_q[hq] has shape (d_head, r).
69
+ # Allocated only when cache.lora_q_enabled at realize
70
+ # time. Trainable f32 tensors in ctx_w (not mmap'd from
71
+ # GGUF — adapters are session-local).
72
+ :t_w_lora_a_q, :t_w_lora_b_q,
73
+ # F1.2 step 6b: optional persistent Adam moments paired
74
+ # with the LoRA-A/B tensors above. Allocated in ctx_w
75
+ # (NOT compute ctx) so they survive tnn_reset_for_rebuild
76
+ # between multi-position SFT steps. Same shapes as A/B.
77
+ # Allocated only when cache.lora_q_adamw_enabled. The
78
+ # m/v live next to A/B so a future "save adapter +
79
+ # optimizer state" hook can serialize them together.
80
+ :t_w_lora_a_q_m, :t_w_lora_a_q_v,
81
+ :t_w_lora_b_q_m, :t_w_lora_b_q_v
82
+
83
+ def initialize
84
+ @t_rn1_gamma = TinyNNCuda.tnn_null_ptr
85
+ @t_rn2_gamma = TinyNNCuda.tnn_null_ptr
86
+ @t_q_norm_gamma = TinyNNCuda.tnn_null_ptr
87
+ @t_k_norm_gamma = TinyNNCuda.tnn_null_ptr
88
+ @t_post_attn_norm_gamma = TinyNNCuda.tnn_null_ptr
89
+ @t_post_ffn_norm_gamma = TinyNNCuda.tnn_null_ptr
90
+ @t_w_q = [TinyNNCuda.tnn_null_ptr]
91
+ @t_w_k = [TinyNNCuda.tnn_null_ptr]
92
+ @t_w_v = [TinyNNCuda.tnn_null_ptr]
93
+ @t_b_q = [TinyNNCuda.tnn_null_ptr] # per-Q-head bias (Qwen2.x)
94
+ @t_b_k = [TinyNNCuda.tnn_null_ptr] # per-KV-head bias
95
+ @t_b_v = [TinyNNCuda.tnn_null_ptr] # per-KV-head bias (1-D [d_head])
96
+ @t_K = [TinyNNCuda.tnn_null_ptr]
97
+ @t_V = [TinyNNCuda.tnn_null_ptr]
98
+ @t_w_o = TinyNNCuda.tnn_null_ptr
99
+ @t_w_gate = TinyNNCuda.tnn_null_ptr
100
+ @t_w_up = TinyNNCuda.tnn_null_ptr
101
+ @t_w_down = TinyNNCuda.tnn_null_ptr
102
+ @t_w_router = TinyNNCuda.tnn_null_ptr
103
+ @t_w_gate_exps = TinyNNCuda.tnn_null_ptr
104
+ @t_w_up_exps = TinyNNCuda.tnn_null_ptr
105
+ @t_w_down_exps = TinyNNCuda.tnn_null_ptr
106
+ @t_w_lora_a_q = [TinyNNCuda.tnn_null_ptr]
107
+ @t_w_lora_b_q = [TinyNNCuda.tnn_null_ptr]
108
+ @t_w_lora_a_q_m = [TinyNNCuda.tnn_null_ptr]
109
+ @t_w_lora_a_q_v = [TinyNNCuda.tnn_null_ptr]
110
+ @t_w_lora_b_q_m = [TinyNNCuda.tnn_null_ptr]
111
+ @t_w_lora_b_q_v = [TinyNNCuda.tnn_null_ptr]
112
+ end
113
+ end
114
+
115
+ class SmolLM2KVFFICacheCuda
116
+ attr_accessor :sess, :t_token_embed, :t_final_norm_gamma,
117
+ :t_output, :has_untied_output, :has_qkv_bias,
118
+ # M1: Qwen3 added per-block QK-norm. When true, the
119
+ # graph builder applies tnn_rms_norm to Q and K with
120
+ # blk.t_q_norm_gamma / blk.t_k_norm_gamma (shape
121
+ # [d_head], shared across heads) BEFORE tnn_rope_ext.
122
+ # Detect by presence of "blk.0.attn_q_norm.weight" in
123
+ # the GGUF. Always false on Qwen2.5 / Llama-family.
124
+ :has_qk_norm,
125
+ # #110: which QK-norm flavor — 1 = per-head shared
126
+ # gamma (Qwen3, gamma shape [d_head]); 2 = full-Q
127
+ # gamma (OLMoE / Granite, gamma shape [d_model],
128
+ # applied to the concatenated Q before head split).
129
+ # 0 = none. Set by realize_for_mmap from the detected
130
+ # flags. The graph builder branches on this.
131
+ :qk_norm_kind,
132
+ # I-Gemma (#113): Gemma 2-specific knobs. All default
133
+ # to inert values (no-op) for non-Gemma models.
134
+ # has_post_norms: blk.X has post_attention_norm +
135
+ # post_ffw_norm tensors after the residual adds.
136
+ # embed_scale: post-token-embed multiplier
137
+ # (sqrt(d_model) for Gemma 2; 1.0 otherwise).
138
+ # attn_softcap: tanh-softcap on attention logits
139
+ # (50.0 for Gemma 2; 0.0 disables).
140
+ # final_softcap: tanh-softcap on the final output
141
+ # logits (30.0 for Gemma 2; 0.0 disables).
142
+ # swa_alternates: when true, only EVEN layers apply
143
+ # sliding window; odd layers see full attention.
144
+ :has_post_norms, :embed_scale,
145
+ :attn_softcap, :final_softcap, :swa_alternates,
146
+ # M3: SWA window. 0 = no sliding window (full causal).
147
+ # >0 = attend only to the last `swa_window` positions
148
+ # in the K/V cache. Phi-3-mini-4k sets this to 2048;
149
+ # Gemma 2 local layers set it to 4096. Realize-time
150
+ # parameter (set via realize_for_mmap or post-init).
151
+ :swa_window,
152
+ :kv_blocks_ffi,
153
+ :max_T, :d_model, :d_ff, :n_heads, :n_kv, :d_head,
154
+ :group_size, :n_layers, :vocab_size, :rope_base,
155
+ :rope_scaling, :t_rope_freq_factors,
156
+ :rms_eps, :realized,
157
+ # Phase 3: ggml type for 2D linear weights. Default
158
+ # 0 = GGML_TYPE_F32 (legacy). 8 = GGML_TYPE_Q8_0. Set
159
+ # via #set_weight_type before #realize_for to keep
160
+ # quantized weights quantized in memory.
161
+ :weight_type,
162
+ # P5.1+P5.2: KV cache dtype. 0 = F32 (legacy), 8 = Q8_0.
163
+ # `enable_kv_q8!` sets both to Q8_0; finer-grained
164
+ # control is reserved for future debugging. Per-position
165
+ # writes go through ggml_cpy which quantizes f32→Q8 at
166
+ # the destination view. P5.2 flipped V's layout to
167
+ # match K (`ne=[d_head, max_T]`, positions on ne1), so
168
+ # both write paths span contiguous d_head-vectors —
169
+ # block-aligned for Q8 at d_head=64 (=2 blocks of 32).
170
+ :kv_type_k, :kv_type_v,
171
+ # P4.1: opt into ggml_flash_attn_ext in the attention
172
+ # step (default false → existing scale→softmax→matmul
173
+ # triplet). When true, each Q head's attention is one
174
+ # fused kernel call. Backward NOT supported (flash_back
175
+ # aborts in vendored ggml), so this is INFERENCE only.
176
+ # Set via enable_flash_attn! BEFORE realize_for_*.
177
+ :use_flash_attn,
178
+ # M2.3: MoE flags. is_moe → replace SwiGLU FFN with the
179
+ # routed expert FFN (router → softmax → top_k → 3× mul_mat_id
180
+ # → silu·up → weighted sum). Set by detect_smollm2_flags
181
+ # when GGUF carries blk.0.ffn_gate_inp.weight.
182
+ :is_moe, :n_experts, :n_experts_used,
183
+ :gguf_handle_keepalive,
184
+ # F1.2: LoRA on Q projection. enable_lora_q!(r) sets
185
+ # both flags BEFORE realize. When enabled, each block
186
+ # gets per-Q-head trainable A/B adapter pairs spliced
187
+ # into the Q matmul: q_eff = w_q[h]@h + B[h]@A[h]@h.
188
+ :lora_q_enabled, :lora_q_rank,
189
+ # F1.2 step 6b: when true, realize_for_mmap also
190
+ # allocates persistent AdamW moments (m, v) for every
191
+ # LoRA-A/B pair in ctx_w. Required for multi-position
192
+ # SFT: between graph rebuilds the compute ctx is freed,
193
+ # so moments held there would be lost (NaN on cycle 2+).
194
+ :lora_q_adamw_enabled
195
+
196
+ def initialize
197
+ @realized = false
198
+ @max_T = 0
199
+ @d_model = 0
200
+ @d_ff = 0
201
+ @n_heads = 0
202
+ @n_kv = 0
203
+ @d_head = 0
204
+ @group_size = 0
205
+ @n_layers = 0
206
+ @vocab_size = 0
207
+ @rope_base = 10000.0
208
+ @rope_scaling = Toy::RopeScaling.none
209
+ @t_rope_freq_factors = TinyNNCuda.tnn_null_ptr
210
+ @rms_eps = 1.0e-5
211
+ @sess = TinyNNCuda.tnn_null_ptr
212
+ @t_token_embed = TinyNNCuda.tnn_null_ptr
213
+ @t_final_norm_gamma = TinyNNCuda.tnn_null_ptr
214
+ @t_output = TinyNNCuda.tnn_null_ptr
215
+ @has_untied_output = false
216
+ @has_qkv_bias = false
217
+ @has_qk_norm = false
218
+ @qk_norm_kind = 0
219
+ @swa_window = 0
220
+ @has_post_norms = false
221
+ @embed_scale = 1.0
222
+ @attn_softcap = 0.0
223
+ @final_softcap = 0.0
224
+ @swa_alternates = false
225
+ @kv_blocks_ffi = [SmolLM2KVBlockFFICuda.new]
226
+ @weight_type = 0 # GGML_TYPE_F32; legacy default
227
+ @kv_type_k = 0 # GGML_TYPE_F32; opt in via enable_kv_q8!
228
+ @kv_type_v = 0 # GGML_TYPE_F32; opt in via enable_kv_q8!
229
+ @use_flash_attn = false # opt in via enable_flash_attn!
230
+ @is_moe = false
231
+ @n_experts = 0
232
+ @n_experts_used = 0
233
+ @gguf_handle_keepalive = TinyNNCuda.tnn_null_ptr # set by realize_for_mmap
234
+ @lora_q_enabled = false
235
+ @lora_q_rank = 0
236
+ @lora_q_adamw_enabled = false
237
+ end
238
+
239
+ # P5.1 (historical): only the K cache could opt into Q8_0; V stayed
240
+ # F32 because its old layout (positions along ne0) made per-position
241
+ # Q8 writes non-block-aligned. K's layout (positions along ne1,
242
+ # d_head along ne0) writes whole d_head-vectors at a time, which for
243
+ # d_head=64 spans exactly 2 Q8_0 blocks of 32 elements each →
244
+ # aligned. The write path uses ggml_cpy, which quantizes f32→Q8 at
245
+ # the destination; the read path (attention matmul) dequantizes
246
+ # block-by-block inside ggml's kernel. Cut K-cache memory & bandwidth ~4×.
247
+ # P5.1+P5.2 (current): opt into Q8_0 for both the K and V caches.
248
+ # Must be called BEFORE realize_for_mmap. Shrinks K and V memory +
249
+ # bandwidth by about 3.75× at d_head=64.
250
+ #
251
+ # Auto-enables flash attention. Reason: the non-flash V matmul
252
+ # requires a transpose-cont of V_hist, which is structurally
253
+ # impossible for Q8_0 (transposing flips the d_head and hist_count
254
+ # axes; hist_count generally isn't a multiple of 32, so the
255
+ # contiguous Q8 destination can't be allocated). flash_attn_ext
256
+ # consumes V in its natural [d_head, hist_count] orientation,
257
+ # which dodges the transpose entirely — so Q8 V works there.
258
+ #
259
+ # Inference-only. flash_attn's backward aborts in vendored ggml.
260
+ def enable_kv_q8!
261
+ @kv_type_k = 8 # GGML_TYPE_Q8_0
262
+ @kv_type_v = 8
263
+ @use_flash_attn = true
264
+ end
265
+
266
+ # P4.1: opt into ggml_flash_attn_ext for inference. Per-Q-head it
267
+ # replaces the (scale → softmax → matmul) triplet with one fused
268
+ # call. As first written, the V cache kept a [max_T, d_head] layout
269
+ # and was transpose-materialized per step (cheap; one ggml_cont).
270
+ # P5.2 has since flipped V's layout to match K (ne=[d_head, max_T]),
271
+ # which removes that transpose and unlocks V Q8 (see enable_kv_q8!).
272
+ #
273
+ # Backward is unsupported in vendored ggml (flash_attn_back aborts),
274
+ # so this path is INFERENCE only. Call BEFORE realize_for_mmap.
275
+ def enable_flash_attn!
276
+ @use_flash_attn = true
277
+ end
278
+
279
+ # M2.3: opt into the MoE FFN graph. Must be called BEFORE realize_for_mmap.
280
+ # n_experts is the total count in the GGUF; n_experts_used is the
281
+ # top-K routed per token. Mixtral-8x7B: enable_moe!(8, 2). Qwen3-30B-
282
+ # A3B: enable_moe!(128, 8) (with optional shared expert — not yet
283
+ # supported in this path).
284
+ def enable_moe!(n_experts, n_experts_used)
285
+ @is_moe = true
286
+ @n_experts = n_experts
287
+ @n_experts_used = n_experts_used
288
+ end
289
+
290
+ # F1.2: enable per-Q-head LoRA on this session's forward graph. Call
291
+ # BEFORE realize_for_mmap. Adapter A is (r, d_model), adapter B is
292
+ # (d_head, r); both trainable F32 tensors in ctx_w (not mmap'd, so
293
+ # writes survive). Standard LoRA init: A = small Gaussian, B = 0,
294
+ # which makes the adapter a no-op at step 0 (forward output ==
295
+ # baseline). Use upload_lora_zero!(seed) to set up that init.
296
+ def enable_lora_q!(r)
297
+ @lora_q_enabled = true
298
+ @lora_q_rank = r
299
+ end
300
+
301
+ # F1.2 step 6b: allocate persistent AdamW moments (m, v) alongside
302
+ # each LoRA-A/B pair, in ctx_w. Requires enable_lora_q!(...) to have
303
+ # been called first (so the rank is known). Call BEFORE
304
+ # realize_for_mmap. Without this, multi-position SFT loses Adam
305
+ # state at every graph rebuild and diverges to NaN.
306
+ def enable_lora_q_adamw!
307
+ @lora_q_adamw_enabled = true
308
+ end
309
+
310
+ # Phase 3 opt-in: set the ggml type used for 2D linear weights when
311
+ # realize_for runs. 0 = F32, 8 = Q8_0. Call BEFORE realize_for —
312
+ # the persistent tensors are allocated there.
313
+ def set_weight_type(t)
314
+ @weight_type = t
315
+ end
316
+
317
+ # Allocate one persistent 2D linear weight tensor at the configured
318
+ # type. Used by realize_for; keeps the Q8/F32 branch in one place.
319
+ # Non-2D-linear tensors (norms, biases, K/V cache, t_output) stay
320
+ # F32 even in Q8 mode — quantizing them costs accuracy with no
321
+ # compute saving.
322
+ def alloc_2d_w(rows, cols)
323
+ if @weight_type == 0
324
+ TinyNNCuda.tnn_input_2d_f32_persistent(@sess, rows, cols)
325
+ else
326
+ TinyNNCuda.tnn_input_2d_persistent_typed(@sess, rows, cols, @weight_type)
327
+ end
328
+ end
329
+
330
+ # Phase 2 BYO-pointer realization. Like realize_for but every
331
+ # GGUF-resident tensor (token_embed, norms, biases, all 2D linears,
332
+ # untied output) is allocated to POINT AT the file's mmap'd pages
333
+ # rather than copied into a backend buffer. Only K/V cache and the
334
+ # compute scratch live in backend-allocated memory. The kv_cache
335
+ # holds the GGUF handle so the mmap stays alive for its lifetime.
336
+ #
337
+ # Caller flow:
338
+ # gguf = TinyNNCuda.tnn_gguf_load(path) # mmap'd, no_alloc
339
+ # flags = GGUFLoad.detect_smollm2_flags(path)
340
+ # wtype = GGUFLoad.detect_weight_type(path)
341
+ # kv = SmolLM2KVFFICacheCuda.new
342
+ # kv.realize_for_mmap(gguf, cfg, MAX_T, flags.untied, flags.qkv_bias, qk_norm)
343
+ # # qk_norm: true iff the GGUF has "blk.0.attn_q_norm.weight". Weights are already in place; no load_weights call needed.
344
+ def realize_for_mmap(gguf_handle, cfg, max_T, untied, qkv_bias, qk_norm)
345
+ @max_T = max_T
346
+ @d_model = cfg.d_model
347
+ @d_ff = cfg.d_ff
348
+ @n_heads = cfg.n_heads
349
+ @n_kv = cfg.n_kv
350
+ @d_head = cfg.head_dim
351
+ @group_size = cfg.n_heads / cfg.n_kv
352
+ @n_layers = cfg.n_layers
353
+ @vocab_size = cfg.vocab
354
+ @rope_base = cfg.rope_base
355
+ @rope_scaling = cfg.rope_scaling
356
+ @rms_eps = cfg.rms_eps
357
+
358
+ @gguf_handle_keepalive = gguf_handle # prevent GC; mmap must outlive @sess
359
+ @sess = TinyNNCuda.tnn_session_new(1)
360
+ @has_untied_output = untied
361
+ @has_qkv_bias = qkv_bias
362
+ @has_qk_norm = qk_norm
363
+ # #110: if caller didn't pre-set qk_norm_kind via the
364
+ # attr_accessor, default to 1 (per-head shared) for backward
365
+ # compat with the Qwen3 detection that established the qk_norm
366
+ # path. Models that want full-Q (OLMoE / Granite) must set
367
+ # kv.qk_norm_kind = 2 BEFORE calling realize_for_mmap.
368
+ if @has_qk_norm && @qk_norm_kind == 0
369
+ @qk_norm_kind = 1
370
+ end
371
+
372
+ # llama3 / LongRoPE: allocate the (d_head/2)-elem freq_factors
373
+ # tensor in ctx_w before finalize_weights. We compute and upload
374
+ # the values after finalize (see below). For all other rope_scaling
375
+ # kinds the FFI call still needs a pointer — pass tnn_null_ptr.
376
+ if @rope_scaling.kind == :llama3
377
+ @t_rope_freq_factors = TinyNNCuda.tnn_rope_freq_factors_alloc(@sess, @d_head)
378
+ else
379
+ @t_rope_freq_factors = TinyNNCuda.tnn_null_ptr
380
+ end
381
+
382
+ # Wire the GGUF's mmap region into the session as the source of
383
+ # weight bytes. Subsequent tnn_input_*_persistent_mmap calls
384
+ # allocate tensors with .data inside this region — no copy.
385
+ map_base = TinyNNCuda.tnn_gguf_mmap_base(gguf_handle)
386
+ map_size = TinyNNCuda.tnn_gguf_mmap_size(gguf_handle)
387
+ TinyNNCuda.tnn_session_attach_weight_mmap(@sess, map_base, map_size)
388
+
389
+ # toy#gguf-checkpoint-reload (#153) — from-scratch checkpoints
390
+ # written by ToyGGUFWriter store one tensor per head
391
+ # (blk.N.attn_q.head_H.weight) rather than the fused llama.cpp
392
+ # shape. Detect via the head_0 sentinel; the per-Q-head/K/V
393
+ # loaders below branch on it.
394
+ @per_head_attn = TinyNNCuda.tnn_gguf_find_index(gguf_handle, "blk.0.attn_q.head_0.weight") >= 0
395
+ if @per_head_attn
396
+ puts " per-head tensors detected (toy from-scratch checkpoint)"
397
+ end
398
+
399
+ # Globals — embeddings + final norm + optional untied output.
400
+ eidx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, "token_embd.weight")
401
+ eoff = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, eidx)
402
+ etyp = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, eidx)
403
+ @t_token_embed = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
404
+ @vocab_size, @d_model, etyp, eoff)
405
+
406
+ fnidx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, "output_norm.weight")
407
+ fnoff = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, fnidx)
408
+ @t_final_norm_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess,
409
+ @d_model, 0, fnoff) # 0 = GGML_TYPE_F32
410
+
411
+ if untied
412
+ oidx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, "output.weight")
413
+ ooff = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, oidx)
414
+ otyp = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, oidx)
415
+ @t_output = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
416
+ @vocab_size, @d_model, otyp, ooff)
417
+ end
418
+
419
+ @kv_blocks_ffi = [SmolLM2KVBlockFFICuda.new]
420
+ li = 1
421
+ while li < @n_layers
422
+ @kv_blocks_ffi.push(SmolLM2KVBlockFFICuda.new)
423
+ li = li + 1
424
+ end
425
+
426
+ li = 0
427
+ while li < @n_layers
428
+ blk = @kv_blocks_ffi[li]
429
+ prefix = "blk." + li.to_s
430
+
431
+ # Norms — 1D F32 mmap'd directly.
432
+ rn1_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_norm.weight")
433
+ rn2_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_norm.weight")
434
+ blk.t_rn1_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
435
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, rn1_idx))
436
+ blk.t_rn2_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
437
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, rn2_idx))
438
+
439
+ # I-Gemma (#113): post-attention and post-FFN RMSNorm gammas
440
+ # (Gemma 2 sandwiches each sublayer between pre+post norms).
441
+ # Tensor names: blk.X.post_attention_norm.weight, blk.X.post_ffw_norm.weight.
442
+ if @has_post_norms
443
+ pa_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".post_attention_norm.weight")
444
+ pf_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".post_ffw_norm.weight")
445
+ blk.t_post_attn_norm_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
446
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, pa_idx))
447
+ blk.t_post_ffn_norm_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_model, 0,
448
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, pf_idx))
449
+ end
450
+
451
+ # M1 + #110: QK-norm gammas. Two flavors detected via shape:
452
+ # kind=1: Qwen3 — gamma shape [d_head], shared across heads.
453
+ # kind=2: OLMoE / Granite — gamma shape [d_model], applied to
454
+ # the full Q before head split. Allocate the full
455
+ # [d_model] tensor; the graph builder either does a
456
+ # full-Q rms_norm OR views per-head d_head slices.
457
+ gamma_nelems = (@qk_norm_kind == 2) ? @d_model : @d_head
458
+ if @has_qk_norm
459
+ qn_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q_norm.weight")
460
+ kn_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k_norm.weight")
461
+ blk.t_q_norm_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, gamma_nelems, 0,
462
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, qn_idx))
463
+ # K norm follows the same flavor as Q.
464
+ k_gamma_nelems = (@qk_norm_kind == 2) ? (@n_kv * @d_head) : @d_head
465
+ blk.t_k_norm_gamma = TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, k_gamma_nelems, 0,
466
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, kn_idx))
467
+ end
468
+
469
+ # Q per-head — two layouts:
470
+ # 1) Fused (llama.cpp): single attn_q.weight tensor; each head
471
+ # is a contiguous slice at offset q_base + h * head_nbytes.
472
+ # 2) Per-head (toy from-scratch ckpt, #153): each head has its
473
+ # own attn_q.head_H.weight tensor with its own file offset.
474
+ if @per_head_attn
475
+ q0_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.head_0.weight")
476
+ q0_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, q0_idx)
477
+ blk.t_w_q = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
478
+ @d_head, @d_model, q0_type,
479
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, q0_idx))]
480
+ hq = 1
481
+ while hq < @n_heads
482
+ qh_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.head_" + hq.to_s + ".weight")
483
+ blk.t_w_q.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
484
+ @d_head, @d_model, q0_type,
485
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, qh_idx)))
486
+ hq = hq + 1
487
+ end
488
+ else
489
+ q_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.weight")
490
+ q_off_base = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, q_idx)
491
+ q_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, q_idx)
492
+ q_stride = head_nbytes(q_type, @d_head, @d_model)
493
+ blk.t_w_q = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
494
+ @d_head, @d_model, q_type, q_off_base)]
495
+ hq = 1
496
+ while hq < @n_heads
497
+ blk.t_w_q.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
498
+ @d_head, @d_model, q_type,
499
+ q_off_base + hq * q_stride))
500
+ hq = hq + 1
501
+ end
502
+ end
503
+
504
+ # F1.2: per-Q-head LoRA adapter slots. F32-only, allocated in
505
+ # ctx_w (trainable, not mmap'd). A: (r, d_model). B: (d_head, r).
506
+ # Standard init (A small Gaussian + B zero) makes the adapter
507
+ # equal to zero at step 0 → forward output matches the base
508
+ # model exactly. Caller seeds via upload_lora_q_init!(seed).
509
+ if @lora_q_enabled
510
+ blk.t_w_lora_a_q = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
511
+ @lora_q_rank, @d_model)]
512
+ blk.t_w_lora_b_q = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
513
+ @d_head, @lora_q_rank)]
514
+ hq = 1
515
+ while hq < @n_heads
516
+ blk.t_w_lora_a_q.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
517
+ @lora_q_rank, @d_model))
518
+ blk.t_w_lora_b_q.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
519
+ @d_head, @lora_q_rank))
520
+ hq = hq + 1
521
+ end
522
+
523
+ # F1.2 step 6b: persistent AdamW moments paired with the LoRA
524
+ # adapter tensors above. Same shapes. Live in ctx_w so they
525
+ # survive tnn_reset_for_rebuild across multi-position SFT.
526
+ if @lora_q_adamw_enabled
527
+ blk.t_w_lora_a_q_m = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
528
+ @lora_q_rank, @d_model)]
529
+ blk.t_w_lora_a_q_v = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
530
+ @lora_q_rank, @d_model)]
531
+ blk.t_w_lora_b_q_m = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
532
+ @d_head, @lora_q_rank)]
533
+ blk.t_w_lora_b_q_v = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
534
+ @d_head, @lora_q_rank)]
535
+ hqm = 1
536
+ while hqm < @n_heads
537
+ blk.t_w_lora_a_q_m.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
538
+ @lora_q_rank, @d_model))
539
+ blk.t_w_lora_a_q_v.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
540
+ @lora_q_rank, @d_model))
541
+ blk.t_w_lora_b_q_m.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
542
+ @d_head, @lora_q_rank))
543
+ blk.t_w_lora_b_q_v.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess,
544
+ @d_head, @lora_q_rank))
545
+ hqm = hqm + 1
546
+ end
547
+ end
548
+ end
549
+
550
+ # K, V per-kv-head — same dual-layout split (#153).
551
+ if @per_head_attn
552
+ k0_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.head_0.weight")
553
+ v0_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.head_0.weight")
554
+ k_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, k0_idx)
555
+ v_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, v0_idx)
556
+ blk.t_w_k = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
557
+ @d_head, @d_model, k_type,
558
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, k0_idx))]
559
+ blk.t_w_v = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
560
+ @d_head, @d_model, v_type,
561
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, v0_idx))]
562
+ k_stride = 0 # unused in per-head branch but referenced later
563
+ v_stride = 0
564
+ else
565
+ k_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.weight")
566
+ v_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.weight")
567
+ k_off_base = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, k_idx)
568
+ v_off_base = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, v_idx)
569
+ k_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, k_idx)
570
+ v_type = TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, v_idx)
571
+ k_stride = head_nbytes(k_type, @d_head, @d_model)
572
+ v_stride = head_nbytes(v_type, @d_head, @d_model)
573
+ blk.t_w_k = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
574
+ @d_head, @d_model, k_type, k_off_base)]
575
+ blk.t_w_v = [TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
576
+ @d_head, @d_model, v_type, v_off_base)]
577
+ end
578
+ # P5.1+P5.2: K and V allocs both follow @kv_type_*. Layout is
579
+ # `ne=[d_head, max_T]` for both — positions on ne1, d_head on
580
+ # ne0. Per-position writes span a contiguous d_head-vector
581
+ # which is Q8-block-aligned at d_head=64 (=2 blocks of 32).
582
+ # See the struct comment on :kv_type_k / :kv_type_v.
583
+ if @kv_type_k == 8
584
+ blk.t_K = [TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8)]
585
+ else
586
+ blk.t_K = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, @d_head)]
587
+ end
588
+ if @kv_type_v == 8
589
+ blk.t_V = [TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8)]
590
+ else
591
+ blk.t_V = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, @d_head)]
592
+ end
593
+ hkv = 1
594
+ while hkv < @n_kv
595
+ if @per_head_attn
596
+ kh_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.head_" + hkv.to_s + ".weight")
597
+ vh_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.head_" + hkv.to_s + ".weight")
598
+ blk.t_w_k.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
599
+ @d_head, @d_model, k_type,
600
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, kh_idx)))
601
+ blk.t_w_v.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
602
+ @d_head, @d_model, v_type,
603
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, vh_idx)))
604
+ else
605
+ blk.t_w_k.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
606
+ @d_head, @d_model, k_type,
607
+ k_off_base + hkv * k_stride))
608
+ blk.t_w_v.push(TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
609
+ @d_head, @d_model, v_type,
610
+ v_off_base + hkv * v_stride))
611
+ end
612
+ if @kv_type_k == 8
613
+ blk.t_K.push(TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8))
614
+ else
615
+ blk.t_K.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, @d_head))
616
+ end
617
+ if @kv_type_v == 8
618
+ blk.t_V.push(TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, @d_head, 8))
619
+ else
620
+ blk.t_V.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, @d_head))
621
+ end
622
+ hkv = hkv + 1
623
+ end
624
+
625
+ # Q/K/V biases — 1D F32 per head, contiguous in the file.
626
+ if qkv_bias
627
+ qb_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_q.bias")
628
+ kb_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_k.bias")
629
+ vb_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_v.bias")
630
+ qb_off = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, qb_idx)
631
+ kb_off = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, kb_idx)
632
+ vb_off = TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, vb_idx)
633
+ bias_stride = @d_head * 4 # f32
634
+
635
+ blk.t_b_q = [TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, qb_off)]
636
+ hq = 1
637
+ while hq < @n_heads
638
+ blk.t_b_q.push(TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
639
+ qb_off + hq * bias_stride))
640
+ hq = hq + 1
641
+ end
642
+
643
+ blk.t_b_k = [TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, kb_off)]
644
+ blk.t_b_v = [TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0, vb_off)]
645
+ hkv = 1
646
+ while hkv < @n_kv
647
+ blk.t_b_k.push(TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
648
+ kb_off + hkv * bias_stride))
649
+ blk.t_b_v.push(TinyNNCuda.tnn_input_1d_persistent_mmap(@sess, @d_head, 0,
650
+ vb_off + hkv * bias_stride))
651
+ hkv = hkv + 1
652
+ end
653
+ end
654
+
655
+ # O / FFN — full 2D weights, no per-head slicing.
656
+ o_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".attn_output.weight")
657
+ # M1.1: o_proj maps [n_heads * d_head] → [d_model]. For models
658
+ # where d_head = d_model / n_heads (SmolLM2 / Llama / Qwen2.5)
659
+ # these are equal; for Qwen3 with explicit head_dim=128 they
660
+ # differ (n_heads * d_head = 2048, d_model = 1024).
661
+ blk.t_w_o = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess, @d_model, @n_heads * @d_head,
662
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, o_idx),
663
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, o_idx))
664
+
665
+ if @is_moe
666
+ # M2.3: MoE FFN. Per-expert weight matrices are stacked along
667
+ # ne2 in the GGUF (llama.cpp convention):
668
+ # ffn_gate_inp.weight : ne=[d_model, n_experts]
669
+ # ffn_gate_exps.weight: ne=[d_model, d_ff, n_experts]
670
+ # ffn_up_exps.weight : ne=[d_model, d_ff, n_experts]
671
+ # ffn_down_exps.weight: ne=[d_ff, d_model, n_experts]
672
+ # All mmap'd in place — Mixtral-8x7B Q4_K_M (26 GB) loads without
673
+ # any RAM copy.
674
+ router_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate_inp.weight")
675
+ gate_exps_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate_exps.weight")
676
+ up_exps_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_up_exps.weight")
677
+ down_exps_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_down_exps.weight")
678
+ # #112 (RESOLVED): K-quant MoE experts work. The old warning here
679
+ # blamed ggml's mul_mat_id kernel for the OLMoE-Q4_K_M corruption,
680
+ # but the op was always correct for K-quants (verified by op-level
681
+ # and real-bytes reproducers in tinynn/ggml1506_*). The actual bug
682
+ # was head_nbytes() returning 0 for K-quant ATTENTION weights,
683
+ # collapsing every head onto head 0 — fixed there. K-quant expert
684
+ # stacks (gate/up/down, including OLMoE's mixed q4_K+q6_K down_exps)
685
+ # load and run coherently. See docs/notes/mul_mat_id_quants.md.
686
+ blk.t_w_router = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess,
687
+ @n_experts, @d_model,
688
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, router_idx),
689
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, router_idx))
690
+ blk.t_w_gate_exps = TinyNNCuda.tnn_input_3d_persistent_mmap(@sess,
691
+ @d_model, @d_ff, @n_experts,
692
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, gate_exps_idx),
693
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, gate_exps_idx))
694
+ blk.t_w_up_exps = TinyNNCuda.tnn_input_3d_persistent_mmap(@sess,
695
+ @d_model, @d_ff, @n_experts,
696
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, up_exps_idx),
697
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, up_exps_idx))
698
+ blk.t_w_down_exps = TinyNNCuda.tnn_input_3d_persistent_mmap(@sess,
699
+ @d_ff, @d_model, @n_experts,
700
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, down_exps_idx),
701
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, down_exps_idx))
702
+ else
703
+ gate_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_gate.weight")
704
+ up_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_up.weight")
705
+ down_idx = TinyNNCuda.tnn_gguf_find_index(gguf_handle, prefix + ".ffn_down.weight")
706
+ blk.t_w_gate = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess, @d_ff, @d_model,
707
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, gate_idx),
708
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, gate_idx))
709
+ blk.t_w_up = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess, @d_ff, @d_model,
710
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, up_idx),
711
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, up_idx))
712
+ blk.t_w_down = TinyNNCuda.tnn_input_2d_persistent_mmap(@sess, @d_model, @d_ff,
713
+ TinyNNCuda.tnn_gguf_tensor_type(gguf_handle, down_idx),
714
+ TinyNNCuda.tnn_gguf_tensor_file_offset(gguf_handle, down_idx))
715
+ end
716
+
717
+ li = li + 1
718
+ end
719
+
720
+ # F1.2: mark LoRA tensors as trainable BEFORE finalize_weights.
721
+ # set_param flips a flag on the tensor; the build_backward pass
722
+ # later walks PARAM-flagged nodes to emit grad nodes. Doing it
723
+ # here (rather than in the smoke) keeps the cache class as the
724
+ # single source of truth for what's trainable in a session.
725
+ if @lora_q_enabled
726
+ li2 = 0
727
+ while li2 < @n_layers
728
+ blk2 = @kv_blocks_ffi[li2]
729
+ hq = 0
730
+ while hq < @n_heads
731
+ TinyNNCuda.tnn_set_param(blk2.t_w_lora_a_q[hq])
732
+ TinyNNCuda.tnn_set_param(blk2.t_w_lora_b_q[hq])
733
+ hq = hq + 1
734
+ end
735
+ li2 = li2 + 1
736
+ end
737
+ end
738
+
739
+ # Finalize the regular persistent context (K/V cache buffers).
740
+ # Mmap'd tensors don't need finalization — they were allocated
741
+ # against weights_buf_mmap inline.
742
+ TinyNNCuda.tnn_finalize_weights(@sess)
743
+
744
+ # Upload llama3-style RoPE freq_factors once the backend buffer
745
+ # for @t_rope_freq_factors exists (post-finalize). The values are
746
+ # a per-model constant — never re-uploaded across rebuild cycles.
747
+ if @rope_scaling.kind == :llama3
748
+ ff = Toy::RopeScaling.compute_llama3_freq_factors(
749
+ @d_head, @rope_base,
750
+ @rope_scaling.orig_max_pos, @rope_scaling.factor,
751
+ @rope_scaling.low_freq_factor, @rope_scaling.high_freq_factor)
752
+ TinyNNCuda.tnn_upload_from_float_array(@sess, @t_rope_freq_factors,
753
+ ff, ff.length)
754
+ end
755
+
756
+ # F1.2 step 6b: zero-init persistent Adam moments. AdamW's update
757
+ # rule assumes m = v = 0 at step 0 (otherwise the first step picks
758
+ # up garbage from the buffer). The bias-correction term beta1h/beta2h
759
+ # then ramps in as the moments accumulate.
760
+ if @lora_q_adamw_enabled
761
+ za = Mat.new(@lora_q_rank, @d_model)
762
+ zb = Mat.new(@d_head, @lora_q_rank)
763
+ i = 0
764
+ while i < @lora_q_rank * @d_model; za.flat[i] = 0.0; i = i + 1; end
765
+ j = 0
766
+ while j < @d_head * @lora_q_rank; zb.flat[j] = 0.0; j = j + 1; end
767
+ li_z = 0
768
+ while li_z < @n_layers
769
+ blk_z = @kv_blocks_ffi[li_z]
770
+ hqz = 0
771
+ while hqz < @n_heads
772
+ TinyNNCuda.upload_row_major(@sess, blk_z.t_w_lora_a_q_m[hqz], za)
773
+ TinyNNCuda.upload_row_major(@sess, blk_z.t_w_lora_a_q_v[hqz], za)
774
+ TinyNNCuda.upload_row_major(@sess, blk_z.t_w_lora_b_q_m[hqz], zb)
775
+ TinyNNCuda.upload_row_major(@sess, blk_z.t_w_lora_b_q_v[hqz], zb)
776
+ hqz = hqz + 1
777
+ end
778
+ li_z = li_z + 1
779
+ end
780
+ end
781
+
782
+ # Zero-init K/V cache buffers (same as realize_for + legacy load).
783
+ # P5.1: skip K zero-init when K is Q8_0. upload_row_major writes
784
+ # F32 row-major bytes which would corrupt a Q8 tensor's quantization
785
+ # blocks. The K cache is read only at positions [0, pos+1], and
786
+ # every position is written before it's read, so unset trailing
787
+ # positions are never observed — zero-init is paranoia and safe
788
+ # to skip for Q8. P5.2 flipped V to mirror K's layout, so V's
789
+ # zero-init Mat now has the same shape as K's, and the same Q8
790
+ # skip rule applies.
791
+ kv_zero = Mat.new(max_T, @d_head)
792
+ li = 0
793
+ while li < @n_layers
794
+ blk_f = @kv_blocks_ffi[li]
795
+ hkv = 0
796
+ while hkv < @n_kv
797
+ if @kv_type_k != 8
798
+ TinyNNCuda.upload_row_major(@sess, blk_f.t_K[hkv], kv_zero)
799
+ end
800
+ if @kv_type_v != 8
801
+ TinyNNCuda.upload_row_major(@sess, blk_f.t_V[hkv], kv_zero)
802
+ end
803
+ hkv = hkv + 1
804
+ end
805
+ li = li + 1
806
+ end
807
+
808
+ @realized = true
809
+ end
810
+
811
+ # realize_and_load_auto (defined below, after upload_lora_q_init!) — auto-dispatch: open the GGUF, peek at its `toy.ggml_native` flag,
812
+ # and route to either the BYO-pointer mmap path (Phase 2) or the
813
+ # legacy realize_for + load_weights copy path. Returns the GGUF
814
+ # handle (or null for the legacy path); the kv_cache holds it via
815
+ # @gguf_handle_keepalive so the mmap stays valid for inference.
816
+ #
817
+ # Caller must have `require_relative "toy/models/toy_smollm2_loader"` at the
818
+ # top-level driver — this file deliberately does NOT require it
819
+ # (require-order with GGUFLoad's methods that touch `weight_type`
820
+ # was triggering a Spinel GC crash in decode_step).
821
+ # F1.2: standard LoRA init for the Q adapters. A = small Gaussian
822
+ # (scale = init_scale, default 0.01); B = zero. With B=0 the LoRA
823
+ # contribution is exactly zero, so forward output matches the base
824
+ # model bit-for-bit at step 0. Call AFTER realize_for_mmap.
825
# Seed the per-Q-head LoRA adapters: every A matrix gets Gaussian noise
# scaled by `init_scale`, every B matrix gets zeros. Does nothing when
# @lora_q_enabled is falsy.
#
# seed       - Integer start state for the LCG; the state is threaded
#              through every (layer, head) pair, so each A differs.
# init_scale - Float multiplier applied to each Gaussian sample.
def upload_lora_q_init!(seed, init_scale)
  return unless @lora_q_enabled

  a_count = @lora_q_rank * @d_model
  b_count = @d_head * @lora_q_rank

  lcg = seed
  a_mat = Mat.new(@lora_q_rank, @d_model)

  # B is the same all-zero matrix for every head; fill it once.
  b_zero = Mat.new(@d_head, @lora_q_rank)
  bi = 0
  while bi < b_count
    b_zero.flat[bi] = 0.0
    bi += 1
  end

  layer = 0
  while layer < @n_layers
    block = @kv_blocks_ffi[layer]
    head = 0
    while head < @n_heads
      # Box-Muller transform over two consecutive LCG draws per element.
      k = 0
      while k < a_count
        lcg = (lcg * 1103515245 + 12345) & 0x7FFFFFFF
        u1 = (lcg.to_f + 1.0) / 2147483648.0
        lcg = (lcg * 1103515245 + 12345) & 0x7FFFFFFF
        u2 = (lcg.to_f + 1.0) / 2147483648.0
        radius = Math.sqrt(-2.0 * Math.log(u1))
        angle = 2.0 * Math::PI * u2
        a_mat.flat[k] = init_scale * radius * Math.cos(angle)
        k += 1
      end
      TinyNNCuda.upload_row_major(@sess, block.t_w_lora_a_q[head], a_mat)
      TinyNNCuda.upload_row_major(@sess, block.t_w_lora_b_q[head], b_zero)
      head += 1
    end
    layer += 1
  end
end
858
+
859
# Open the GGUF at `gguf_path`, read its `toy.ggml_native` boolean and
# dispatch on it:
#   * native  -> detect the weight type, then realize_for_mmap against
#                the opened handle (weights stay in the file's mmap);
#   * legacy  -> free the handle, then realize_for + load_weights.
#
# gguf_path - path of the GGUF file.
# max_T     - maximum number of positions the K/V cache is sized for.
# cfg       - model config; d_model, d_ff, n_heads, n_kv, n_layers,
#             vocab, rope_base and rms_eps are read on the legacy path.
# flags     - detected model flags; `untied` and `qkv_bias` are read on
#             both paths, `qk_norm` on the native path only.
#
# Returns the GGUF handle on the native path, TinyNNCuda.tnn_null_ptr on
# the legacy path.
def realize_and_load_auto(gguf_path, max_T, cfg, flags)
  gguf = TinyNNCuda.tnn_gguf_load(gguf_path)
  is_native = TinyNNCuda.tnn_gguf_get_bool(gguf, "toy.ggml_native") == 1
  if is_native
    wtype = GGUFLoad.detect_weight_type(gguf_path)
    set_weight_type(wtype)
    # realize_for_mmap takes six arguments (the last is qk_norm); the
    # previous five-argument call could not satisfy that signature.
    # NOTE(review): assumes `flags` exposes `qk_norm` alongside `untied`
    # and `qkv_bias` — confirm against the flags detector.
    realize_for_mmap(gguf, cfg, max_T, flags.untied, flags.qkv_bias, flags.qk_norm)
    puts " BYO-pointer mmap (weight_type=" + wtype.to_s + ")"
    gguf
  else
    # The legacy loader reopens the file by path, so this handle is not
    # needed past the flag check.
    TinyNNCuda.tnn_gguf_free(gguf)
    realize_for(max_T, cfg.d_model, cfg.d_ff,
                cfg.n_heads, cfg.n_kv,
                cfg.n_layers, cfg.vocab,
                cfg.rope_base, cfg.rms_eps,
                flags.untied, flags.qkv_bias)
    load_weights(gguf_path)
    puts " legacy copy load"
    TinyNNCuda.tnn_null_ptr
  end
end
880
+
881
+ # Per-head byte stride for slicing a full [n_heads*d_head, d_model]
882
+ # tensor into n_heads contiguous Dh×D blocks. A per-head slice is
883
+ # d_head rows of d_model elements, so the stride is d_head row-sizes.
884
+ #
885
+ # tnn_row_size delegates to ggml_row_size, which is correct for EVERY
886
+ # type — F32, Q8_0, and the K-quants (Q4_K/Q5_K/Q6_K). The previous
887
+ # hand-coded F32/Q8_0-only branches returned 0 for any other type,
888
+ # which silently made the per-head offset `off_base + hq*0 == off_base`
889
+ # — i.e. every attention head read head 0's weight slice. That
890
+ # collapsed multi-head attention on K-quant MoE models (forced down the
891
+ # realize_for_mmap path), compounding across layers into degenerate
892
+ # output. This was misdiagnosed as a ggml mul_mat_id K-quant bug
893
+ # (ggml#1506); it was ours. Block alignment holds because each row is a
894
+ # whole number of quant blocks (requires d_model % block == 0, which
895
+ # the per-head tnn_input_2d_persistent_mmap also enforces via ne0).
896
# Byte stride between consecutive per-head slices of a fused attention
# weight: d_head rows, each TinyNNCuda.tnn_row_size(ggml_type, d_model)
# bytes wide.
#
# ggml_type - integer ggml type id of the tensor.
# d_head    - rows per head slice.
# d_model   - elements per row.
#
# Returns d_head * row_size. If the row size comes back <= 0 the process
# prints a FATAL line and exits with status 1 instead of returning a
# stride that would alias every head onto the first one.
def head_nbytes(ggml_type, d_head, d_model)
  row_bytes = TinyNNCuda.tnn_row_size(ggml_type, d_model)
  return d_head * row_bytes if row_bytes > 0

  fatal_msg = "FATAL: head_nbytes got row_size<=0 for ggml_type=" +
              ggml_type.to_s + " d_model=" + d_model.to_s +
              " — per-head attention stride would collapse. Aborting."
  puts fatal_msg
  exit 1
end
908
+
909
# Trace hook: intentionally empty in this class, returns nil.
def enable_trace!
  nil
end
910
# Trace hook: ignores the tap name and hands the tensor back unchanged,
# so call sites can wrap any intermediate value without altering it.
def trace_tap(_name, t)
  return t
end
911
# Trace hook: intentionally empty in this class, returns nil.
def dump_trace
  nil
end
912
+
913
+ # Ruby-OO entry point for "load weights into this realized cache."
914
+ # Auto-detects layout: GGUFs with the `toy.ggml_native` metadata key
915
+ # take the memcpy path (no transpose); legacy GGUFs take the
916
+ # transposing path. Callers stay layout-agnostic.
917
+ def load_weights(path)
918
+ GGUFLoad.load_kv_cache_auto(self, path)
919
+ end
920
+
921
+ # Pull any persistent FFI tensor back to a Ruby Mat (chunked download,
922
+ # works for weight-sized tensors). Required by the design rule that
923
+ # the direct-loader path must keep Mat-roundtrip open — see
924
+ # docs/loader-api.md.
925
+ #
926
+ # `t` is any tensor handle exposed on this cache or its blocks
927
+ # (e.g. `kv.t_token_embed`, `kv.kv_blocks_ffi[3].t_w_o`). `rows` and
928
+ # `cols` are the logical shape; we trust the caller.
929
+ def read_persistent_mat(t, rows, cols)
930
+ TinyNNCuda.download_to_mat(@sess, t, rows, cols)
931
+ end
932
+
933
+ # Declare every persistent tensor (weights + K/V buffers) and finalize.
934
+ # `untied` is true for TinyLlama-shape models that have a separate
935
+ # `output.weight` (lm_head); false for SmolLM2 / Qwen2.5 with tied
936
+ # embeddings. When false we skip the (vocab × d_model) t_output
937
+ # allocation entirely. `qkv_bias` is true for Qwen2.x; when false the
938
+ # b_q/b_k/b_v tensors aren't allocated and Q/K/V matmuls land
939
+ # without an add.
940
+ def realize_for(max_T, d_model, d_ff, n_heads, n_kv, n_layers,
941
+ vocab_size, rope_base, rms_eps, untied, qkv_bias)
942
+ @max_T = max_T
943
+ @d_model = d_model
944
+ @d_ff = d_ff
945
+ @n_heads = n_heads
946
+ @n_kv = n_kv
947
+ @d_head = d_model / n_heads
948
+ @group_size = n_heads / n_kv
949
+ @n_layers = n_layers
950
+ @vocab_size = vocab_size
951
+ @rope_base = rope_base
952
+ @rms_eps = rms_eps
953
+
954
+ @sess = TinyNNCuda.tnn_session_new(1)
955
+ @t_token_embed = TinyNNCuda.tnn_input_2d_f32_persistent(@sess, vocab_size, d_model)
956
+ @t_final_norm_gamma = TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_model)
957
+ @has_untied_output = untied
958
+ @has_qkv_bias = qkv_bias
959
+ if untied
960
+ @t_output = alloc_2d_w(vocab_size, d_model)
961
+ end
962
+
963
+ @kv_blocks_ffi = [SmolLM2KVBlockFFICuda.new]
964
+ li = 1
965
+ while li < n_layers
966
+ @kv_blocks_ffi.push(SmolLM2KVBlockFFICuda.new)
967
+ li = li + 1
968
+ end
969
+
970
+ li = 0
971
+ while li < n_layers
972
+ blk = @kv_blocks_ffi[li]
973
+ blk.t_rn1_gamma = TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_model)
974
+ blk.t_rn2_gamma = TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_model)
975
+
976
+ # Q: n_heads per-head matrices of (d_head, d_model). Quantizable.
977
+ blk.t_w_q = [alloc_2d_w(d_head, d_model)]
978
+ if qkv_bias
979
+ blk.t_b_q = [TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head)]
980
+ end
981
+ hq = 1
982
+ while hq < n_heads
983
+ blk.t_w_q.push(alloc_2d_w(d_head, d_model))
984
+ if qkv_bias
985
+ blk.t_b_q.push(TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head))
986
+ end
987
+ hq = hq + 1
988
+ end
989
+
990
+ # K, V (and the persistent K/V buffers): n_kv per-head. Linear
991
+ # weights quantizable; K/V cache buffers follow @kv_type_*
992
+ # (P5.1 K, P5.2 V); biases stay F32.
993
+ blk.t_w_k = [alloc_2d_w(d_head, d_model)]
994
+ blk.t_w_v = [alloc_2d_w(d_head, d_model)]
995
+ # P5.1: Q8 K alloc when enabled (see realize_for_mmap parallel path).
996
+ if @kv_type_k == 8
997
+ blk.t_K = [TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8)]
998
+ else
999
+ blk.t_K = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, d_head)]
1000
+ end
1001
+ # P5.2: V now mirrors K's layout (ne=[d_head, max_T]).
1002
+ if @kv_type_v == 8
1003
+ blk.t_V = [TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8)]
1004
+ else
1005
+ blk.t_V = [TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, d_head)]
1006
+ end
1007
+ if qkv_bias
1008
+ # K bias: 1-D (broadcasts over [d_head, 1] k matmul result).
1009
+ # V bias: 1-D too (the V matmul is now ordered weight-first, so
1010
+ # its result is [d_head, 1] like K — matches a 1-D bias).
1011
+ blk.t_b_k = [TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head)]
1012
+ blk.t_b_v = [TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head)]
1013
+ end
1014
+ hkv = 1
1015
+ while hkv < n_kv
1016
+ blk.t_w_k.push(alloc_2d_w(d_head, d_model))
1017
+ blk.t_w_v.push(alloc_2d_w(d_head, d_model))
1018
+ if @kv_type_k == 8
1019
+ blk.t_K.push(TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8))
1020
+ else
1021
+ blk.t_K.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, d_head))
1022
+ end
1023
+ if @kv_type_v == 8
1024
+ blk.t_V.push(TinyNNCuda.tnn_input_2d_persistent_typed(@sess, max_T, d_head, 8))
1025
+ else
1026
+ blk.t_V.push(TinyNNCuda.tnn_input_2d_f32_persistent(@sess, max_T, d_head))
1027
+ end
1028
+ if qkv_bias
1029
+ blk.t_b_k.push(TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head))
1030
+ blk.t_b_v.push(TinyNNCuda.tnn_input_1d_f32_persistent(@sess, d_head))
1031
+ end
1032
+ hkv = hkv + 1
1033
+ end
1034
+
1035
+ blk.t_w_o = alloc_2d_w(d_model, @n_heads * @d_head)
1036
+ blk.t_w_gate = alloc_2d_w(d_ff, d_model)
1037
+ blk.t_w_up = alloc_2d_w(d_ff, d_model)
1038
+ blk.t_w_down = alloc_2d_w(d_model, d_ff)
1039
+ li = li + 1
1040
+ end
1041
+
1042
+ TinyNNCuda.tnn_finalize_weights(@sess)
1043
+ @realized = true
1044
+ end
1045
+
1046
+ # Build the compute graph for one decode position `pos`: embedding lookup → @n_layers blocks → final RMSNorm → logits (+ optional soft-cap). Returns a SmolLM2KVStepResultCuda wrapping the token-id input, the position input and the logits tensor.
1047
+ def build_decode_step(pos)
1048
+ eps = @rms_eps
1049
+ scale = 1.0 / Math.sqrt(@d_head.to_f) # attention score scale, 1/sqrt(d_head)
1050
+ d_model = @d_model # NOTE(review): not read again in this method
1051
+ d_head = @d_head
1052
+ max_T = @max_T
1053
+ bytes_d_head = d_head * 4 # F32 row size: 4 bytes per element
1054
+ bytes_max_T = max_T * 4
1055
+ # P5.1+P5.2: row size for K and V. F32 → d_head*4; Q8_0 →
1056
+ # ggml_row_size(Q8_0, d_head) (34 bytes per 32-element block; 68 at d_head=64).
1057
+ # V is in the same layout as K post-P5.2 so the math is symmetric.
1058
+ bytes_d_head_k = @kv_type_k == 8 ? TinyNNCuda.tnn_row_size(8, d_head) : bytes_d_head
1059
+ bytes_d_head_v = @kv_type_v == 8 ? TinyNNCuda.tnn_row_size(8, d_head) : bytes_d_head
1060
+
1061
+ # Inputs: token id + RoPE position. Both length 1.
1062
+ t_token_id = TinyNNCuda.tnn_input_1d_i32(@sess, 1)
1063
+ t_pos = TinyNNCuda.tnn_input_1d_i32_ctx(@sess, 1) # NOTE(review): uses the *_ctx allocator variant, unlike t_token_id; reason not visible here
1064
+
1065
+ t_x = TinyNNCuda.tnn_get_rows(@sess, @t_token_embed, t_token_id) # ne=[d_model, 1]
1066
+ # I-Gemma (#113): Gemma 2 scales token embeddings by sqrt(d_model)
1067
+ # post-lookup. Non-Gemma archs use @embed_scale = 1.0 (no-op
1068
+ # branch). The scalar is computed at flag-detection time so we
1069
+ # don't pay a Math.sqrt landmine in the hot path.
1070
+ if @embed_scale != 1.0
1071
+ t_x = TinyNNCuda.tnn_scale(@sess, t_x, @embed_scale)
1072
+ end
1073
+ t_x = trace_tap("embed", t_x)
1074
+
1075
+ li = 0
1076
+ while li < @n_layers
1077
+ t_x = build_block_step(t_x, @kv_blocks_ffi[li], t_pos, pos,
1078
+ scale, eps, bytes_d_head, bytes_d_head_k,
1079
+ bytes_d_head_v, bytes_max_T, li)
1080
+ li = li + 1
1081
+ end
1082
+
1083
+ t_x_final = TinyNNCuda.tnn_rms_norm(@sess, t_x, @t_final_norm_gamma, eps)
1084
+ t_x_final = trace_tap("final_norm", t_x_final)
1085
+ # Logits: untied path matmuls against t_output (lm_head); tied
1086
+ # path against t_token_embed. Both tensors are [vocab, d_model],
1087
+ # so the matmul shape is identical either way.
1088
+ if @has_untied_output
1089
+ t_kv_logits = TinyNNCuda.tnn_matmul(@sess, @t_output, t_x_final)
1090
+ else
1091
+ t_kv_logits = TinyNNCuda.tnn_matmul(@sess, @t_token_embed, t_x_final)
1092
+ end
1093
+ # I-Gemma (#113): final logit soft-cap. Gemma 2 applies
1094
+ # tanh(logits / final_softcap) * final_softcap to the output
1095
+ # logits before argmax / sampling. No-op for other models.
1096
+ if @final_softcap > 0.0
1097
+ t_kv_logits = TinyNNCuda.tnn_scale(@sess, t_kv_logits, 1.0 / @final_softcap)
1098
+ t_kv_logits = TinyNNCuda.tnn_tanh(@sess, t_kv_logits)
1099
+ t_kv_logits = TinyNNCuda.tnn_scale(@sess, t_kv_logits, @final_softcap)
1100
+ end
1101
+ TinyNNCuda.tnn_set_output(t_kv_logits) # NOTE(review): the only tnn_* call in this method not passed @sess — confirm against the FFI signature
1102
+ SmolLM2KVStepResultCuda.new(t_token_id, t_pos, t_kv_logits)
1103
+ end
1104
+
1105
+ def build_block_step(t_x, blk, t_pos, pos, scale, eps, # one transformer block at decode position `pos`: attention then FFN, each followed by a residual add
1106
+ bytes_d_head, bytes_d_head_k, bytes_d_head_v,
1107
+ bytes_max_T, layer_idx) # bytes_d_head / bytes_max_T are only forwarded to build_attention_qhead_step
1108
+ # Layer-tag prefix for tap names (e.g. "L0."). String concat of an
1109
+ # int needs explicit .to_s. No padding is applied, so layer 10 tags as "L10.".
1110
+ tag = "L" + layer_idx.to_s + "."
1111
+
1112
+ t_h = TinyNNCuda.tnn_rms_norm(@sess, t_x, blk.t_rn1_gamma, eps)
1113
+ t_h = trace_tap(tag + "rn1", t_h)
1114
+
1115
+ # --- compute K, V for each KV head (n_kv times), rope K, cpy into buffers ---
1116
+ hkv = 0
1117
+ while hkv < @n_kv
1118
+ t_k_raw = TinyNNCuda.tnn_matmul(@sess, blk.t_w_k[hkv], t_h) # ne=[d_head, 1]
1119
+ if @has_qkv_bias
1120
+ t_k_pre = TinyNNCuda.tnn_add(@sess, t_k_raw, blk.t_b_k[hkv])
1121
+ else
1122
+ t_k_pre = t_k_raw
1123
+ end
1124
+ # Tap K (head 0 only) post-bias, pre-RoPE.
1125
+ if hkv == 0
1126
+ t_k_pre = trace_tap(tag + "k_pre", t_k_pre)
1127
+ end
1128
+ # M1 + #110: QK-norm. Two flavors:
1129
+ # kind=1 (Qwen3): blk.t_k_norm_gamma is [d_head], shared
1130
+ # across all KV heads; pass directly.
1131
+ # kind=2 (OLMoE / Granite, per-head approximation):
1132
+ # blk.t_k_norm_gamma is [n_kv * d_head] = [d_model_kv];
1133
+ # view the per-head [d_head] slice at byte offset
1134
+ # hkv*d_head*4. This computes per-head variance (not the
1135
+ # true full-K-vector variance) but applies the correct
1136
+ # per-element gamma scaling. Cheap and close-enough for
1137
+ # models where per-head magnitudes are similar (which they
1138
+ # typically are for projections of a single input).
1139
+ if @has_qk_norm
1140
+ if @qk_norm_kind == 2
1141
+ k_gamma_view = TinyNNCuda.tnn_view_1d(@sess, blk.t_k_norm_gamma,
1142
+ @d_head, hkv * @d_head * 4)
1143
+ t_k_pre = TinyNNCuda.tnn_rms_norm(@sess, t_k_pre, k_gamma_view, @rms_eps)
1144
+ else
1145
+ t_k_pre = TinyNNCuda.tnn_rms_norm(@sess, t_k_pre, blk.t_k_norm_gamma, @rms_eps)
1146
+ end
1147
+ end
1148
+ t_k_rot = TinyNNCuda.tnn_rope_ext(@sess, t_k_pre, t_pos, @d_head,
1149
+ @rope_base, @rope_scaling.freq_scale,
1150
+ @rope_scaling.ext_factor,
1151
+ @rope_scaling.attn_factor,
1152
+ @rope_scaling.beta_fast,
1153
+ @rope_scaling.beta_slow,
1154
+ @t_rope_freq_factors)
1155
+ if hkv == 0
1156
+ t_k_rot = trace_tap(tag + "k_rot", t_k_rot)
1157
+ end
1158
+ # V matmul: weight in A position so ggml's matmul kernel can
1159
+ # dispatch to Q8 (and other quantized) kernels. Result is
1160
+ # [d_head, 1] instead of the legacy [1, d_head]; a contiguous
1161
+ # view_2d before the cpy reinterprets it as a [1, d_head] row
1162
+ # without moving bytes.
1163
+ t_v_raw = TinyNNCuda.tnn_matmul(@sess, blk.t_w_v[hkv], t_h) # ne=[d_head, 1]
1164
+ if @has_qkv_bias
1165
+ t_v_new = TinyNNCuda.tnn_add(@sess, t_v_raw, blk.t_b_v[hkv]) # bias is 1-D [d_head]
1166
+ else
1167
+ t_v_new = t_v_raw
1168
+ end
1169
+ if hkv == 0
1170
+ t_v_new = trace_tap(tag + "v_new", t_v_new)
1171
+ end
1172
+
1173
+ # P5.1+P5.2: K and V both use the same per-position write pattern.
1174
+ # bytes_d_head_{k,v} reflect each cache's dtype (F32 → d_head*4,
1175
+ # Q8_0 → type-aware row size from tnn_row_size). cpy quantizes
1176
+ # f32 source → Q8 destination automatically when types differ.
1177
+ t_K_slot = TinyNNCuda.tnn_view_2d(@sess, blk.t_K[hkv],
1178
+ @d_head, 1, bytes_d_head_k, pos * bytes_d_head_k)
1179
+ t_cpy_k = TinyNNCuda.tnn_cpy(@sess, t_k_rot, t_K_slot)
1180
+ t_V_slot = TinyNNCuda.tnn_view_2d(@sess, blk.t_V[hkv],
1181
+ @d_head, 1, bytes_d_head_v, pos * bytes_d_head_v)
1182
+ t_cpy_v = TinyNNCuda.tnn_cpy(@sess, t_v_new, t_V_slot)
1183
+ TinyNNCuda.tnn_add_to_graph(@sess, t_cpy_k) # registered explicitly: the attention ops read blk.t_K / blk.t_V, not the cpy results
1184
+ TinyNNCuda.tnn_add_to_graph(@sess, t_cpy_v)
1185
+ hkv = hkv + 1
1186
+ end
1187
+
1188
+ # --- per-Q-head attention ---
1189
+ t_head_out0 = build_attention_qhead_step(t_h, blk, 0, t_pos, pos,
1190
+ scale, bytes_d_head, bytes_d_head_k,
1191
+ bytes_d_head_v, bytes_max_T, tag, true,
1192
+ layer_idx)
1193
+ t_head_outs = [t_head_out0]
1194
+ hq = 1
1195
+ while hq < @n_heads
1196
+ t_head_outs.push(build_attention_qhead_step(t_h, blk, hq, t_pos, pos,
1197
+ scale, bytes_d_head, bytes_d_head_k,
1198
+ bytes_d_head_v, bytes_max_T, tag, false,
1199
+ layer_idx))
1200
+ hq = hq + 1
1201
+ end
1202
+
1203
+ t_concat = t_head_outs[0]
1204
+ hq = 1
1205
+ while hq < @n_heads
1206
+ t_concat = TinyNNCuda.tnn_concat(@sess, t_concat, t_head_outs[hq], 0)
1207
+ hq = hq + 1
1208
+ end
1209
+ t_concat = trace_tap(tag + "concat", t_concat)
1210
+
1211
+ t_out_proj = TinyNNCuda.tnn_matmul(@sess, blk.t_w_o, t_concat)
1212
+ t_out_proj = trace_tap(tag + "attn_out", t_out_proj)
1213
+ # I-Gemma (#113): post-attention RMSNorm applied to the attention
1214
+ # output BEFORE the residual add. Gemma 2's sandwich structure:
1215
+ # pre_norm(x) → attention → post_norm → residual + …
1216
+ # No-op when has_post_norms is false (every non-Gemma arch).
1217
+ if @has_post_norms
1218
+ t_out_proj = TinyNNCuda.tnn_rms_norm(@sess, t_out_proj, blk.t_post_attn_norm_gamma, eps)
1219
+ t_out_proj = trace_tap(tag + "post_attn_norm", t_out_proj)
1220
+ end
1221
+ t_x_attn = TinyNNCuda.tnn_add(@sess, t_x, t_out_proj)
1222
+ t_x_attn = trace_tap(tag + "post_attn", t_x_attn)
1223
+
1224
+ # --- FFN ---
1225
+ t_h2 = TinyNNCuda.tnn_rms_norm(@sess, t_x_attn, blk.t_rn2_gamma, eps)
1226
+ t_h2 = trace_tap(tag + "rn2", t_h2)
1227
+
1228
+ if @is_moe
1229
+ t_dn = build_moe_ffn(blk, t_h2, tag)
1230
+ else
1231
+ # --- SwiGLU FFN (dense) ---
1232
+ t_gate = TinyNNCuda.tnn_matmul(@sess, blk.t_w_gate, t_h2) # ne=[d_ff, 1]
1233
+ t_gate = trace_tap(tag + "gate", t_gate)
1234
+ t_up = TinyNNCuda.tnn_matmul(@sess, blk.t_w_up, t_h2) # ne=[d_ff, 1]
1235
+ t_up = trace_tap(tag + "up", t_up)
1236
+ t_silug = TinyNNCuda.tnn_silu(@sess, t_gate)
1237
+ t_silug = trace_tap(tag + "silu_gate", t_silug)
1238
+ t_gated = TinyNNCuda.tnn_mul(@sess, t_silug, t_up)
1239
+ t_gated = trace_tap(tag + "gated", t_gated)
1240
+ t_dn = TinyNNCuda.tnn_matmul(@sess, blk.t_w_down, t_gated) # ne=[d_model, 1]
1241
+ t_dn = trace_tap(tag + "dn", t_dn)
1242
+ end
1243
+
1244
+ # I-Gemma (#113): post-FFN RMSNorm on the FFN output before the
1245
+ # residual add. Same pattern as the post-attn norm above.
1246
+ if @has_post_norms
1247
+ t_dn = TinyNNCuda.tnn_rms_norm(@sess, t_dn, blk.t_post_ffn_norm_gamma, eps)
1248
+ t_dn = trace_tap(tag + "post_ffn_norm", t_dn)
1249
+ end
1250
+ t_post_ffn = TinyNNCuda.tnn_add(@sess, t_x_attn, t_dn)
1251
+ trace_tap(tag + "post_ffn", t_post_ffn) # last expression: trace_tap's result is this method's return value
1252
+ end
1253
+
1254
+ # M2.3: Mixtral / Qwen-MoE routed FFN. Ports the validated graph from
1255
+ # tinynn/ab_smoke_moe_ffn into the production decode path. Shapes:
1256
+ # t_h2 [d_model, 1] input (post-norm)
1257
+ # router_logits [n_experts, 1] matmul(w_router, h2)
1258
+ # probs [n_experts, 1] softmax(logits)
1259
+ # top_idx [n_experts_used, 1] top_k(probs)
1260
+ # weights [1, n_experts_used, 1] get_rows(reshape_3d(probs,1,n_exp,1), top_idx)
1261
+ # e_gate / e_up [d_ff, n_experts_used, 1] mul_mat_id(...exps, h2, top_idx)
1262
+ # e_down [d_model, n_experts_used, 1] mul_mat_id(down_exps, silu(e_gate) * e_up, top_idx); weighted and summed over K afterwards
1263
+ #
1264
+ # The (mul/transpose/sum_rows/reshape) sum-across-K is the same trick
1265
+ # the smoke uses; ggml has no axis-1 reduce primitive.
1266
+ def build_moe_ffn(blk, t_h2, tag)
1267
+ t_logits = TinyNNCuda.tnn_matmul(@sess, blk.t_w_router, t_h2) # ne=[n_exp, 1]
1268
+ t_logits = trace_tap(tag + "moe_logits", t_logits)
1269
+ t_probs = TinyNNCuda.tnn_softmax(@sess, t_logits) # ne=[n_exp, 1]
1270
+ t_top_idx = TinyNNCuda.tnn_top_k(@sess, t_probs, @n_experts_used) # ne=[K, 1]
1271
+ t_probs_3d = TinyNNCuda.tnn_reshape_3d(@sess, t_probs, 1, @n_experts, 1)
1272
+ t_w_route = TinyNNCuda.tnn_get_rows(@sess, t_probs_3d, t_top_idx) # ne=[1, K, 1]; softmax probs of the selected experts, not renormalized over K in this method
1273
+
1274
+ t_e_gate = TinyNNCuda.tnn_mul_mat_id(@sess, blk.t_w_gate_exps, t_h2, t_top_idx)
1275
+ t_e_up = TinyNNCuda.tnn_mul_mat_id(@sess, blk.t_w_up_exps, t_h2, t_top_idx)
1276
+ t_e_silu = TinyNNCuda.tnn_silu(@sess, t_e_gate)
1277
+ t_e_gated = TinyNNCuda.tnn_mul(@sess, t_e_silu, t_e_up) # ne=[d_ff, K, 1]
1278
+ t_e_down = TinyNNCuda.tnn_mul_mat_id(@sess, blk.t_w_down_exps, t_e_gated, t_top_idx)
1279
+ t_e_down = trace_tap(tag + "moe_e_down", t_e_down) # ne=[d_model, K, 1]
1280
+
1281
+ # Broadcast weights over d_model: [d_model, K, 1] × [1, K, 1] → [d_model, K, 1].
1282
+ t_weighted = TinyNNCuda.tnn_mul(@sess, t_e_down, t_w_route)
1283
+
1284
+ # Sum across K (axis 1). Reshape to 2D (T=1 collapses), transpose
1285
+ # [d_model, K] → [K, d_model], sum_rows along ne0=K → [1, d_model],
1286
+ # reshape back to [d_model, 1].
1287
+ t_weighted_2d = TinyNNCuda.tnn_reshape_2d(@sess, t_weighted, @d_model, @n_experts_used)
1288
+ t_weighted_T = TinyNNCuda.tnn_transpose(@sess, t_weighted_2d)
1289
+ t_summed_T = TinyNNCuda.tnn_sum_rows(@sess, t_weighted_T) # ne=[1, d_model]
1290
+ t_dn = TinyNNCuda.tnn_reshape_2d(@sess, t_summed_T, @d_model, 1)
1291
+ trace_tap(tag + "moe_out", t_dn) # last expression: trace_tap's result is this method's return value
1292
+ end
1293
+
1294
+ # One query head. Uses the (already-written) K and V of the
1295
+ # corresponding KV head — index = hq / group_size. `tag` is the
1296
+ # "L<i>." layer prefix; `tap_this_head` is true only for head 0 so we
1297
+ # don't multiply taps by n_heads in trace mode.
1298
+ def build_attention_qhead_step(t_h, blk, hq, t_pos, pos, scale,
1299
+ bytes_d_head, bytes_d_head_k, bytes_d_head_v,
1300
+ bytes_max_T, tag, tap_this_head,
1301
+ layer_idx) # NOTE(review): bytes_d_head and bytes_max_T are accepted but never read in this method
1302
+ hkv = hq / @group_size # KV head shared by this Q head (integer division, assuming Integer operands)
1303
+
1304
+ # I-Gemma (#113): per-layer SWA toggle. Gemma 2 alternates layers
1305
+ # between full attention and sliding-window. When @swa_alternates
1306
+ # is true, only EVEN layers see the SWA window; odd layers get
1307
+ # effectively full attention (window = 0 ⇒ hist_count = pos+1).
1308
+ # Non-Gemma archs: @swa_alternates is false; all layers apply
1309
+ # @swa_window uniformly (or 0 for no-SWA models).
1310
+ swa_for_this_layer = @swa_window
1311
+ if @swa_alternates && layer_idx.odd?
1312
+ swa_for_this_layer = 0
1313
+ end
1314
+
1315
+ t_q_raw = TinyNNCuda.tnn_matmul(@sess, blk.t_w_q[hq], t_h) # ne=[d_head, 1]
1316
+ # F1.2: optional LoRA on Q. Standard placement is BEFORE the bias
1317
+ # add (HF LoRA practice — the bias stays a property of the base
1318
+ # projection, LoRA only adjusts the linear part). Math:
1319
+ # q_lora = w_lora_b[hq] @ (w_lora_a[hq] @ t_h)
1320
+ # q_raw := q_raw + q_lora
1321
+ # With B init to zero, q_lora == 0 and q_raw is unchanged.
1322
+ if @lora_q_enabled
1323
+ t_lora_a_h = TinyNNCuda.tnn_matmul(@sess, blk.t_w_lora_a_q[hq], t_h) # ne=[r, 1]
1324
+ t_lora_b_a_h = TinyNNCuda.tnn_matmul(@sess, blk.t_w_lora_b_q[hq], t_lora_a_h)# ne=[d_head, 1]
1325
+ t_q_raw = TinyNNCuda.tnn_add(@sess, t_q_raw, t_lora_b_a_h)
1326
+ end
1327
+ if @has_qkv_bias
1328
+ t_q_pre = TinyNNCuda.tnn_add(@sess, t_q_raw, blk.t_b_q[hq])
1329
+ else
1330
+ t_q_pre = t_q_raw
1331
+ end
1332
+ if tap_this_head
1333
+ t_q_pre = trace_tap(tag + "q_pre", t_q_pre)
1334
+ end
1335
+ if @has_qk_norm
1336
+ if @qk_norm_kind == 2
1337
+ # OLMoE / Granite per-head gamma slice (see build_block_step's
1338
+ # K-norm comment). The gamma tensor is [d_model]; head hq's
1339
+ # slice lives at byte offset hq*d_head*4.
1340
+ q_gamma_view = TinyNNCuda.tnn_view_1d(@sess, blk.t_q_norm_gamma,
1341
+ @d_head, hq * @d_head * 4)
1342
+ t_q_pre = TinyNNCuda.tnn_rms_norm(@sess, t_q_pre, q_gamma_view, @rms_eps)
1343
+ else
1344
+ t_q_pre = TinyNNCuda.tnn_rms_norm(@sess, t_q_pre, blk.t_q_norm_gamma, @rms_eps)
1345
+ end
1346
+ end
1347
+ t_q = TinyNNCuda.tnn_rope_ext(@sess, t_q_pre, t_pos, @d_head,
1348
+ @rope_base, @rope_scaling.freq_scale,
1349
+ @rope_scaling.ext_factor,
1350
+ @rope_scaling.attn_factor,
1351
+ @rope_scaling.beta_fast,
1352
+ @rope_scaling.beta_slow,
1353
+ @t_rope_freq_factors)
1354
+ if tap_this_head
1355
+ t_q = trace_tap(tag + "q_rot", t_q)
1356
+ end
1357
+
1358
+ # M3 + I-Gemma: sliding-window attention. When swa_for_this_layer
1359
+ # > 0, restrict the K/V view to the last `min(pos+1, swa_window)`
1360
+ # positions. swa_for_this_layer differs from @swa_window only
1361
+ # when @swa_alternates is set (Gemma 2's even/odd layer pattern).
1362
+ if swa_for_this_layer > 0 && (pos + 1) > swa_for_this_layer
1363
+ hist_start = pos + 1 - swa_for_this_layer
1364
+ hist_count = swa_for_this_layer
1365
+ else
1366
+ hist_start = 0
1367
+ hist_count = pos + 1
1368
+ end
1369
+ # P5.1+P5.2: K and V views share the same byte-stride math.
1370
+ # ggml_mul_mat dequantizes Q8 source on the fly when reads happen.
1371
+ t_K_hist = TinyNNCuda.tnn_view_2d(@sess, blk.t_K[hkv],
1372
+ @d_head, hist_count, bytes_d_head_k,
1373
+ hist_start * bytes_d_head_k)
1374
+ # P5.2: V is now ne=[d_head, max_T] (positions on ne1, mirror of K).
1375
+ # The history view at [d_head, hist_count] is what flash_attn_ext
1376
+ # expects natively — no transpose-cont in the flash path now.
1377
+ t_V_hist = TinyNNCuda.tnn_view_2d(@sess, blk.t_V[hkv],
1378
+ @d_head, hist_count, bytes_d_head_v,
1379
+ hist_start * bytes_d_head_v)
1380
+
1381
+ if @use_flash_attn
1382
+ # P4.1+P5.2: fused softmax(Q·Kᵀ·scale + mask)·V via
1383
+ # ggml_flash_attn_ext. Reshape Q/K/V to the 3D shapes
1384
+ # flash_attn_ext expects (ne[3] defaults to 1 so we don't need
1385
+ # a fourth dim). V's layout is already correct post-P5.2 — no
1386
+ # transpose needed.
1387
+ t_q_3d = TinyNNCuda.tnn_reshape_3d(@sess, t_q, @d_head, 1, 1)
1388
+ t_K_3d = TinyNNCuda.tnn_reshape_3d(@sess, t_K_hist, @d_head, hist_count, 1)
1389
+ t_V_3d = TinyNNCuda.tnn_reshape_3d(@sess, t_V_hist, @d_head, hist_count, 1)
1390
+ # I-Gemma (#113): pass logit soft-cap to flash_attn_ext. The
1391
+ # kernel applies tanh(x/softcap)*softcap to attention logits
1392
+ # internally. 0.0 disables (every non-Gemma model).
1393
+ t_out_4d = TinyNNCuda.tnn_flash_attn_ext(@sess, t_q_3d, t_K_3d, t_V_3d, nil,
1394
+ scale, 0.0, @attn_softcap)
1395
+ # Output ne=[d_head, n_head=1, T_q=1, batch=1]; collapse to 2D.
1396
+ t_head = TinyNNCuda.tnn_reshape_2d(@sess, t_out_4d, @d_head, 1)
1397
+ if tap_this_head
1398
+ t_head = trace_tap(tag + "head0_flash", t_head)
1399
+ end
1400
+ return t_head # flash path ends here; everything below is the non-flash fallback
1401
+ end
1402
+
1403
+ t_scores = TinyNNCuda.tnn_matmul(@sess, t_K_hist, t_q)
1404
+ if tap_this_head
1405
+ t_scores = trace_tap(tag + "scores", t_scores)
1406
+ end
1407
+ t_scaled = TinyNNCuda.tnn_scale(@sess, t_scores, scale)
1408
+ # I-Gemma (#113): logit soft-cap in the non-flash path.
1409
+ # y = softcap * tanh(x / softcap)
1410
+ # Composed via two scales + tanh. No-op when @attn_softcap == 0.
1411
+ if @attn_softcap > 0.0
1412
+ t_scaled = TinyNNCuda.tnn_scale(@sess, t_scaled, 1.0 / @attn_softcap)
1413
+ t_scaled = TinyNNCuda.tnn_tanh(@sess, t_scaled)
1414
+ t_scaled = TinyNNCuda.tnn_scale(@sess, t_scaled, @attn_softcap)
1415
+ end
1416
+ t_attn = TinyNNCuda.tnn_softmax(@sess, t_scaled)
1417
+ if tap_this_head
1418
+ t_attn = trace_tap(tag + "softmax", t_attn)
1419
+ end
1420
+ # P5.2: V is now [d_head, hist_count]; ggml_mul_mat needs the
1421
+ # matching k axis (hist_count) on both inputs, so transpose V_hist
1422
+ # (not free here: tnn_transpose materializes via ggml_cont — one copy
1423
+ # of d_head × hist_count × 4 bytes per Q-head per layer). Cheap
1424
+ # at decode (typical hist_count ~ a few hundred) and uniform with
1425
+ # how flash takes V — both paths see the same V layout now.
1426
+ t_V_T = TinyNNCuda.tnn_transpose(@sess, t_V_hist)
1427
+ t_head = TinyNNCuda.tnn_matmul(@sess, t_V_T, t_attn)
1428
+ if tap_this_head
1429
+ t_head = trace_tap(tag + "head0", t_head)
1430
+ end
1431
+ t_head
1432
+ end
1433
+ end
1434
+
1435
+ # Result of building one decode step: the two graph input handles and
1436
+ # the logits output handle. Init-param names deliberately differ from the ivar names — same defensive pattern as GPT2KVStepResult.
1437
+ class SmolLM2KVStepResultCuda
1438
+ attr_accessor :t_token_id, :t_pos, :kv_step_logits # token-id input, position input, logits output
1439
+ def initialize(tok_ptr, pos_ptr, logits_ptr)
1440
+ @t_token_id = tok_ptr # filled with [token_id] before compute
1441
+ @t_pos = pos_ptr # filled with [pos] before compute
1442
+ @kv_step_logits = logits_ptr # realized and downloaded after compute
1443
+ end
1444
+ end
1445
+
1446
+ module SmolLM2KVCuda
1447
+ # Upload all Toy::SmolLM2 weights into a realized cache (+ zero-init
1448
+ # the K/V buffers). NOTE(review): only embeddings, norms, Q/K/V/O (+ optional biases) and the dense gate/up/down FFN weights are uploaded here; QK-norm, post-norm, MoE and LoRA tensors used by the graph builders are not touched in this method.
1449
+ def self.upload_from(kv_cache, model)
1450
+ sess = kv_cache.sess
1451
+ n = kv_cache.n_layers
1452
+ n_heads = kv_cache.n_heads
1453
+ n_kv = kv_cache.n_kv
1454
+ d_model = kv_cache.d_model
1455
+ d_head = kv_cache.d_head
1456
+ max_T = kv_cache.max_T
1457
+
1458
+ TinyNNCuda.upload_row_major(sess, kv_cache.t_token_embed, model.token_embed.weight)
1459
+ TinyNNCuda.tnn_upload_from_float_array(sess, kv_cache.t_final_norm_gamma,
1460
+ model.final_norm.gamma, d_model)
1461
+ if kv_cache.has_untied_output
1462
+ TinyNNCuda.upload_row_major(sess, kv_cache.t_output, model.output_proj)
1463
+ end
1464
+
1465
+ # P5.2: K and V share the same layout ne=[d_head, max_T] now,
1466
+ # so they share the same zero-init Mat.
1467
+ kv_zero = Mat.new(max_T, d_head) # presumably zero-filled by Mat.new — confirm
1468
+
1469
+ li = 0
1470
+ while li < n
1471
+ blk_n = model.stack[li] # source block on the model side
1472
+ blk_f = kv_cache.kv_blocks_ffi[li] # destination tensor handles on the cache side
1473
+
1474
+ TinyNNCuda.tnn_upload_from_float_array(sess, blk_f.t_rn1_gamma, blk_n.rn1.gamma, d_model)
1475
+ TinyNNCuda.tnn_upload_from_float_array(sess, blk_f.t_rn2_gamma, blk_n.rn2.gamma, d_model)
1476
+
1477
+ hq = 0
1478
+ while hq < n_heads
1479
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_q[hq], blk_n.attn.w_q[hq])
1480
+ if kv_cache.has_qkv_bias
1481
+ TinyNNCuda.tnn_upload_from_float_array(sess, blk_f.t_b_q[hq], blk_n.attn.b_q[hq], d_head)
1482
+ end
1483
+ hq = hq + 1
1484
+ end
1485
+
1486
+ hkv = 0
1487
+ while hkv < n_kv
1488
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_k[hkv], blk_n.attn.w_k[hkv])
1489
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_v[hkv], blk_n.attn.w_v[hkv])
1490
+ if kv_cache.has_qkv_bias
1491
+ TinyNNCuda.tnn_upload_from_float_array(sess, blk_f.t_b_k[hkv], blk_n.attn.b_k[hkv], d_head)
1492
+ TinyNNCuda.tnn_upload_from_float_array(sess, blk_f.t_b_v[hkv], blk_n.attn.b_v[hkv], d_head)
1493
+ end
1494
+ # P5.1+P5.2: same Q8 skip rule as realize_for_mmap — caches of type 8 (Q8_0) are not zero-filled here.
1495
+ if kv_cache.kv_type_k != 8
1496
+ TinyNNCuda.upload_row_major(sess, blk_f.t_K[hkv], kv_zero)
1497
+ end
1498
+ if kv_cache.kv_type_v != 8
1499
+ TinyNNCuda.upload_row_major(sess, blk_f.t_V[hkv], kv_zero)
1500
+ end
1501
+ hkv = hkv + 1
1502
+ end
1503
+
1504
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_o, blk_n.attn.w_o)
1505
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_gate, blk_n.ffn.w_gate)
1506
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_up, blk_n.ffn.w_up)
1507
+ TinyNNCuda.stage_transposed_and_upload(sess, blk_f.t_w_down, blk_n.ffn.w_down)
1508
+
1509
+ li = li + 1
1510
+ end
1511
+ end
1512
+
1513
+ # Decode one new token at position `pos`. Returns the (1, vocab)
1514
+ # logits Mat for the new position. If `kv_cache.trace_on` is set the
1515
+ # rebuild path inserts taps and we dump stats before reading logits. The graph is rebuilt on every call because `pos` is baked into the K/V view offsets.
1516
+ def self.decode_step(kv_cache, token_id, pos)
1517
+ TinyNNCuda.tnn_reset_for_rebuild(kv_cache.sess)
1518
+ step = kv_cache.build_decode_step(pos)
1519
+ TinyNNCuda.tnn_realize(kv_cache.sess, step.kv_step_logits)
1520
+ TinyNNCuda.upload_int_array(kv_cache.sess, step.t_token_id, [token_id]) # inputs are filled only after realize
1521
+ TinyNNCuda.upload_int_array(kv_cache.sess, step.t_pos, [pos])
1522
+ TinyNNCuda.tnn_compute(kv_cache.sess)
1523
+ kv_cache.dump_trace # called unconditionally; presumably a no-op when tracing is off — confirm
1524
+ TinyNNCuda.download_row_major(kv_cache.sess, step.kv_step_logits, 1, kv_cache.vocab_size)
1525
+ end
1526
+ end