toy 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2107):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +1124 -0
  3. data/LICENSE +21 -0
  4. data/Makefile +2022 -0
  5. data/README.md +154 -0
  6. data/bin/toy +10 -0
  7. data/lib/toy/compute.rb +135 -0
  8. data/lib/toy/compute_cuda.rb +104 -0
  9. data/lib/toy/compute_metal.rb +97 -0
  10. data/lib/toy/core/cli/describe.rb +188 -0
  11. data/lib/toy/core/cli/eval.rb +385 -0
  12. data/lib/toy/core/cli/exit_codes.rb +15 -0
  13. data/lib/toy/core/cli/fetch.rb +238 -0
  14. data/lib/toy/core/cli/infer.rb +268 -0
  15. data/lib/toy/core/cli/install.rb +228 -0
  16. data/lib/toy/core/cli/list.rb +86 -0
  17. data/lib/toy/core/cli/manifest.rb +49 -0
  18. data/lib/toy/core/cli/new.rb +594 -0
  19. data/lib/toy/core/cli/serve.rb +237 -0
  20. data/lib/toy/core/cli/train.rb +471 -0
  21. data/lib/toy/core/cli.rb +165 -0
  22. data/lib/toy/core/config.rb +64 -0
  23. data/lib/toy/core/gguf_meta.rb +161 -0
  24. data/lib/toy/core/model_scan.rb +221 -0
  25. data/lib/toy/core/run_log.rb +94 -0
  26. data/lib/toy/core/toy_root.rb +95 -0
  27. data/lib/toy/dev/toy_card.rb +299 -0
  28. data/lib/toy/dev/toy_describe_flow.rb +412 -0
  29. data/lib/toy/dev/toy_logprobs.rb +86 -0
  30. data/lib/toy/dev/toy_tap.rb +183 -0
  31. data/lib/toy/dev/toy_token_drift.rb +121 -0
  32. data/lib/toy/ffi/tinynn.rb +1491 -0
  33. data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
  34. data/lib/toy/ffi/tinynn_metal.rb +359 -0
  35. data/lib/toy/ffi_manifest.rb +84 -0
  36. data/lib/toy/io/bpe.rb +325 -0
  37. data/lib/toy/io/gguf_kv.rb +35 -0
  38. data/lib/toy/io/gguf_load.rb +331 -0
  39. data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
  40. data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
  41. data/lib/toy/io/model_index.rb +206 -0
  42. data/lib/toy/io/run_bundle.rb +280 -0
  43. data/lib/toy/io/tokenizer.rb +613 -0
  44. data/lib/toy/io/toy_corpus_loader.rb +52 -0
  45. data/lib/toy/io/toy_events.rb +56 -0
  46. data/lib/toy/io/toy_image_loader.rb +48 -0
  47. data/lib/toy/llm/adamw.rb +169 -0
  48. data/lib/toy/llm/archs/llama_arch.rb +233 -0
  49. data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
  50. data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
  51. data/lib/toy/llm/blocks/transformer_block.rb +876 -0
  52. data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
  53. data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
  54. data/lib/toy/llm/classify_batch.rb +88 -0
  55. data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
  56. data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
  57. data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
  58. data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
  59. data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
  60. data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
  61. data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
  62. data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
  63. data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
  64. data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
  65. data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
  66. data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
  67. data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
  68. data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
  69. data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
  70. data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
  71. data/lib/toy/llm/labels.rb +142 -0
  72. data/lib/toy/llm/primitives/gqa.rb +62 -0
  73. data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
  74. data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
  75. data/lib/toy/llm/primitives/rms_norm.rb +39 -0
  76. data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
  77. data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
  78. data/lib/toy/llm/primitives/rope.rb +68 -0
  79. data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
  80. data/lib/toy/llm/primitives/rope_metal.rb +72 -0
  81. data/lib/toy/llm/primitives/swiglu.rb +41 -0
  82. data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
  83. data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
  84. data/lib/toy/llm/recipe_options.rb +71 -0
  85. data/lib/toy/llm/recipes/from_scratch.rb +105 -0
  86. data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
  87. data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
  88. data/lib/toy/llm/recipes/lora.rb +110 -0
  89. data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
  90. data/lib/toy/llm/recipes/lora_metal.rb +114 -0
  91. data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
  92. data/lib/toy/llm/recipes/warm_start.rb +235 -0
  93. data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
  94. data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
  95. data/lib/toy/llm/training_batch.rb +133 -0
  96. data/lib/toy/models/arch.rb +253 -0
  97. data/lib/toy/models/gpt2.rb +311 -0
  98. data/lib/toy/models/toy_gpt2.rb +177 -0
  99. data/lib/toy/models/toy_smollm2.rb +393 -0
  100. data/lib/toy/models/toy_vit.rb +83 -0
  101. data/lib/toy/models/transformer.rb +1494 -0
  102. data/lib/toy/models/transformer_lm.rb +298 -0
  103. data/lib/toy/models/transformer_lm_cuda.rb +159 -0
  104. data/lib/toy/models/transformer_lm_metal.rb +142 -0
  105. data/lib/toy/mri.rb +300 -0
  106. data/lib/toy/run/eval.rb +76 -0
  107. data/lib/toy/run/eval_cuda.rb +66 -0
  108. data/lib/toy/run/eval_lmc.rb +334 -0
  109. data/lib/toy/run/eval_metal.rb +67 -0
  110. data/lib/toy/run/infer.rb +130 -0
  111. data/lib/toy/run/infer_cuda.rb +118 -0
  112. data/lib/toy/run/infer_metal.rb +119 -0
  113. data/lib/toy/run/infer_trace.rb +37 -0
  114. data/lib/toy/run/serve.rb +144 -0
  115. data/lib/toy/run/train.rb +404 -0
  116. data/lib/toy/run/train_cuda.rb +397 -0
  117. data/lib/toy/run/train_gpt2.rb +103 -0
  118. data/lib/toy/run/train_gpt2_cuda.rb +85 -0
  119. data/lib/toy/run/train_gpt2_metal.rb +85 -0
  120. data/lib/toy/run/train_lora.rb +207 -0
  121. data/lib/toy/run/train_lora_cuda.rb +219 -0
  122. data/lib/toy/run/train_metal.rb +227 -0
  123. data/lib/toy/run/train_vit.rb +251 -0
  124. data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
  125. data/lib/toy/serve/openai/handlers.rb +143 -0
  126. data/lib/toy/serve/openai/server.rb +159 -0
  127. data/lib/toy/train/sampler.rb +314 -0
  128. data/lib/toy/train/toy_chat_template.rb +179 -0
  129. data/lib/toy/train/toy_drift_grad.rb +176 -0
  130. data/lib/toy/train/toy_gguf_fuse.rb +428 -0
  131. data/lib/toy/train/toy_gguf_writer.rb +100 -0
  132. data/lib/toy/train/toy_lr_schedule.rb +39 -0
  133. data/lib/toy/train/toy_sample.rb +125 -0
  134. data/lib/toy/train/toy_trainer.rb +86 -0
  135. data/lib/toy/train/training.rb +160 -0
  136. data/lib/toy/version.rb +11 -0
  137. data/lib/toy.rb +902 -0
  138. data/prep/progress +118 -0
  139. data/prep/quietly +64 -0
  140. data/sig/toy.rbs +397 -0
  141. data/sig/toy_compute.rbs +450 -0
  142. data/spinel-ext.json +122 -0
  143. data/tinynn/Makefile +71 -0
  144. data/tinynn/tinynn_backend_cuda.c +99 -0
  145. data/tinynn/tinynn_backend_metal.m +75 -0
  146. data/tinynn/tinynn_events.c +122 -0
  147. data/tinynn/tinynn_events.h +83 -0
  148. data/tinynn/tinynn_ggml.c +2460 -0
  149. data/tinynn/tinynn_ggml.h +545 -0
  150. data/tinynn/tinynn_gguf.c +783 -0
  151. data/tinynn/tinynn_gguf.h +167 -0
  152. data/tinynn/tinynn_trace.c +180 -0
  153. data/tinynn/tinynn_trace.h +85 -0
  154. data/vendor/ggml/AUTHORS +335 -0
  155. data/vendor/ggml/CMakeLists.txt +505 -0
  156. data/vendor/ggml/CONTRIBUTING.md +3 -0
  157. data/vendor/ggml/LICENSE +21 -0
  158. data/vendor/ggml/README.md +50 -0
  159. data/vendor/ggml/ci/run.sh +395 -0
  160. data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
  161. data/vendor/ggml/cmake/GitVars.cmake +22 -0
  162. data/vendor/ggml/cmake/common.cmake +50 -0
  163. data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
  164. data/vendor/ggml/docs/gguf.md +828 -0
  165. data/vendor/ggml/examples/CMakeLists.txt +34 -0
  166. data/vendor/ggml/examples/common-ggml.cpp +244 -0
  167. data/vendor/ggml/examples/common-ggml.h +18 -0
  168. data/vendor/ggml/examples/common.cpp +675 -0
  169. data/vendor/ggml/examples/common.h +322 -0
  170. data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
  171. data/vendor/ggml/examples/gpt-2/README.md +225 -0
  172. data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
  173. data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
  174. data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
  175. data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
  176. data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
  177. data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
  178. data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
  179. data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
  180. data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
  181. data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
  182. data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
  183. data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
  184. data/vendor/ggml/examples/gpt-j/README.md +239 -0
  185. data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
  186. data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
  187. data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
  188. data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
  189. data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
  190. data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
  191. data/vendor/ggml/examples/magika/README.md +23 -0
  192. data/vendor/ggml/examples/magika/convert.py +32 -0
  193. data/vendor/ggml/examples/magika/main.cpp +374 -0
  194. data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
  195. data/vendor/ggml/examples/mnist/README.md +206 -0
  196. data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
  197. data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
  198. data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
  199. data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
  200. data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
  201. data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
  202. data/vendor/ggml/examples/mnist/server.py +36 -0
  203. data/vendor/ggml/examples/mnist/web/index.html +178 -0
  204. data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
  205. data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
  206. data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
  207. data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
  208. data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
  209. data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
  210. data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
  211. data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
  212. data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
  213. data/vendor/ggml/examples/prompts/replit.txt +100 -0
  214. data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
  215. data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
  216. data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
  217. data/vendor/ggml/examples/prompts/whisper.txt +100 -0
  218. data/vendor/ggml/examples/python/README.md +115 -0
  219. data/vendor/ggml/examples/python/api.h +14 -0
  220. data/vendor/ggml/examples/python/example_add_quant.py +25 -0
  221. data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
  222. data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
  223. data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
  224. data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
  225. data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
  226. data/vendor/ggml/examples/python/ggml/utils.py +182 -0
  227. data/vendor/ggml/examples/python/regenerate.py +42 -0
  228. data/vendor/ggml/examples/python/stubs.py +128 -0
  229. data/vendor/ggml/examples/python/test_tensor.py +258 -0
  230. data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
  231. data/vendor/ggml/examples/sam/README.md +95 -0
  232. data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
  233. data/vendor/ggml/examples/sam/example.jpg +0 -0
  234. data/vendor/ggml/examples/sam/sam.cpp +2370 -0
  235. data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
  236. data/vendor/ggml/examples/simple/README.md +61 -0
  237. data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
  238. data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
  239. data/vendor/ggml/examples/stb_image.h +7987 -0
  240. data/vendor/ggml/examples/stb_image_write.h +1724 -0
  241. data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
  242. data/vendor/ggml/examples/test-cmake/README.md +3 -0
  243. data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
  244. data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
  245. data/vendor/ggml/examples/yolo/README.md +59 -0
  246. data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
  247. data/vendor/ggml/examples/yolo/data/coco.names +80 -0
  248. data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
  249. data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
  250. data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
  251. data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
  252. data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
  253. data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
  254. data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
  255. data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
  256. data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
  257. data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
  258. data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
  259. data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
  260. data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
  261. data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
  262. data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
  263. data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
  264. data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
  265. data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
  266. data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
  267. data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
  268. data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
  269. data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
  270. data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
  271. data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
  272. data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
  273. data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
  274. data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
  275. data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
  276. data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
  277. data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
  278. data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
  279. data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
  280. data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
  281. data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
  282. data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
  283. data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
  284. data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
  285. data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
  286. data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
  287. data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
  288. data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
  289. data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
  290. data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
  291. data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
  292. data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
  293. data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
  294. data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
  295. data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
  296. data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
  297. data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
  298. data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
  299. data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
  300. data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
  301. data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
  302. data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
  303. data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
  304. data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
  305. data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
  306. data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
  307. data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
  308. data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
  309. data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
  310. data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
  311. data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
  312. data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
  313. data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
  314. data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
  315. data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
  316. data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
  317. data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
  318. data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
  319. data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
  320. data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
  321. data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
  322. data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
  323. data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
  324. data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
  325. data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
  326. data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
  327. data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
  328. data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
  329. data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
  330. data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
  331. data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
  332. data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
  333. data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
  334. data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
  335. data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
  336. data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
  337. data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
  338. data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
  339. data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
  340. data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
  341. data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
  342. data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
  343. data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
  344. data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
  345. data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
  346. data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
  347. data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
  348. data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
  349. data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
  350. data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
  351. data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
  352. data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
  353. data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
  354. data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
  355. data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
  356. data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
  357. data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
  358. data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
  359. data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
  360. data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
  361. data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
  362. data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
  363. data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
  364. data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
  365. data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
  366. data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
  367. data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
  368. data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
  369. data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
  370. data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
  371. data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
  372. data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
  373. data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
  374. data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
  375. data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
  376. data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
  377. data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
  378. data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
  379. data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
  380. data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
  381. data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
  382. data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
  383. data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
  384. data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
  385. data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
  386. data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
  387. data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
  388. data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
  389. data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
  390. data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
  391. data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
  392. data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
  393. data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
  394. data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
  395. data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
  396. data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
  397. data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
  398. data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
  399. data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
  400. data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
  401. data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
  402. data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
  403. data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
  404. data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
  405. data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
  406. data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
  407. data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
  408. data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
  409. data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
  410. data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
  411. data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
  412. data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
  413. data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
  414. data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
  415. data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
  416. data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
  417. data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
  418. data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
  419. data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
  420. data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
  421. data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
  422. data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
  423. data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
  424. data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
  425. data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
  426. data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
  427. data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
  428. data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
  429. data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
  430. data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
  431. data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
  432. data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
  433. data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
  434. data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
  435. data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
  436. data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
  437. data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
  438. data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
  439. data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
  440. data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
  441. data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
  442. data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
  443. data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
  444. data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
  445. data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
  446. data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
  447. data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
  448. data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
  449. data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
  450. data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
  451. data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
  452. data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
  453. data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
  454. data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
  455. data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
  456. data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
  457. data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
  458. data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
  459. data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
  460. data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
  461. data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
  462. data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
  463. data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
  464. data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
  465. data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
  466. data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
  467. data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
  468. data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
  469. data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
  470. data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
  471. data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
  472. data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
  473. data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
  474. data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
  475. data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
  476. data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
  477. data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
  478. data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
  479. data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
  480. data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
  481. data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
  482. data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
  483. data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
  484. data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
  485. data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
  486. data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
  487. data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
  488. data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
  489. data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
  490. data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
  491. data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
  492. data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
  493. data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
  494. data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
  495. data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
  496. data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
  497. data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
  498. data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
  499. data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
  500. data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
  501. data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
  502. data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
  503. data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
  504. data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
  505. data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
  506. data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
  507. data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
  508. data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
  509. data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
  510. data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
  511. data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
  512. data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
  513. data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
  514. data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
  515. data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
  516. data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
  517. data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
  518. data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
  519. data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
  520. data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
  521. data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
  522. data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
  523. data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
  524. data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
  525. data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
  526. data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
  527. data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
  528. data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
  529. data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
  530. data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
  531. data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
  532. data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
  533. data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
  534. data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
  535. data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
  536. data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
  537. data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
  538. data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
  539. data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
  540. data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
  541. data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
  542. data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
  543. data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
  544. data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
  545. data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
  546. data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
  547. data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
  548. data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
  549. data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
  550. data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
  551. data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
  552. data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
  553. data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
  554. data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
  555. data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
  556. data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
  557. data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
  558. data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
  559. data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
  560. data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
  561. data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
  562. data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
  563. data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
  564. data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
  565. data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
  566. data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
  567. data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
  568. data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
  569. data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
  570. data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
  571. data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
  572. data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
  573. data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
  574. data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
  575. data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
  576. data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
  577. data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
  578. data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
  579. data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
  580. data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
  581. data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
  582. data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
  583. data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
  584. data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
  585. data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
  586. data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
  587. data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
  588. data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
  589. data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
  590. data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
  591. data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
  592. data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
  593. data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
  594. data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
  595. data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
  596. data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
  597. data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
  598. data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
  599. data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
  600. data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
  601. data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
  602. data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
  603. data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
  604. data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
  605. data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
  606. data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
  607. data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
  608. data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
  609. data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
  610. data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
  611. data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
  612. data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
  613. data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
  614. data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
  615. data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
  616. data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
  617. data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
  618. data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
  619. data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
  620. data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
  621. data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
  622. data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
  623. data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
  624. data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
  625. data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
  626. data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
  627. data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
  628. data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
  629. data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
  630. data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
  631. data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
  632. data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
  633. data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
  634. data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
  635. data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
  636. data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
  637. data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
  638. data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
  639. data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
  640. data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
  641. data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
  642. data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
  643. data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
  644. data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
  645. data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
  646. data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
  647. data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
  648. data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
  649. data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
  650. data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
  651. data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
  652. data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
  653. data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
  654. data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
  655. data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
  656. data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
  657. data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
  658. data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
  659. data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
  660. data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
  661. data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
  662. data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
  663. data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
  664. data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
  665. data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
  666. data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
  667. data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
  668. data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
  669. data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
  670. data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
  671. data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
  672. data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
  673. data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
  674. data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
  675. data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
  676. data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
  677. data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
  678. data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
  679. data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
  680. data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
  681. data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
  682. data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
  683. data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
  684. data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
  685. data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
  686. data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
  687. data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
  688. data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
  689. data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
  690. data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
  691. data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
  692. data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
  693. data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
  694. data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
  695. data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
  696. data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
  697. data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
  698. data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
  699. data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
  700. data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
  701. data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
  702. data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
  703. data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
  704. data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
  705. data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
  706. data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
  707. data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
  708. data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
  709. data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
  710. data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
  711. data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
  712. data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
  713. data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
  714. data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
  715. data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
  716. data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
  717. data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
  718. data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
  719. data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
  720. data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
  721. data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
  722. data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
  723. data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
  724. data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
  725. data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
  726. data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
  727. data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
  728. data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
  729. data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
  730. data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
  731. data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
  732. data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
  733. data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
  734. data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
  735. data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
  736. data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
  737. data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
  738. data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
  739. data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
  740. data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
  741. data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
  742. data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
  743. data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
  744. data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
  745. data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
  746. data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
  747. data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
  748. data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
  749. data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
  750. data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
  751. data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
  752. data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
  753. data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
  754. data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
  755. data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
  756. data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
  757. data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
  758. data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
  759. data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
  760. data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
  761. data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
  762. data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
  763. data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
  764. data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
  765. data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
  766. data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
  767. data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
  768. data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
  769. data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
  770. data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
  771. data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
  772. data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
  773. data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
  774. data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
  775. data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
  776. data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
  777. data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
  778. data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
  779. data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
  780. data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
  781. data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
  782. data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
  783. data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
  784. data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
  785. data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
  786. data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
  787. data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
  788. data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
  789. data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
  790. data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
  791. data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
  792. data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
  793. data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
  794. data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
  795. data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
  796. data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
  797. data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
  798. data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
  799. data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
  800. data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
  801. data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
  802. data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
  803. data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
  804. data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
  805. data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
  806. data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
  807. data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
  808. data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
  809. data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
  810. data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
  811. data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
  812. data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
  813. data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
  814. data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
  815. data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
  816. data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
  817. data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
  818. data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
  819. data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
  820. data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
  821. data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
  822. data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
  823. data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
  824. data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
  825. data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
  826. data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
  827. data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
  828. data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
  829. data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
  830. data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
  831. data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
  832. data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
  833. data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
  834. data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
  835. data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
  836. data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
  837. data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
  838. data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
  839. data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
  840. data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
  841. data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
  842. data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
  843. data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
  844. data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
  845. data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
  846. data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
  847. data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
  848. data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
  849. data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
  850. data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
  851. data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
  852. data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
  853. data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
  854. data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
  855. data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
  856. data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
  857. data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
  858. data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
  859. data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
  860. data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
  861. data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
  862. data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
  863. data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
  864. data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
  865. data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
  866. data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
  867. data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
  868. data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
  869. data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
  870. data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
  871. data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
  872. data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
  873. data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
  874. data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
  875. data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
  876. data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
  877. data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
  878. data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
  879. data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
  880. data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
  881. data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
  882. data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
  883. data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
  884. data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
  885. data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
  886. data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
  887. data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
  888. data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
  889. data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
  890. data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
  891. data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
  892. data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
  893. data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
  894. data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
  895. data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
  896. data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
  897. data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
  898. data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
  899. data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
  900. data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
  901. data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
  902. data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
  903. data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
  904. data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
  905. data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
  906. data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
  907. data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
  908. data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
  909. data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
  910. data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
  911. data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
  912. data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
  913. data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
  914. data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
  915. data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
  916. data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
  917. data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
  918. data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
  919. data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
  920. data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
  921. data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
  922. data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
  923. data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
  924. data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
  925. data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
  926. data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
  927. data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
  928. data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
  929. data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
  930. data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
  931. data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
  932. data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
  933. data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
  934. data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
  935. data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
  936. data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
  937. data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
  938. data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
  939. data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
  940. data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
  941. data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
  942. data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
  943. data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
  944. data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
  945. data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
  946. data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
  947. data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
  948. data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
  949. data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
  950. data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
  951. data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
  952. data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
  953. data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
  954. data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
  955. data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
  956. data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
  957. data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
  958. data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
  959. data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
  960. data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
  961. data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
  962. data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
  963. data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
  964. data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
  965. data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
  966. data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
  967. data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
  968. data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
  969. data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
  970. data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
  971. data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
  972. data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
  973. data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
  974. data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
  975. data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
  976. data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
  977. data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
  978. data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
  979. data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
  980. data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
  981. data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
  982. data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
  983. data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
  984. data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
  985. data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
  986. data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
  987. data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
  988. data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
  989. data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
  990. data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
  991. data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
  992. data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
  993. data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
  994. data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
  995. data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
  996. data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
  997. data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
  998. data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
  999. data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
  1000. data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
  1001. data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
  1002. data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
  1003. data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
  1004. data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
  1005. data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
  1006. data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
  1007. data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
  1008. data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
  1009. data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
  1010. data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
  1011. data/vendor/ggml/ggml.pc.in +10 -0
  1012. data/vendor/ggml/include/ggml-alloc.h +85 -0
  1013. data/vendor/ggml/include/ggml-backend.h +431 -0
  1014. data/vendor/ggml/include/ggml-blas.h +25 -0
  1015. data/vendor/ggml/include/ggml-cann.h +123 -0
  1016. data/vendor/ggml/include/ggml-cpp.h +39 -0
  1017. data/vendor/ggml/include/ggml-cpu.h +151 -0
  1018. data/vendor/ggml/include/ggml-cuda.h +50 -0
  1019. data/vendor/ggml/include/ggml-hexagon.h +19 -0
  1020. data/vendor/ggml/include/ggml-metal.h +61 -0
  1021. data/vendor/ggml/include/ggml-opencl.h +26 -0
  1022. data/vendor/ggml/include/ggml-openvino.h +37 -0
  1023. data/vendor/ggml/include/ggml-opt.h +256 -0
  1024. data/vendor/ggml/include/ggml-rpc.h +35 -0
  1025. data/vendor/ggml/include/ggml-sycl.h +49 -0
  1026. data/vendor/ggml/include/ggml-virtgpu.h +14 -0
  1027. data/vendor/ggml/include/ggml-vulkan.h +29 -0
  1028. data/vendor/ggml/include/ggml-webgpu.h +19 -0
  1029. data/vendor/ggml/include/ggml-zdnn.h +17 -0
  1030. data/vendor/ggml/include/ggml-zendnn.h +22 -0
  1031. data/vendor/ggml/include/ggml.h +2845 -0
  1032. data/vendor/ggml/include/gguf.h +204 -0
  1033. data/vendor/ggml/requirements.txt +12 -0
  1034. data/vendor/ggml/scripts/gen-authors.sh +9 -0
  1035. data/vendor/ggml/scripts/release.sh +296 -0
  1036. data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
  1037. data/vendor/ggml/scripts/sync-llama.last +1 -0
  1038. data/vendor/ggml/scripts/sync-llama.sh +21 -0
  1039. data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
  1040. data/vendor/ggml/scripts/sync-whisper.last +1 -0
  1041. data/vendor/ggml/scripts/sync-whisper.sh +17 -0
  1042. data/vendor/ggml/src/CMakeLists.txt +493 -0
  1043. data/vendor/ggml/src/ggml-alloc.c +1248 -0
  1044. data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
  1045. data/vendor/ggml/src/ggml-backend-dl.h +45 -0
  1046. data/vendor/ggml/src/ggml-backend-impl.h +275 -0
  1047. data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
  1048. data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
  1049. data/vendor/ggml/src/ggml-backend.cpp +2371 -0
  1050. data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  1051. data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
  1052. data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
  1053. data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
  1054. data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
  1055. data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
  1056. data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
  1057. data/vendor/ggml/src/ggml-cann/common.h +651 -0
  1058. data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
  1059. data/vendor/ggml/src/ggml-common.h +1900 -0
  1060. data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
  1061. data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
  1062. data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
  1063. data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
  1064. data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  1065. data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  1066. data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  1067. data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
  1068. data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
  1069. data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
  1070. data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  1071. data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
  1072. data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  1073. data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
  1074. data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
  1075. data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  1076. data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
  1077. data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
  1078. data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  1079. data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
  1080. data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
  1081. data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
  1082. data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  1083. data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
  1084. data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  1085. data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
  1086. data/vendor/ggml/src/ggml-cpu/common.h +95 -0
  1087. data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
  1088. data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
  1089. data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
  1090. data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
  1091. data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
  1092. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
  1093. data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  1094. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
  1095. data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  1096. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
  1097. data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  1098. data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
  1099. data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
  1100. data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
  1101. data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
  1102. data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
  1103. data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
  1104. data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
  1105. data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
  1106. data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
  1107. data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
  1108. data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
  1109. data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
  1110. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
  1111. data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
  1112. data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
  1113. data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
  1114. data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
  1115. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
  1116. data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
  1117. data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
  1118. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
  1119. data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
  1120. data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
  1121. data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
  1122. data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
  1123. data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  1124. data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
  1125. data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
  1126. data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
  1127. data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
  1128. data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
  1129. data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
  1130. data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
  1131. data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
  1132. data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
  1133. data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
  1134. data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
  1135. data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
  1136. data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
  1137. data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
  1138. data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
  1139. data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
  1140. data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
  1141. data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
  1142. data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
  1143. data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
  1144. data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
  1145. data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
  1146. data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
  1147. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
  1148. data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  1149. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  1150. data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  1151. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
  1152. data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
  1153. data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
  1154. data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  1155. data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
  1156. data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
  1157. data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
  1158. data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  1159. data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  1160. data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  1161. data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
  1162. data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
  1163. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
  1164. data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  1165. data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
  1166. data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  1167. data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
  1168. data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
  1169. data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
  1170. data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
  1171. data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  1172. data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
  1173. data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
  1174. data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
  1175. data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
  1176. data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
  1177. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
  1178. data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
  1179. data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
  1180. data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
  1181. data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
  1182. data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
  1183. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
  1184. data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  1185. data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
  1186. data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
  1187. data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
  1188. data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
  1189. data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
  1190. data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
  1191. data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
  1192. data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
  1193. data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
  1194. data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
  1195. data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
  1196. data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
  1197. data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
  1198. data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
  1199. data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
  1200. data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
  1201. data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
  1202. data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
  1203. data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
  1204. data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
  1205. data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
  1206. data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
  1207. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  1208. data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  1209. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  1210. data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  1211. data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
  1212. data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  1213. data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
  1214. data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
  1215. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  1216. data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  1217. data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
  1218. data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  1219. data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
  1220. data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
  1221. data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
  1222. data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
  1223. data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
  1224. data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
  1225. data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
  1226. data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
  1227. data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
  1228. data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
  1229. data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  1230. data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
  1231. data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
  1232. data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
  1233. data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
  1234. data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
  1235. data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
  1236. data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
  1237. data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
  1238. data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  1239. data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  1240. data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
  1241. data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  1242. data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
  1243. data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  1244. data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
  1245. data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
  1246. data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
  1247. data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  1248. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
  1249. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
  1250. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
  1251. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  1252. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  1253. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
  1254. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
  1255. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
  1256. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
  1257. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
  1258. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  1259. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  1260. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
  1261. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  1262. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
  1263. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
  1264. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  1265. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  1266. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  1267. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
  1268. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
  1269. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  1270. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  1271. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
  1272. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  1273. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
  1274. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  1275. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
  1276. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  1277. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  1278. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  1279. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  1280. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  1281. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
  1282. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
  1283. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
  1284. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
  1285. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
  1286. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
  1287. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
  1288. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
  1289. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  1290. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  1291. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  1292. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  1293. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  1294. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  1295. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
  1296. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  1297. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  1298. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  1299. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  1300. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  1301. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  1302. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
  1303. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  1304. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  1305. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  1306. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  1307. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  1308. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  1309. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
  1310. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  1311. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  1312. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  1313. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  1314. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  1315. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  1316. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
  1317. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  1318. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  1319. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  1320. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  1321. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  1322. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  1323. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
  1324. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  1325. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  1326. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  1327. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  1328. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  1329. data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  1330. data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
  1331. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  1332. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  1333. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  1334. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  1335. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  1336. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  1337. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  1338. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  1339. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  1340. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  1341. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  1342. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  1343. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  1344. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  1345. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  1346. data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  1347. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  1348. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  1349. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  1350. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  1351. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  1352. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  1353. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  1354. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  1355. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  1356. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
  1357. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
  1358. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  1359. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  1360. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  1361. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  1362. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  1363. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  1364. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  1365. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  1366. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  1367. data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  1368. data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
  1369. data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
  1370. data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
  1371. data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
  1372. data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
  1373. data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
  1374. data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
  1375. data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  1376. data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
  1377. data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
  1378. data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
  1379. data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
  1380. data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
  1381. data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
  1382. data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
  1383. data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
  1384. data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
  1385. data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
  1386. data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
  1387. data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
  1388. data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
  1389. data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
  1390. data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
  1391. data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
  1392. data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  1393. data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
  1394. data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
  1395. data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
  1396. data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
  1397. data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
  1398. data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
  1399. data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
  1400. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  1401. data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
  1402. data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
  1403. data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  1404. data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
  1405. data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
  1406. data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
  1407. data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
  1408. data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
  1409. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
  1410. data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
  1411. data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
  1412. data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
  1413. data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
  1414. data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
  1415. data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  1416. data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
  1417. data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
  1418. data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
  1419. data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  1420. data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
  1421. data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  1422. data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  1423. data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  1424. data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
  1425. data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  1426. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
  1427. data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  1428. data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  1429. data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
  1430. data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
  1431. data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
  1432. data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
  1433. data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
  1434. data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
  1435. data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
  1436. data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
  1437. data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
  1438. data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  1439. data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
  1440. data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
  1441. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  1442. data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  1443. data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  1444. data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  1445. data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
  1446. data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
  1447. data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
  1448. data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
  1449. data/vendor/ggml/src/ggml-impl.h +783 -0
  1450. data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  1451. data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  1452. data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  1453. data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  1454. data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
  1455. data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
  1456. data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
  1457. data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
  1458. data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
  1459. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
  1460. data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
  1461. data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
  1462. data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
  1463. data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
  1464. data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
  1465. data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
  1466. data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
  1467. data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
  1468. data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  1469. data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  1470. data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  1471. data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  1472. data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  1473. data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  1474. data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  1475. data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
  1476. data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  1477. data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
  1478. data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  1479. data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  1480. data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  1481. data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  1482. data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  1483. data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  1484. data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  1485. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  1486. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  1487. data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  1488. data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  1489. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  1490. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
  1491. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
  1492. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
  1493. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
  1494. data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
  1495. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
  1496. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
  1497. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  1498. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
  1499. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
  1500. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
  1501. data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
  1502. data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
  1503. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  1504. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
  1505. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
  1506. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
  1507. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
  1508. data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
  1509. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
  1510. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
  1511. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
  1512. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  1513. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
  1514. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
  1515. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
  1516. data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
  1517. data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  1518. data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  1519. data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  1520. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  1521. data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  1522. data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  1523. data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  1524. data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
  1525. data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
  1526. data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  1527. data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  1528. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  1529. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  1530. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  1531. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
  1532. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  1533. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  1534. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
  1535. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
  1536. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  1537. data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  1538. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  1539. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  1540. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  1541. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  1542. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  1543. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  1544. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  1545. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  1546. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  1547. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  1548. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
  1549. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
  1550. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  1551. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  1552. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  1553. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  1554. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  1555. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  1556. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  1557. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  1558. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  1559. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  1560. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
  1561. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
  1562. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
  1563. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  1564. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  1565. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  1566. data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  1567. data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  1568. data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  1569. data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  1570. data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  1571. data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  1572. data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  1573. data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  1574. data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  1575. data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  1576. data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  1577. data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  1578. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  1579. data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  1580. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  1581. data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  1582. data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  1583. data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  1584. data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  1585. data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  1586. data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  1587. data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  1588. data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  1589. data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  1590. data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
  1591. data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  1592. data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  1593. data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  1594. data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  1595. data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
  1596. data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  1597. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
  1598. data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  1599. data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
  1600. data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
  1601. data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  1602. data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  1603. data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  1604. data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  1605. data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  1606. data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  1607. data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  1608. data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  1609. data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  1610. data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  1611. data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  1612. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  1613. data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  1614. data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  1615. data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  1616. data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  1617. data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  1618. data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
  1619. data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  1620. data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  1621. data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  1622. data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  1623. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
  1624. data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  1625. data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  1626. data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
  1627. data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
  1628. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  1629. data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  1630. data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  1631. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  1632. data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  1633. data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
  1634. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
  1635. data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  1636. data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
  1637. data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
  1638. data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
  1639. data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
  1640. data/vendor/ggml/src/ggml-opt.cpp +1094 -0
  1641. data/vendor/ggml/src/ggml-quants.c +5491 -0
  1642. data/vendor/ggml/src/ggml-quants.h +112 -0
  1643. data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
  1644. data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
  1645. data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
  1646. data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
  1647. data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
  1648. data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
  1649. data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
  1650. data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
  1651. data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
  1652. data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  1653. data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
  1654. data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
  1655. data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
  1656. data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
  1657. data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
  1658. data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
  1659. data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
  1660. data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
  1661. data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  1662. data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  1663. data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
  1664. data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
  1665. data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
  1666. data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
  1667. data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
  1668. data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
  1669. data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
  1670. data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
  1671. data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  1672. data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
  1673. data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
  1674. data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
  1675. data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
  1676. data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
  1677. data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
  1678. data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
  1679. data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
  1680. data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
  1681. data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
  1682. data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
  1683. data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
  1684. data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
  1685. data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
  1686. data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
  1687. data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
  1688. data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
  1689. data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
  1690. data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
  1691. data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
  1692. data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
  1693. data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
  1694. data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
  1695. data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  1696. data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
  1697. data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
  1698. data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
  1699. data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
  1700. data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
  1701. data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
  1702. data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
  1703. data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
  1704. data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
  1705. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  1706. data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  1707. data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
  1708. data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
  1709. data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
  1710. data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  1711. data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  1712. data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
  1713. data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
  1714. data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
  1715. data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
  1716. data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
  1717. data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
  1718. data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
  1719. data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  1720. data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
  1721. data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
  1722. data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
  1723. data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
  1724. data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
  1725. data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  1726. data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
  1727. data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
  1728. data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
  1729. data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
  1730. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  1731. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  1732. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  1733. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  1734. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
  1735. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  1736. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  1737. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  1738. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  1739. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  1740. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
  1741. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
  1742. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
  1743. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
  1744. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
  1745. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
  1746. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
  1747. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
  1748. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
  1749. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
  1750. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
  1751. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
  1752. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
  1753. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
  1754. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
  1755. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
  1756. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
  1757. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
  1758. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
  1759. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
  1760. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
  1761. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
  1762. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
  1763. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
  1764. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
  1765. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
  1766. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
  1767. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
  1768. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
  1769. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
  1770. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
  1771. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
  1772. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
  1773. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
  1774. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
  1775. data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
  1776. data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  1777. data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  1778. data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
  1779. data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
  1780. data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
  1781. data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
  1782. data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
  1783. data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
  1784. data/vendor/ggml/src/ggml-threading.cpp +12 -0
  1785. data/vendor/ggml/src/ggml-threading.h +14 -0
  1786. data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  1787. data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  1788. data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  1789. data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  1790. data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  1791. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  1792. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  1793. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  1794. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  1795. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  1796. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  1797. data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  1798. data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  1799. data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  1800. data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  1801. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  1802. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  1803. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  1804. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  1805. data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  1806. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  1807. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
  1808. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
  1809. data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  1810. data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
  1811. data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  1812. data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  1813. data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  1814. data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  1815. data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  1816. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  1817. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  1818. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  1819. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  1820. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  1821. data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  1822. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
  1823. data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  1824. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  1825. data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  1826. data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
  1827. data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
  1828. data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
  1829. data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  1830. data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
  1831. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  1832. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  1833. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  1834. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  1835. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  1836. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  1837. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  1838. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  1839. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  1840. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  1841. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  1842. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  1843. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  1844. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  1845. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  1846. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  1847. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  1848. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  1849. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  1850. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
  1851. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  1852. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  1853. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  1854. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  1855. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  1856. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  1857. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  1858. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  1859. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
  1860. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
  1861. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  1862. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  1863. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  1864. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  1865. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  1866. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  1867. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  1868. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  1869. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  1870. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  1871. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  1872. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
  1873. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
  1874. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  1875. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  1876. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  1877. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  1878. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  1879. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  1880. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  1881. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  1882. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  1883. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  1884. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
  1885. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  1886. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  1887. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  1888. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
  1889. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  1890. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  1891. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  1892. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  1893. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  1894. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
  1895. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
  1896. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
  1897. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
  1898. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
  1899. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  1900. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
  1901. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  1902. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  1903. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
  1904. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  1905. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  1906. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  1907. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  1908. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  1909. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  1910. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
  1911. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  1912. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  1913. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  1914. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  1915. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
  1916. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
  1917. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  1918. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  1919. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  1920. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
  1921. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
  1922. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  1923. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  1924. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
  1925. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  1926. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  1927. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  1928. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  1929. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  1930. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  1931. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  1932. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  1933. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  1934. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  1935. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  1936. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  1937. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  1938. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  1939. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  1940. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  1941. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
  1942. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  1943. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  1944. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  1945. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
  1946. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  1947. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  1948. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
  1949. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  1950. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  1951. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  1952. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
  1953. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
  1954. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  1955. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  1956. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  1957. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  1958. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  1959. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  1960. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
  1961. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  1962. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  1963. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  1964. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  1965. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  1966. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  1967. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  1968. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  1969. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  1970. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
  1971. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  1972. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  1973. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  1974. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
  1975. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  1976. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  1977. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  1978. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  1979. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  1980. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  1981. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  1982. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  1983. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  1984. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  1985. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  1986. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  1987. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  1988. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  1989. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  1990. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  1991. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  1992. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  1993. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  1994. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  1995. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  1996. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  1997. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  1998. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  1999. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  2000. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  2001. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  2002. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  2003. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  2004. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  2005. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  2006. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
  2007. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  2008. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
  2009. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  2010. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  2011. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
  2012. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  2013. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  2014. data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  2015. data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  2016. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
  2017. data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
  2018. data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  2019. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
  2020. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  2021. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  2022. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  2023. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
  2024. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
  2025. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  2026. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
  2027. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
  2028. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  2029. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
  2030. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
  2031. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
  2032. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
  2033. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
  2034. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
  2035. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
  2036. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
  2037. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
  2038. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
  2039. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  2040. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
  2041. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
  2042. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
  2043. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
  2044. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
  2045. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
  2046. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
  2047. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
  2048. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
  2049. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  2050. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  2051. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
  2052. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
  2053. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
  2054. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  2055. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
  2056. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  2057. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
  2058. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
  2059. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
  2060. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
  2061. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  2062. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
  2063. data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
  2064. data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  2065. data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
  2066. data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
  2067. data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  2068. data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  2069. data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
  2070. data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
  2071. data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  2072. data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
  2073. data/vendor/ggml/src/ggml.c +7777 -0
  2074. data/vendor/ggml/src/ggml.cpp +26 -0
  2075. data/vendor/ggml/src/gguf.cpp +1556 -0
  2076. data/vendor/ggml/tests/CMakeLists.txt +356 -0
  2077. data/vendor/ggml/tests/test-arange.cpp +100 -0
  2078. data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
  2079. data/vendor/ggml/tests/test-cont.c +170 -0
  2080. data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
  2081. data/vendor/ggml/tests/test-conv-transpose.c +248 -0
  2082. data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
  2083. data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
  2084. data/vendor/ggml/tests/test-conv1d.cpp +289 -0
  2085. data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
  2086. data/vendor/ggml/tests/test-conv2d.cpp +391 -0
  2087. data/vendor/ggml/tests/test-customop.c +300 -0
  2088. data/vendor/ggml/tests/test-dup.c +111 -0
  2089. data/vendor/ggml/tests/test-interpolate.cpp +166 -0
  2090. data/vendor/ggml/tests/test-opt.cpp +1003 -0
  2091. data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
  2092. data/vendor/ggml/tests/test-pool.c +274 -0
  2093. data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
  2094. data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
  2095. data/vendor/ggml/tests/test-rel-pos.c +87 -0
  2096. data/vendor/ggml/tests/test-roll.cpp +128 -0
  2097. data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
  2098. data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
  2099. data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
  2100. data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
  2101. data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
  2102. data/vendor-patches/0005-concat-backward.patch +36 -0
  2103. data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
  2104. data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
  2105. data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
  2106. data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
  2107. metadata +2161 -0
data/Makefile ADDED
@@ -0,0 +1,2022 @@
1
+ # toy build system.
2
+ #
3
+ # Demo / training Ruby drivers live in demos/ and compile to native
4
+ # binaries via Spinel. GPU acceleration is opt-in:
5
+ #
6
+ # make # same as `make help` (the default goal); `make all` builds demos/train + demos/smollm2
7
+ # make setup-ggml # one-time clone + CPU build of vendored ggml
8
+ # make setup-ggml-cuda # one-time clone + CUDA build (needs CUDA toolkit)
9
+ # make setup-ggml-metal # one-time Metal build (macOS only)
10
+ # make smoke # tinynn FFI smoke test (4x3 ggml matmul demo)
11
+ # make distilgpt2_demo_text # → demos/distilgpt2_demo_text
12
+ #
13
+ # Vendored ggml lives at vendor/ggml/ (gitignored).
14
+ # The CUDA build expects sm_121 (NVIDIA GB10); override with
15
+ # GGML_CUDA_ARCH=NN on the command line.
16
+ # The Metal build uses GGML_METAL_EMBED_LIBRARY=ON so it works with
17
+ # Command Line Tools only (the xcrun metal / metallib compilers ship
18
+ # only with full Xcode). Kernels get JIT-compiled at first device load.
19
+
20
+ SPINEL_DIR ?= $(HOME)/sites/spinel
21
+ SPINEL_BIN ?= $(SPINEL_DIR)/spinel
22
+
23
+ # toy#69 — sig/*.rbs type roots. Every Spinel compile seeds the analyzer
24
+ # with toy's shipped RBS tree (`--rbs sig`): uncalled public methods
25
+ # keep their DECLARED param/return/ivar types instead of widening to
26
+ # poly (the spinel-dev#11/#12 facet family). Seeds are ADVISORY —
27
+ # inference runs on top and widens on observed contradiction (spinel
28
+ # docs/RBS-EXTRACT.md), and the full gate sweep was byte-exact at
29
+ # adoption. Vendored gems' sig roots ride along through the gitignored
30
+ # sig/vendor symlink -> ../vendor/spinel/sig, refreshed by `make
31
+ # vendor-tep` (spinel takes ONE --rbs dir; spinel_rbs_extract walks it
32
+ # recursively and follows symlinks). Set SPINEL_RBS= (empty) to compile
33
+ # without seeds when chasing an analyzer issue.
34
+ SPINEL_RBS ?= --rbs $(CURDIR)/sig
35
+
36
+ # --- DevEx polish knobs (cosmetic, never gate correctness) ----------------
37
+ # QUIET=1 (default) routes known-harmless build chatter through the
38
+ # prep/quietly + prep/progress helpers so the terminal stays readable
39
+ # on a fresh clone. QUIET=0 disables all filtering (useful when chasing
40
+ # a Spinel codegen issue or a cmake misconfig).
41
+ # - prep/quietly silences exact-substring patterns; exit code is
42
+ # ALWAYS the child's, so real errors still propagate.
43
+ # - prep/progress draws a single-line [NN%] bar over cmake/make's
44
+ # own progress markers; full output is tee'd to a .log file in
45
+ # vendor/ggml/. On non-zero exit it dumps the log tail to stderr.
46
+ QUIET ?= 1
47
+ QUIETLY := $(CURDIR)/prep/quietly
48
+ PROGRESS := $(CURDIR)/prep/progress
49
+ ifeq ($(QUIET),0)
50
+ SPINEL = $(SPINEL_BIN) $(SPINEL_RBS)
51
+ else
52
+ SPINEL = $(QUIETLY) \
53
+ 'cannot resolve call to' \
54
+ 'ignoring duplicate libraries' \
55
+ -- $(SPINEL_BIN) $(SPINEL_RBS)
56
+ endif
57
+ # Sentinel deps so example/demo Spinel-compiled binaries get re-spun
58
+ # when the Spinel compiler itself changes. Without this, stale .o /
59
+ # .a in tinynn/ combined with newer Spinel C codegen can produce
60
+ # misaligned binaries that segfault at init (Tao hit this 2026-05-26
61
+ # after pulling Spinel 2183a92 — the lib archives weren't rebuilt).
62
+ SPINEL_DEPS := $(SPINEL_DIR)/spinel_analyze $(SPINEL_DIR)/spinel_codegen
63
+
64
+ CC ?= cc
65
+ CFLAGS ?= -O2 -fPIC -Wall -Wextra
66
+ ARFLAGS = rcs
67
+
68
+ # macOS Command Line Tools (as of 26.x) keep stale 2023 C++ stub headers
69
+ # at /Library/Developer/CommandLineTools/usr/include/c++/v1 which shadow
70
+ # the real headers in the SDK. Prepend the SDK's libc++ include path so
71
+ # ggml's C++ files can find <mutex>, <array>, etc. No-op on Linux.
72
+ UNAME_S := $(shell uname -s)
73
+ ifeq ($(UNAME_S),Darwin)
74
+ CMAKE_ENV := CPLUS_INCLUDE_PATH=$(shell xcrun --show-sdk-path)/usr/include/c++/v1
75
+ NJOBS := $(shell sysctl -n hw.logicalcpu)
76
+ else
77
+ CMAKE_ENV :=
78
+ NJOBS := $(shell nproc)
79
+ endif
80
+
81
+ # --- vendored ggml ----------------------------------------------------------
82
+ GGML_DIR := vendor/ggml
83
+ GGML_REPO := https://github.com/ggml-org/ggml.git
84
+ # Pinned upstream rev: what the vendor-patches/ set is proven against, and
85
+ # what ships inside the gem (toy#45). Bump deliberately, re-proving patches.
86
+ #
87
+ # MUST be the FULL 40-char SHA (toy#60 item 5): the clone rule does a
88
+ # shallow `git fetch origin $(GGML_REV)`, and GitHub only serves
89
+ # fetch-by-SHA for FULL SHAs (allowReachableSHA1InWant — a short SHA
90
+ # gets "fatal: couldn't find remote ref", which broke every pristine
91
+ # clone during #45). Full-SHA shallow fetch verified cold against
92
+ # github.com/ggml-org/ggml on 2026-06-11.
93
+ GGML_REV := 41e7949d705fd5dfeac33f3804e1af2a136cebd9
94
+ GGML_CUDA_ARCH ?= 121
95
+ CUDA_DIR ?= /usr/local/cuda
96
+
97
+ # --- Tep dependency (spinelgems convention) ---------------------------------
98
+ # Tep is consumed as a RELEASED gem from RubyGems (Gemfile: `gem "tep",
99
+ # "~> 0.11"`; published at https://rubygems.org/gems/tep) via the
100
+ # bundler-spinel / spinelgems convention. Two steps:
101
+ #
102
+ # 1. `bundle lock` (Gemfile → Gemfile.lock)
103
+ # 2. `../spinelgems/exe/spinel-compat vendor` (lock → vendor/spinel/:
104
+ # copies tep lib/ AND
105
+ # natively compiles+wires
106
+ # its C-exts from tep's
107
+ # spinel-ext.json, AND
108
+ # writes vendor/spinel/deps.rb)
109
+ #
110
+ # No step 3: the old `prep/post_vendor_tep.rb` @TEP_*@ substitution is
111
+ # RETIRED — spinel-compat vendor owns C-ext wiring now (tep#98). Spinel
112
+ # entrypoints do `require_relative "vendor/spinel/deps"`.
113
+ #
114
+ # Precheck: ../spinelgems (the vendor tool) must be present. tep itself
115
+ # comes from RubyGems (bundler fetches the released gem), so ../tep is NOT
116
+ # required for the vendor flow.
117
+ #
118
+ # `bundle` env note: use a user-managed Ruby (rbenv / rv / ruby-install
119
+ # with --user-install gems). With system-owned gems (e.g. Debian's
120
+ # /var/lib/gems), `bundle lock` can't write the git cache without sudo —
121
+ # that's an env-setup concern, not a toy bug.
122
+ #
123
+ # SPINEL_EXT_DISABLE=pg: tep's optional pg C-ext currently fails to
124
+ # compile under spinel-compat (its libpq pkg-config cflags aren't wired
125
+ # to the source .o compile — spinelgems#8). toy only uses tep for HTTP
126
+ # serving, not its pg adapter, so we opt out. Drop this once #8 lands.
127
+ vendor-tep:
128
+ @if [ ! -d ../spinelgems ]; then \
129
+ echo ""; \
130
+ echo " ✗ vendor-tep needs the spinelgems sibling checkout (the vendor tool):"; \
131
+ echo " missing: ../spinelgems"; \
132
+ echo ""; \
133
+ echo " From this directory's parent ($$(cd .. && pwd)):"; \
134
+ echo " git clone https://github.com/OriPekelman/spinelgems"; \
135
+ echo " Or symlink an existing checkout:"; \
136
+ echo " ln -s ~/sites/spinelgems ../spinelgems"; \
137
+ echo ""; \
138
+ exit 1; \
139
+ fi
140
+ bundle lock
141
+ SPINEL_EXT_DISABLE=pg SPINEL_DIR=$(SPINEL_DIR) ../spinelgems/exe/spinel-compat vendor
142
+ @# toy#69 — fold the vendored gems' aggregated sig root (advertised
143
+ @# by vendor/spinel/deps.rb, spinelgems#13) into toy's own --rbs
144
+ @# root via a gitignored symlink: spinel accepts ONE --rbs dir and
145
+ @# spinel_rbs_extract follows symlinks. Removed when no gem ships
146
+ @# sig (a dangling link would warn on every compile).
147
+ @if [ -d vendor/spinel/sig ]; then \
148
+ ln -sfn ../vendor/spinel/sig sig/vendor; \
149
+ echo " sig/vendor -> ../vendor/spinel/sig (rbs ride-along)"; \
150
+ else \
151
+ rm -f sig/vendor; \
152
+ fi
153
+
154
+ # Build vendor/spinel/tep/lib/tep.rb on demand for tep_demo/* targets.
155
+ # Triggers vendor-tep, which gates on the ../spinelgems sibling checkout.
156
+ vendor/spinel/tep/lib/tep.rb:
157
+ @$(MAKE) vendor-tep
158
+
159
+ # SpinelKit (toy#44) is vendored by the SAME `vendor-tep` step (both gems are in
160
+ # the Gemfile/lock; `spinel-compat vendor` copies all of them). This rule lets
161
+ # any runner/example list the vendored spinel_kit/git.rb (toy_events' git
162
+ # provenance) as a build prereq and have it produced on demand. Pure Ruby, no
163
+ # C-ext — the vendor copy is just lib/ files.
164
+ vendor/spinel/spinel_kit/lib/spinel_kit/git.rb:
165
+ @$(MAKE) vendor-tep
166
+
167
+ # SpinelKit JSON builder (toy#44) — the run_start/events JSON emitter, vendored
168
+ # by the same `vendor-tep` step. Replaces the retired lib/toy/io/toy_json.rb
169
+ # (Toy::Json → SpinelKit::Json::Builder; byte-identical output).
170
+ vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb:
171
+ @$(MAKE) vendor-tep
172
+
173
+ # --- pure-Spinel drivers ----------------------------------------------------
174
+ # Source lives in demos/. We expose short top-level target names
175
+ # (`make train_minimal`, `make distilgpt2_demo_text`) that build into
176
+ # demos/. Run the resulting binaries from the repo root.
177
+ # `make` with no args = `make help`. Previously it ran `all` which
178
+ # triggered vendor-tep and failed on machines without ../tep checked
179
+ # out (DevEx footgun a fresh-clone-on-Mac hit 2026-05-28). `make help`
180
+ # is always safe + discoverable; `make all` still works for the
181
+ # original behaviour.
182
+ .DEFAULT_GOAL := help
183
+
184
+ all: demos/train demos/smollm2
185
+
186
+ # `make setup` auto-detects the best backend for this host and runs
187
+ # the right setup-ggml-* variant. macOS → metal; nvcc on PATH → cuda;
188
+ # else CPU. Sentinels in setup-ggml-* make this a no-op if already
189
+ # done. Saves new users from picking the wrong setup target. The recipe runs under `set -e` so a failing sub-make aborts with a non-zero status instead of falling through to the final "Done." message.
190
+ .PHONY: setup
191
+
192
+ setup:
193
+ @set -e; uname_s="$$(uname -s)"; \
194
+ if [ "$$uname_s" = "Darwin" ]; then \
195
+ echo "[setup] macOS detected → setup-ggml + setup-ggml-metal"; \
196
+ echo " (CPU examples link against vendor/ggml/build/;"; \
197
+ echo " Metal examples link against vendor/ggml/build-metal/.)"; \
198
+ $(MAKE) setup-ggml; \
199
+ $(MAKE) setup-ggml-metal; \
200
+ elif command -v nvcc >/dev/null 2>&1; then \
201
+ echo "[setup] nvcc on PATH → setup-ggml + setup-ggml-cuda"; \
202
+ $(MAKE) setup-ggml; \
203
+ $(MAKE) setup-ggml-cuda; \
204
+ else \
205
+ echo "[setup] CPU only → setup-ggml"; \
206
+ $(MAKE) setup-ggml; \
207
+ fi; \
208
+ echo ""; \
209
+ echo "Done. Next: run 'make help' for the entry points."
210
+
211
+ # --- help / time-to-joy entry points --------------------------------------
212
+ # `make help` is the discoverable index for someone who just cloned.
213
+ # Keep it short — pointers to the heavier docs (examples/README.md,
214
+ # tep_demo/README.md, docs/INDEX.md) for the details.
215
+
216
+ .PHONY: help
217
+
218
+ help:
219
+ @echo ""
220
+ @echo " toy — a transformer LM in Ruby, Spinel-compiled."
221
+ @echo " Full docs: README.md, examples/README.md, docs/INDEX.md."
222
+ @echo ""
223
+ @echo " NEW HERE? Scaffold a project + discover models with the toy CLI:"
224
+ @echo " toy new <dir> scaffold a conventional toy project tree"
225
+ @echo " toy install build/verify the CPU backend"
226
+ @echo " toy list find GGUFs in caches + project data/"
227
+ @echo " toy fetch <repo> <file> download a GGUF from HuggingFace"
228
+ @echo ""
229
+ @echo " ONE-TIME SETUP"
230
+ @echo " make setup auto-detect platform; pick CUDA/Metal/CPU"
231
+ @echo " make setup-ggml force CPU build (~2 min)"
232
+ @echo " make setup-ggml-cuda force CUDA backend"
233
+ @echo " make setup-ggml-metal force Metal backend (macOS)"
234
+ @echo ""
235
+ @if [ "$$(uname -s)" = "Darwin" ]; then \
236
+ echo " ⚡ macOS detected — for GPU acceleration use the _metal example"; \
237
+ echo " variants below (they link against libggml-metal + KV kernels)."; \
238
+ echo " The plain CPU runner (\`toy infer\`) still works but is CPU-only."; \
239
+ echo ""; \
240
+ fi
241
+ @echo " GETTING STARTED — examples/"
242
+ @echo " toy list list GGUFs cached locally / in HF / Ollama / LM Studio"
243
+ @echo " toy infer <model.gguf> load a GGUF, generate 16 tokens (CPU)"
244
+ @if [ "$$(uname -s)" = "Darwin" ]; then \
245
+ echo " make example_inference_metal same, Metal-accelerated (macOS) — use this on Mac"; \
246
+ fi
247
+ @echo " Most tasks are the CLI now: toy train|infer|eval|serve (see 'toy --help')."
248
+ @echo " Curated examples (narrated; examples/README.md is the tour):"
249
+ @echo " make example_01 train a tiny Llama from scratch (start here; ~2 s)"
250
+ @echo " make example_02 warm-start fine-tune from a real GGUF's embeddings"
251
+ @echo " make example_03 LoRA adapters over a frozen mmap'd base"
252
+ @echo " make example_04 load a GGUF, KV decode, print text"
253
+ @echo " make example_05 per-token logprobs (the eval building block)"
254
+ @echo " make example_06 compare your runs/ (CRuby, no build)"
255
+ @echo " make example_07 ViT-Tiny image classifier (same recipe shape)"
256
+ @echo " (Superseded tutorials live on in examples/legacy/ — they still build.)"
257
+ @echo ""
258
+ @echo " HTTP SERVING — tep_demo/"
259
+ @echo " make tep_demo/hello minimal Tep HTTP smoke"
260
+ @if [ ! -f vendor/spinel/tep/lib/tep.rb ]; then \
261
+ printf " (prereq: run %s first — needs the ../spinelgems checkout)\n" "'make vendor-tep'"; \
262
+ fi
263
+ @echo ""
264
+ @echo " BENCH + CHECKS"
265
+ @echo " make bench routine perf regression gate (vs bench/baselines.csv)"
266
+ @echo " make bench-vs-pytorch same workloads, gated vs PyTorch (ratio, not absolute ms)"
267
+ @echo " make coverage regenerate the ggml-op coverage matrix"
268
+ @echo " make coverage-check CI form (no diff means in sync)"
269
+ @echo " make test all tinynn FFI smoke binaries"
270
+ @echo ""
271
+ @echo " COMMON MAKE FLAGS"
272
+ @echo " DEVICE=cuda on example_train_from_scratch / example_finetune_cuda"
273
+ @echo " GGUF=path/to/model.gguf on example_finetune (toy infer takes a positional path)"
274
+ @echo ""
275
+
276
+ # --- examples/ getting-started entry points --------------------------------
277
+ # Compact, one-file demos covering the main use cases. See
278
+ # examples/README.md.
279
+ # `toy infer` COMPUTE runner — lib-side Spinel binary the CLI shells to.
280
+ # Lifted from the retired examples/01_inference.rb. Target name MUST equal
281
+ # the output path string: lib/toy/core/cli/infer.rb uses RUNNER_TARGET both
282
+ # as the make target (ensure_built) AND the joined binary path. CPU-only;
283
+ # NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
284
+ libexec:
285
+ mkdir -p libexec
286
+ libexec/toy-infer: lib/toy/run/infer.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a | libexec
287
+ $(SPINEL) $< -o $@
288
+ toy-infer: libexec/toy-infer
289
+
290
+ # Diagnostic sibling of toy-infer: enables the cache trace and dumps per-tap
291
+ # min/max/|mean|/nan for every layer (used to localize ggml#1506 — the K-quant
292
+ # MoE attention head_nbytes collapse). See docs/notes/mul_mat_id_quants.md.
293
+ libexec/toy-infer-trace: lib/toy/run/infer_trace.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a | libexec
294
+ $(SPINEL) $< -o $@
295
+ toy-infer-trace: libexec/toy-infer-trace
296
+
297
+ # P4 — `toy eval` COMPUTE runner (CRuby→runner COMPUTE BRIDGE, same shape as
298
+ # toy-infer). Spinel source lib/toy/run/eval.rb; the binary path EQUALS the
299
+ # make target so ToyRoot.ensure_built("libexec/toy-eval") both builds and
300
+ # locates it. Deps = infer's deps + lib/toy/dev/toy_logprobs.rb (a transitive require
301
+ # of transformer_lm; listed explicitly so a touch of it rebuilds the runner).
302
+ # CPU-only; NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
303
+ libexec/toy-eval: lib/toy/run/eval.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a | libexec
304
+ $(SPINEL) $< -o $@
305
+ toy-eval: libexec/toy-eval
306
+
307
+ # LMC (Linear Mode Connectivity) eval runner — `toy eval lmc --ckpt A --other B`.
308
+ # Interpolates two checkpoints θ_α = (1-α)·θ_A + α·θ_B and evals CE per α.
309
+ # Spinel source lib/toy/run/eval_lmc.rb; the binary path EQUALS the make target
310
+ # so ToyRoot.ensure_built("libexec/toy-eval-lmc") both builds and locates it.
311
+ # Deps mirror example_lmc (Makefile:479) NOT toy-eval; order-only | libexec (no
312
+ # $(SPINEL_DEPS)) like the CPU toy-eval runner. CPU-only; NOT in MIRRORABLE (see
313
+ # prep/gen_cuda_mirror.rb); a cuda LMC twin is a later slice.
314
+ libexec/toy-eval-lmc: lib/toy/run/eval_lmc.rb lib/toy/llm/adamw.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/train/toy_drift_grad.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
315
+ $(SPINEL) $< -o $@
316
+ toy-eval-lmc: libexec/toy-eval-lmc
317
+
318
+ # CUDA siblings of toy-infer / toy-eval — selected by the CRuby CLI shell when
319
+ # invoked with `--device cuda` (lib/toy/core/cli/{infer,eval}.rb derive the
320
+ # target). PER-DEVICE binaries (not one polymorphic runner): a single source
321
+ # requiring BOTH ToyLM and ToyLMCuda would force the CUDA archive onto the CPU
322
+ # binary's link line, changing it. Keeping separate binaries leaves
323
+ # libexec/toy-infer / toy-eval link lines BYTE-UNCHANGED. Source is the
324
+ # hand-written lib/toy/run/{infer,eval}_cuda.rb (ToyLMCuda ctor arity 1 →
325
+ # NOT mechanically mirrorable → ABSENT from MIRRORABLE, like the CPU runners).
326
+ # Force-link recipe matches every other cuda target (-Wl,-u,tnn_cuda_force_link).
327
+ libexec/toy-infer-cuda: lib/toy/run/infer_cuda.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
328
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
329
+ toy-infer-cuda: libexec/toy-infer-cuda
330
+
331
+ libexec/toy-eval-cuda: lib/toy/run/eval_cuda.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
332
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
333
+ toy-eval-cuda: libexec/toy-eval-cuda
334
+
335
+ # Metal twins of the infer/eval cuda runners (macOS ONLY). Same single-type
336
+ # binary discipline (landmine #16): TinyNNMetal is the only compute module.
337
+ # Source is the hand-written lib/toy/run/{infer,eval}_metal.rb (ToyLMMetal ctor
338
+ # arity 1 -> NOT mechanically mirrorable -> ABSENT from MIRRORABLE, like the
339
+ # cuda/CPU runners). The macOS guard MUST come first so Linux/gx10 never touches
340
+ # the Apple frameworks; the metal --cc recipe links Foundation/Metal/MetalKit
341
+ # with the leading-underscore force-link symbol (_tnn_metal_force_link, macOS
342
+ # symbol convention) vs cuda's tnn_cuda_force_link. libtinynn_ggml.a (CPU
343
+ # archive) stays in deps for the base ggml symbols. gx10 RUNTIME-UNVERIFIED.
344
+ libexec/toy-infer-metal: lib/toy/run/infer_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
345
+ ifneq ($(UNAME_S),Darwin)
346
+ @echo "toy-infer-metal: macOS-only"; exit 1
347
+ endif
348
+ $(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
349
+ toy-infer-metal: libexec/toy-infer-metal
350
+
351
+ libexec/toy-eval-metal: lib/toy/run/eval_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
352
+ ifneq ($(UNAME_S),Darwin)
353
+ @echo "toy-eval-metal: macOS-only"; exit 1
354
+ endif
355
+ $(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
356
+ toy-eval-metal: libexec/toy-eval-metal
357
+
358
+ # Convenience: run both functional gates with the CUDA parity arm enabled.
359
+ .PHONY: gate-cuda
360
+ gate-cuda:
361
+ TOY_GATE_CUDA=1 ruby prep/infer_gate.rb
362
+ TOY_GATE_CUDA=1 ruby prep/eval_gate.rb
363
+
364
+ # GPT-2 minimal inline training proof (toy#12 part-b foundation). Builds a
365
+ # self-contained forward+CE+backward+AdamW loop over the GPT-2-distinctive
366
+ # structure (wte+wpe learned embeddings, composite LayerNorm, GELU FFN, tied
367
+ # output) — exercising the two vendored backward kernels (ggml_gelu_back,
368
+ # ggml_norm_back; vendor-patches/0007) end-to-end. Attention is the next
369
+ # increment; this proves the kernels train. Asserts CE decreases (exit 1 if
370
+ # not). CPU-only. "record-from-inline-first" reference for prep/gpt2_train_gate.
371
+ libexec/gpt2-train-min: prep/gpt2_train_min.rb lib/toy.rb lib/toy/ffi/tinynn.rb \
372
+ lib/toy/models/transformer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
373
+ $(SPINEL) $< -o $@
374
+ gpt2-train-min: libexec/gpt2-train-min
375
+ .PHONY: gpt2-train-min
376
+ gate-gpt2-min: libexec/gpt2-train-min
377
+ ./libexec/gpt2-train-min
378
+ .PHONY: gate-gpt2-min
379
+ # Byte-exact GPT-2 train gate: assert the CE loss curve is byte-identical to
380
+ # prep/fixtures/gpt2_train_baseline.txt (record-from-inline reference for the
381
+ # eventual `toy train --arch gpt2`). Re-record with `ruby prep/gpt2_train_gate.rb --record`.
382
+ gate-gpt2: libexec/gpt2-train-min
383
+ ruby prep/gpt2_train_gate.rb
384
+ .PHONY: gate-gpt2
385
+ # Byte-exact gate for the GPT-2 ENGINE runner (libexec/toy-train-gpt2 →
386
+ # Toy::LLM::Engine::GPT2SeqEngine, the `toy train --arch gpt2` compute). Asserts
387
+ # the from-scratch loss curve is byte-identical + decreasing. Re-record with
388
+ # `ruby prep/gpt2_train_engine_gate.rb --record`.
389
+ gate-gpt2-train: libexec/toy-train-gpt2
390
+ ruby prep/gpt2_train_engine_gate.rb
391
+ .PHONY: gate-gpt2-train
392
+ # CUDA arm: `toy train --arch gpt2 --device cuda`. Forward + most backward on
393
+ # CUDA; GELU/LayerNorm backward fall back to CPU (no GPU kernel). CUDA-vs-CUDA
394
+ # byte-exact (empirical on GB10) + decreasing. Re-record with
395
+ # `ruby prep/gpt2_train_cuda_gate.rb --record`.
396
+ gate-gpt2-train-cuda: libexec/toy-train-gpt2-cuda
397
+ ruby prep/gpt2_train_cuda_gate.rb
398
+ .PHONY: gate-gpt2-train-cuda
399
+
400
+ # Deterministic train→infer ROUND-TRIP gate: train from-scratch --steps 5
401
+ # --seed 0, then infer a fixed numeric prompt greedily from the written
402
+ # checkpoint and assert the generated ids byte-equal the recorded fixture.
403
+ # Proves the from-scratch checkpoint is a standard fused-llama GGUF that
404
+ # `toy infer` loads. CPU-only (no CUDA arm); bin/toy auto-builds the runners.
405
+ .PHONY: gate-ckpt-roundtrip
406
+ gate-ckpt-roundtrip:
407
+ ruby prep/ckpt_roundtrip_gate.rb
408
+
409
+ # Deterministic LMC gate: `toy eval lmc` interpolates two PINNED from-scratch
410
+ # checkpoints and evals CE per α. The curve is ggml-internal CE (no Ruby libm)
411
+ # → byte-exact everywhere. Run twice (determinism) and assert byte-identical to
412
+ # prep/fixtures/lmc_baseline.txt. CPU-only (no CUDA arm this slice).
413
+ .PHONY: gate-lmc
414
+ gate-lmc:
415
+ ruby prep/lmc_gate.rb
416
+
417
+ # The 6th realize-gate (F3 full fine-tune) — past P2's accepted ceiling.
418
+ # Records the engine's full_finetune CE curve and re-verifies it byte-for-byte
419
+ # so the per-block alloc lift onto TransformerBlock is provably behavior-
420
+ # preserving. MODEL-GATED: needs data/smollm2-135m-native.gguf (gitignored dev
421
+ # artifact); SKIPs loudly when absent. Train losses are ggml-internal → byte-
422
+ # exact. Re-record with `ruby prep/full_finetune_gate.rb --record`.
423
+ .PHONY: gate-full-finetune
424
+ gate-full-finetune:
425
+ ruby prep/full_finetune_gate.rb
426
+
427
+ # Mixed-precision training gate (GH#9, f16, CPU). Drives the from-scratch
428
+ # example at WEIGHT_DTYPE=1 vs =0 and asserts: f16 runs to completion (needs the
429
+ # 0008 mul_mat-backward-mixed-precision ggml patch — without it backward aborts),
430
+ # run_start.model.weight_type surfaces the dtype, and the f16 final loss lands
431
+ # within tolerance of the f32 baseline. TOLERANCE arm (dtype changes numerics),
432
+ # not byte-exact. bf16 is the CUDA/GB10 follow-up. Builds the example itself.
433
+ .PHONY: gate-mixed-precision
434
+ gate-mixed-precision:
435
+ ruby prep/mixed_precision_gate.rb
436
+
437
+ # toy#64 item 6 — Toy::RunLog unit gate (CRuby-only, no Spinel build).
438
+ # Self-contained synthetic fixture + integration sniff of repo runs/.
439
+ .PHONY: gate-run-log
440
+ gate-run-log:
441
+ ruby prep/run_log_gate.rb
442
+
443
+ # toy#71 — the MRI dev-run gate, BOTH arms (plain `ruby`, NO Spinel
444
+ # build, NO SPINEL_DIR). Stub leg (Stage A): `require "toy/mri"` loads
445
+ # the full compute surface under CRuby, the pure-Ruby teaching path
446
+ # genuinely trains, and crossing the native boundary raises the NAMED
447
+ # Toy::MRI::NativeCallError. Native leg (Stage B, the CRuby oracle;
448
+ # needs `make libtinynn_shared`, loud SKIP otherwise — MRI_GATE_STRICT=1
449
+ # turns the skip into a failure): MRI+Fiddle reproduces the recorded
450
+ # Spinel from-scratch gate curve BIT-EXACT (train_baseline.txt) and the
451
+ # smollm2-135m greedy decode ids byte-equal infer_baseline.txt.
452
+ .PHONY: gate-mri
453
+ gate-mri:
454
+ ruby prep/mri_gate.rb
455
+
456
+ # toy#60 item 4 — the COLD-START consumer gate: `toy new` scaffold →
457
+ # hello.rb compiles + runs (default ENV, then D_MODEL override without
458
+ # recompiling) → `toy train` prints losses + writes runs/<id>/ → the
459
+ # missing-corpus guard fails loud; PLUS the `toy new --lib` leg
460
+ # (bundle lock → spinel-compat vendor → ./build.sh cpu → run; skips
461
+ # loudly when bundler/spinel-compat are absent). Structural, not
462
+ # byte-exact. ~4 min (the lib leg builds ggml inside the tmp project).
463
+ .PHONY: gate-consumer
464
+ gate-consumer:
465
+ ruby prep/consumer_gate.rb
466
+
467
+ # toy#42 full-API require gate. Builds prep/smokes/smoke_compute_surface (which
468
+ # requires ONLY lib/toy/compute.rb) and asserts it realizes a live engine —
469
+ # proving the one-require compute surface co-compiles + works for a library
470
+ # consumer. Builds the smoke itself.
471
+ .PHONY: gate-compute-surface
472
+ gate-compute-surface: prep/smokes/smoke_compute_surface
473
+ @out="$$(./prep/smokes/smoke_compute_surface 2>&1)"; \
474
+ echo "$$out" | tail -2; \
475
+ echo "$$out" | grep -q "compute-surface: ok" \
476
+ && echo "GATE PASS [compute-surface]: lib/toy/compute.rb one-require surface is live" \
477
+ || { echo "GATE FAIL [compute-surface]"; exit 1; }
478
+
479
+ # toy#64 item 8 — CUDA twin of gate-compute-surface: build + run the
480
+ # consumer-ish CUDA entry smoke on the GPU (GB10 sm_121).
481
+ .PHONY: gate-compute-surface-cuda
482
+ gate-compute-surface-cuda: prep/smokes/smoke_compute_surface_cuda
483
+ @out="$$(./prep/smokes/smoke_compute_surface_cuda 2>&1)"; \
484
+ echo "$$out" | tail -2; \
485
+ echo "$$out" | grep -q "compute-surface-cuda: ok" \
486
+ && echo "GATE PASS [compute-surface-cuda]: lib/toy/compute_cuda.rb device entry is live" \
487
+ || { echo "GATE FAIL [compute-surface-cuda]"; exit 1; }
488
+
489
+ # K-quant MoE attention regression gate (the bug long misfiled as ggml#1506):
490
+ # head_nbytes returned 0 for K-quant attention weights → per-head mmap stride
491
+ # collapsed every head onto head 0 → degenerate repeating decode on OLMoE
492
+ # Q4_K_M. Structural assertion (distinct-count + max single-token run), not
493
+ # byte-exact, so it survives benign K-quant drift. MODEL-GATED: needs the ~4 GB
494
+ # data/OLMoE-1b-7b-0924-Instruct-Q4_K_M.gguf (gitignored); SKIPs loudly when
495
+ # absent. bin/toy auto-builds the infer runner. See docs/notes/mul_mat_id_quants.md.
496
+ .PHONY: gate-moe-kquant
497
+ gate-moe-kquant:
498
+ ruby prep/moe_kquant_gate.rb
499
+
500
+ # Silent poly-degradation gate (#32): compiles the canonical compute entrypoints
501
+ # with spinel and fails if a NEW `cannot resolve … on poly … (emitting 0)` warning
502
+ # appears vs the frozen baseline — i.e. a refactor just silently compiled a literal
503
+ # 0 into a numerical path (compiled != correct). Re-record the known-benign set with
504
+ # `ruby prep/poly_degrade_gate.rb --record`. See feedback_spinel_type_inference_landmines.
505
+ .PHONY: gate-poly-degrade
506
+ gate-poly-degrade:
507
+ ruby prep/poly_degrade_gate.rb
508
+
509
+ # CUDA from-scratch TRAINING gate (STRONG arm, no epsilon): train
510
+ # from-scratch --device cuda --steps 5 --seed 0, assert the "step N: loss="
511
+ # curve byte-equals prep/fixtures/train_cuda_baseline.txt, loss decreases,
512
+ # and the CUDA checkpoint round-trips through CPU `toy infer`. Determinism is
513
+ # EMPIRICAL on this GB10 — see the fixture header. bin/toy auto-builds.
514
+ .PHONY: gate-train-cuda
515
+ gate-train-cuda:
516
+ ruby prep/train_cuda_gate.rb
517
+
518
+ # Metal RUNTIME parity gate (macOS ONLY). Builds the three metal runners then
519
+ # runs prep/metal_gate.rb: infer (cpu-vs-metal byte-equal ids), eval (top-k id
520
+ # ORDER equality), train-from-scratch (run-twice byte-determinism OR a Mac-
521
+ # pinned baseline, loss-decrease, ckpt round-trip vs the SHARED fixture,
522
+ # events.jsonl run_start/run_end). On Linux/gx10 this SKIPS GREEN (exit 0) so
523
+ # umbrella `make gate-*` runs do not false-fail — Metal cannot build or run
524
+ # here. THIS is the gate that actually validates metal numerics; run it on the
525
+ # Mac. (The metal BUILD targets exit 1 on Linux — a gate that can't run skips
526
+ # green, a build target that can't build errors red.)
527
+ .PHONY: gate-metal
528
+ gate-metal:
529
+ ifneq ($(UNAME_S),Darwin)
530
+ @echo "gate-metal: Metal is macOS-only (uname -s = $(UNAME_S)) — skipping"; exit 0
531
+ else
532
+ $(MAKE) libexec/toy-infer-metal libexec/toy-eval-metal libexec/toy-train-metal
533
+ ruby prep/metal_gate.rb
534
+ endif
535
+
536
+ # STRUCTURAL serving-telemetry gate: boot libexec/toy-serve with TAO_RUN_DIR
537
+ # set, POST /v1/completions, SIGTERM, then assert runs/<id>/events.jsonl carries
538
+ # the toy/v1 run_start(serve) + eval/serve/request + run_end stream (Tao #6).
539
+ # Honest STRUCTURAL (NOT byte-identical): t/latency_us/request_id are
540
+ # wall-clock/counter and cannot be byte-stable. Self-builds the runner.
541
+ .PHONY: gate-serve-events
542
+ gate-serve-events:
543
+ ruby prep/serve_events_gate.rb
544
+
545
+ # Umbrella: the byte-baseline serve gate THEN the structural events gate.
546
+ .PHONY: gate-serve
547
+ gate-serve:
548
+ ruby prep/serve_gate.rb
549
+ ruby prep/serve_events_gate.rb
550
+
551
+ # P4 — from-scratch TRAINING compute runner (CRuby→runner COMPUTE BRIDGE,
552
+ # same shape as toy-infer). Spinel source lib/toy/run/train.rb; the binary
553
+ # path EQUALS the make target so ToyRoot.ensure_built("libexec/toy-train")
554
+ # both builds and locates it. Deps list every transitive require the runner
555
+ # pulls (the recipe → llama_seq_engine → transformer + toy + smollm2 +
556
+ # tinynn + the L1-L3 primitives/blocks/archs; plus gguf_writer + drift_grad
557
+ # for the checkpoint). CPU-only; NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
558
+ libexec/toy-train: lib/toy/run/train.rb lib/toy/dev/toy_describe_flow.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
559
+ vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb \
560
+ lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb \
561
+ lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb \
562
+ lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb \
563
+ lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
564
+ lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
565
+ lib/toy/llm/primitives/rms_norm.rb lib/toy/llm/primitives/rope.rb \
566
+ lib/toy/llm/primitives/swiglu.rb lib/toy/llm/primitives/gqa.rb \
567
+ lib/toy/llm/blocks/transformer_block.rb lib/toy/llm/archs/llama_arch.rb \
568
+ lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
569
+ $(SPINEL) $< -o $@
570
+ toy-train: libexec/toy-train
571
+
572
+ # `toy train lora` DEDICATED runner. Separate binary from toy-train: the
573
+ # LoRA realize_for_mmap path cannot share a Spinel compilation unit with the
574
+ # random-init path (cfg type-merge miscompile; see lib/toy/run/train_lora.rb
575
+ # header). CPU-only; NOT in MIRRORABLE.
576
+ libexec/toy-train-lora: lib/toy/run/train_lora.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
577
+ lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb \
578
+ lib/toy/llm/adamw.rb \
579
+ lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
580
+ lib/toy/llm/primitives/rms_norm.rb lib/toy/llm/primitives/rope.rb \
581
+ lib/toy/llm/primitives/swiglu.rb lib/toy/llm/primitives/gqa.rb \
582
+ lib/toy/llm/blocks/transformer_block.rb lib/toy/llm/archs/llama_arch.rb \
583
+ lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
584
+ $(SPINEL) $< -o $@
585
+ toy-train-lora: libexec/toy-train-lora
586
+
587
+ # `toy train from-scratch --arch gpt2` DEDICATED runner. Separate binary from
588
+ # toy-train (landmine #16: the GPT-2 realize path can't share a Spinel unit with
589
+ # the llama random-init path). Self-contained GPT2SeqEngine (no llama engine /
590
+ # primitives dep), so it also can't churn the llama gates. CPU-only this slice.
591
+ libexec/toy-train-gpt2: lib/toy/run/train_gpt2.rb lib/toy.rb \
592
+ lib/toy/llm/engine/gpt2_seq_engine.rb lib/toy/llm/labels.rb lib/toy/llm/adamw.rb \
593
+ lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
594
+ $(SPINEL) $< -o $@
595
+ toy-train-gpt2: libexec/toy-train-gpt2
596
+ .PHONY: toy-train-gpt2
597
+
598
+ # CUDA twin of toy-train-gpt2 (`--arch gpt2 --device cuda`). SEPARATE single-type
599
+ # binary (landmine #16): links the generated CUDA engine mirror + the CUDA TinyNN
600
+ # shim; the GELU/LayerNorm backward ops fall back to the CPU backend via the
601
+ # scheduler (no CUDA kernel). lib/toy/ffi/tinynn.rb + transformer.rb stay in deps (Mat /
602
+ # CPU-TinyNN seam). NOT in MIRRORABLE (the engine mirror IS; the runner is hand-written).
603
+ libexec/toy-train-gpt2-cuda: lib/toy/run/train_gpt2_cuda.rb lib/toy.rb \
604
+ lib/toy/llm/engine/gpt2_seq_engine_cuda.rb lib/toy/models/transformer.rb \
605
+ lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
606
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
607
+ toy-train-gpt2-cuda: libexec/toy-train-gpt2-cuda
608
+ .PHONY: toy-train-gpt2-cuda
609
+
610
+ # Metal twin (`--arch gpt2 --device metal`), macOS ONLY. Same structure; links
611
+ # the generated Metal engine mirror + the Metal TinyNN shim + Apple frameworks.
612
+ # gx10 RUNTIME-UNVERIFIED (codegen + structural parity here; runtime-gate on Mac).
613
+ libexec/toy-train-gpt2-metal: lib/toy/run/train_gpt2_metal.rb lib/toy.rb \
614
+ lib/toy/llm/engine/gpt2_seq_engine_metal.rb lib/toy/models/transformer.rb \
615
+ lib/toy/ffi/tinynn_metal.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
616
+ ifneq ($(UNAME_S),Darwin)
617
+ @echo "toy-train-gpt2-metal: macOS-only"; exit 1
618
+ endif
619
+ $(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
620
+ toy-train-gpt2-metal: libexec/toy-train-gpt2-metal
621
+ .PHONY: toy-train-gpt2-metal
622
+
623
+ # P4/vit — ViT-Tiny from-scratch CPU TRAINING runner. SEPARATE binary
624
+ # (landmine #16): ViTTinyConfig must NOT share a Spinel compilation unit
625
+ # with SmolLM2Config. Source lib/toy/run/train_vit.rb; binary path EQUALS
626
+ # the make target. Reads STEPS/SEED/IMG_DIR/TAO_RUN_DIR/TOY_RUN_ID from ENV;
627
+ # trains random-init on the COMMITTED data/vit_smoke corpus. NO toy_gguf_writer
628
+ # dep (cfg.vocab/d_ff poly-collide with ViTTinyConfig — #169 checkpoint
629
+ # follow-up). CPU-only; absent from MIRRORABLE (no CUDA/Metal twin this slice).
630
+ libexec/toy-train-vit: lib/toy/run/train_vit.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/vit_tiny.rb \
631
+ lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb \
632
+ lib/toy/io/toy_image_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/train/toy_drift_grad.rb \
633
+ lib/toy/llm/adamw.rb \
634
+ lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
635
+ $(SPINEL) $< -o $@
636
+ toy-train-vit: libexec/toy-train-vit
637
+
638
+ # P4/GPU — from-scratch CUDA TRAINING runner. CUDA twin of libexec/toy-train,
639
+ # from-scratch ONLY (warm_start dropped). SINGLE-TYPE binary (landmine #16):
640
+ # TinyNNCuda is the compute path; lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in
641
+ # deps because transformer.rb requires tinynn -> defines CPU TinyNN for the
642
+ # checkpoint write/fuse/drift seam (dropping them breaks the writer). Links
643
+ # the CUDA ggml backend via -Wl,-u,tnn_cuda_force_link (every cuda target).
644
+ # NOT in MIRRORABLE (hand-written, see prep/gen_cuda_mirror.rb).
645
+ libexec/toy-train-cuda: lib/toy/run/train_cuda.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
646
+ lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb \
647
+ lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch_cuda.rb \
648
+ lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start_cuda.rb \
649
+ lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
650
+ lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/models/transformer.rb \
651
+ lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
652
+ lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
653
+ lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
654
+ lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
655
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
656
+ toy-train-cuda: libexec/toy-train-cuda
657
+
658
+ # P4/GPU — LoRA CUDA TRAINING runner. CUDA twin of libexec/toy-train-lora.
659
+ # SEPARATE binary from libexec/toy-train-cuda: the LoRA realize_for_mmap path
660
+ # cannot share a Spinel compilation unit with the random-init path (cfg
661
+ # type-merge miscompile; landmine #16 — same reason toy-train-lora is split
662
+ # from toy-train). SINGLE-TYPE binary: TinyNNCuda is the compute path;
663
+ # lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in deps because transformer.rb
664
+ # requires tinynn -> defines CPU TinyNN for the checkpoint write seam
665
+ # (ToyDriftGrad.params downloads via CPU TinyNN). toy_gguf_fuse is NOT a dep
666
+ # (lora uses ToyDriftGrad.params, not the lens-fold path). Links the CUDA
667
+ # ggml backend via -Wl,-u,tnn_cuda_force_link. NOT in MIRRORABLE (hand-written).
668
+ libexec/toy-train-lora-cuda: lib/toy/run/train_lora_cuda.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
669
+ lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora_cuda.rb \
670
+ lib/toy/llm/adamw.rb \
671
+ lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
672
+ lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
673
+ lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
674
+ lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
675
+ lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
676
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
677
+ toy-train-lora-cuda: libexec/toy-train-lora-cuda
678
+
679
+ # P4/GPU — from-scratch METAL TRAINING runner (macOS ONLY). Metal twin of
680
+ # libexec/toy-train-cuda, from-scratch ONLY. SINGLE-TYPE binary (landmine #16):
681
+ # TinyNNMetal is the compute path; lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in
682
+ # deps because transformer.rb requires tinynn -> defines CPU TinyNN for the
683
+ # checkpoint write/fuse/drift seam (dropping them breaks the writer). The macOS
684
+ # guard MUST come first so Linux/gx10 never touches the Apple frameworks; the
685
+ # metal --cc recipe links Foundation/Metal/MetalKit with _tnn_metal_force_link
686
+ # (leading underscore, macOS symbol convention). libtinynn_ggml.a (CPU archive)
687
+ # stays in deps for the write seam + base ggml. NOT in MIRRORABLE (hand-written).
688
+ # gx10 RUNTIME-UNVERIFIED — pin baseline + gate on the Mac.
689
+ libexec/toy-train-metal: lib/toy/run/train_metal.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
690
+ lib/toy/llm/engine/llama_seq_engine_metal.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch_metal.rb \
691
+ lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
692
+ lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/models/transformer.rb \
693
+ lib/toy/llm/primitives/rms_norm_metal.rb lib/toy/llm/primitives/rope_metal.rb \
694
+ lib/toy/llm/primitives/swiglu_metal.rb lib/toy/llm/primitives/gqa_metal.rb \
695
+ lib/toy/llm/blocks/transformer_block_metal.rb lib/toy/llm/archs/llama_arch_metal.rb \
696
+ lib/toy/ffi/tinynn_metal.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
697
+ ifneq ($(UNAME_S),Darwin)
698
+ @echo "toy-train-metal: macOS-only"; exit 1
699
+ endif
700
+ $(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
701
+ toy-train-metal: libexec/toy-train-metal
702
+
703
+ # P4 — `toy serve` PERSISTENT compute runner (OpenAI-compatible HTTP).
704
+ # Unlike infer/train/eval (compute-once), this runner blocks in Tep.run!.
705
+ # Spinel source lib/toy/run/serve.rb; the binary path EQUALS the make
706
+ # target so ToyRoot.ensure_built("libexec/toy-serve") both builds and
707
+ # locates it. The endpoint logic moved out of tep_demo/openai_api_llama.rb
708
+ # into lib/toy/serve/openai/* (Server/State + handlers + the embeddings
709
+ # handler; JSON via SpinelKit::Json, toy#44). vendor/spinel/tep/lib/tep.rb is the TEP BUILD-DEP
710
+ # edge — Tep is consumed purely as transport (built by `make vendor-tep`
711
+ # on a fresh tree; needs ../tep + ../spinelgems siblings). Deps mirror the
712
+ # tep_demo recipe + the KV stack. CPU-only; NOT in
713
+ # MIRRORABLE (see prep/gen_cuda_mirror.rb).
714
+ libexec/toy-serve: lib/toy/run/serve.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb \
715
+ lib/toy/serve/openai/server.rb \
716
+ lib/toy/serve/openai/handlers.rb lib/toy/serve/openai/embeddings_handler.rb \
717
+ vendor/spinel/tep/lib/tep.rb \
718
+ lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb \
719
+ tinynn/libtinynn_ggml.a | libexec
720
+ $(SPINEL) $< -o $@
721
+ toy-serve: libexec/toy-serve
722
+
723
+ # toy#gguf-checkpoint-reload (#153) — smoke binary that loads a
724
+ # from-scratch toy GGUF and runs a tiny generation. No tokenizer.
725
+ prep/smokes/smoke_toy_ckpt_reload: prep/smokes/smoke_toy_ckpt_reload.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a
726
+ $(SPINEL) $< -o $@
727
+
728
+ # toy#embed-api (#145) — smoke for ToyLM#embed_lookup.
729
+ prep/smokes/smoke_embed_api: prep/smokes/smoke_embed_api.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a
730
+ $(SPINEL) $< -o $@
731
+
732
+ # P1 framework refactor — runtime Card derivation smoke. Loads a
733
+ # llama-family GGUF, realizes the seq-mode cache, derives a
734
+ # structural Toy::Card via ToyDescribeFlow.card, prints + gates.
735
+ prep/smokes/smoke_card_derive: prep/smokes/smoke_card_derive.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/dev/toy_card.rb tinynn/libtinynn_ggml.a
736
+ $(SPINEL) $< -o $@
737
+
738
+ # toy#decode-logprobs (#151) — smoke for ToyLM#decode_step_with_logprobs.
739
+ prep/smokes/smoke_decode_logprobs: prep/smokes/smoke_decode_logprobs.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a
740
+ $(SPINEL) $< -o $@
741
+
742
+ # GH#18 — LMC interpolate-and-eval runner.
743
+ examples/example_lmc: examples/legacy/08_lmc.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
744
+ $(SPINEL) $< -o $@
745
+ example_lmc: examples/example_lmc
746
+
747
+ # E2.3 (towards GH#14) — projection-lens smoke.
748
+ prep/smokes/smoke_projection_lens: prep/smokes/smoke_projection_lens.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
749
+ $(SPINEL) $< -o $@
750
+
751
+ # toy#42 full-API require gate. Compiling this proves lib/toy/compute.rb's whole
752
+ # surface (all three engines + recipes + loaders) co-compiles in one program;
753
+ # running it realizes a LlamaSeqEngine to prove the surface is live. The prereq
754
+ # is just lib/toy/compute.rb — it pulls everything else transitively, and
755
+ # $(SPINEL) follows the require graph.
756
+ prep/smokes/smoke_compute_surface: prep/smokes/smoke_compute_surface.rb lib/toy/compute.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
757
+ $(SPINEL) $< -o $@
758
+
759
+ # toy#64 item 8 — the CUDA compute entry (lib/toy/compute_cuda.rb), the
760
+ # consumer-ish device-at-compile-time gate. Same shape as the CPU
761
+ # compute-surface gate but requires compute_cuda + links the CUDA
762
+ # archives with the force-link flag. The generated CUDA mirrors in the
763
+ # dep list are kept fresh by the $(MIRROR_CUDA) pattern rules.
764
+ prep/smokes/smoke_compute_surface_cuda: prep/smokes/smoke_compute_surface_cuda.rb lib/toy/compute_cuda.rb \
765
+ lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb \
766
+ lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/gpt2_seq_engine_cuda.rb \
767
+ lib/toy/llm/engine/llama_kv_engine_cuda.rb \
768
+ lib/toy/llm/recipes/from_scratch_cuda.rb lib/toy/llm/recipes/warm_start_cuda.rb \
769
+ lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
770
+ lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
771
+ lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
772
+ lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
773
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
774
+
775
+ # P2.6 — GQA-divergent (w_o) gate. Realizes a config with head_dim=24 so
776
+ # n_heads*head_dim (96) != d_model (64), proving the divergent w_o shape
777
+ # [d_model, n_heads*head_dim] allocates and runs forward+backward.
778
+ prep/smokes/smoke_gate_gqa_divergent: prep/smokes/smoke_gate_gqa_divergent.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
779
+ $(SPINEL) $< -o $@
780
+
781
+ # P2.6 — llama3 RoPE post-rope TENSOR parity gate. Builds a standalone
782
+ # post-rope subgraph from the SAME public primitive (RoPE.apply_2d) the
783
+ # model's K/Q paths call, with a NON-NULL, NON-TRIVIAL llama3 freq_factors
784
+ # ptr (computed via Toy::RopeScaling.compute_llama3_freq_factors). Logit-
785
+ # level is rope-angle-INSENSITIVE, so the gate taps the post-rope tensor:
786
+ # asserts (a) freq_factors non-uniform / kind==:llama3, (b) post-rope output
787
+ # byte-identical run-to-run, plus a contrast guard vs :none (NULL factors).
788
+ # No model file, no lib/ change, no mirror regen. Run from repo root.
789
+ prep/smokes/smoke_gate_llama3_tensor: prep/smokes/smoke_gate_llama3_tensor.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
790
+ $(SPINEL) $< -o $@
791
+
792
+ # P2.6 — B>1 (micro-batch) gate. Realizes with t_batch=2 so @seq_b=2,
793
+ # forcing the block-causal mask alloc + upload (gated on @seq_b>1) and the
794
+ # soft_max_ext attention path (gqa.rb:50). Proves the batched graph
795
+ # allocates the [T*B,T*B] mask and runs forward+backward; records a
796
+ # reproducible loss baseline. MUST run from repo root (data/ts_seqs.txt).
797
+ prep/smokes/smoke_gate_b_gt_1: prep/smokes/smoke_gate_b_gt_1.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
798
+ $(SPINEL) $< -o $@
799
+
800
+ # P2.6 — L4 FromScratch recipe gate. Drives the same random-init config
801
+ # as smoke_projection_lens THROUGH Toy::LLM::Recipes::FromScratch; its
802
+ # loss curve must byte-equal the projection-lens reference.
803
+ prep/smokes/smoke_recipe_from_scratch: prep/smokes/smoke_recipe_from_scratch.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
804
+ $(SPINEL) $< -o $@
805
+
806
+ # BLESSED from-scratch path — the short tutorial. Same gate-fixture
807
+ # config as smoke_recipe_from_scratch, but the clean tutorial read using
808
+ # the value objects (Toy::SmolLM2Config.mha + Toy::Labels + Toy::AdamW).
809
+ examples/example_train_from_scratch_blessed: examples/legacy/train_from_scratch.rb lib/toy.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
810
+ $(SPINEL) $< -o $@
811
+ example_train_from_scratch_blessed: examples/example_train_from_scratch_blessed
812
+
813
+ # ── Curated examples (toy#60) — the narrated teaching set. One file,
814
+ # one make target, one binary each; see examples/README.md for the tour.
815
+ # 01 — from-scratch on the bundled tiny corpus via the one-require
816
+ # compute surface + the named value objects. THE showcase; the example
817
+ # in docs/framework.md must stay truthful to this file.
818
+ examples/example_01_train_tiny: examples/01_train_tiny.rb lib/toy/compute.rb lib/toy/io/toy_corpus_loader.rb lib/toy/io/run_bundle.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
819
+ $(SPINEL) $< -o $@
820
+ example_01: examples/example_01_train_tiny
821
+ .PHONY: example_01
822
+
823
+ # 02 — warm-start fine-tune: donor token_embd from a real GGUF through
824
+ # Toy::LLM::Recipes::WarmStart (realize_scratch! → realize_warm! → build!).
825
+ examples/example_02_finetune_warm_start: examples/02_finetune_warm_start.rb lib/toy/compute.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
826
+ $(SPINEL) $< -o $@
827
+ example_02: examples/example_02_finetune_warm_start
828
+ .PHONY: example_02
829
+
830
+ # 03 — LoRA adapters over a frozen mmap'd base GGUF, via the one-require
831
+ # compute surface (lora re-added to it by toy#52).
832
+ examples/example_03_lora: examples/03_lora.rb lib/toy/compute.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
833
+ $(SPINEL) $< -o $@
834
+ example_03: examples/example_03_lora
835
+ .PHONY: example_03
836
+
837
+ # 04 — load a GGUF, KV-cache decode, print text (the llama_kv_engine
838
+ # path the `toy infer` runner drives).
839
+ examples/example_04_generate: examples/04_generate.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
840
+ $(SPINEL) $< -o $@
841
+ example_04: examples/example_04_generate
842
+ .PHONY: example_04
843
+
844
+ # 05 — per-token logprobs at a decode position (the `toy eval` compute).
845
+ examples/example_05_eval_logprobs: examples/05_eval_logprobs.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
846
+ $(SPINEL) $< -o $@
847
+ example_05: examples/example_05_eval_logprobs
848
+ .PHONY: example_05
849
+
850
+ # 06 — CRuby, NOT compiled: Toy::RunLog comparison table over runs/.
851
+ example_06:
852
+ ruby examples/06_runlog_compare.rb
853
+ .PHONY: example_06
854
+
855
+ # 07 — ViT-Tiny on the committed data/vit_smoke corpus via Recipes::VitTiny.
856
+ examples/example_07_vit_tiny: examples/07_vit_tiny.rb lib/toy/compute.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/llm/recipes/vit_tiny.rb lib/toy/models/toy_vit.rb lib/toy/io/toy_image_loader.rb lib/toy/io/run_bundle.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/classify_batch.rb lib/toy/llm/recipe_options.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
857
+ $(SPINEL) $< -o $@
858
+ example_07: examples/example_07_vit_tiny
859
+ .PHONY: example_07
860
+
861
+ examples-curated: example_01 example_02 example_03 example_04 example_05 example_07
862
+ .PHONY: examples-curated
863
+
864
+ # L4 LoRA recipe gate. Drives the same LoRA fine-tune config as the
865
+ # frozen reference 03_finetune_lora THROUGH Toy::LLM::Recipes::LoRA; its
866
+ # loss curve must byte-equal the reference at the fixed config.
867
+ prep/smokes/smoke_recipe_lora: prep/smokes/smoke_recipe_lora.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
868
+ $(SPINEL) $< -o $@
869
+
870
+ # L4 WarmStart recipe gate. Drives the same warm-start config as the
871
+ # frozen reference 09_warm_start_train (INIT=scratch) THROUGH
872
+ # Toy::LLM::Recipes::WarmStart; its loss curve must byte-equal 09's at
873
+ # the fixed config (SEED=0 STEPS=5). The fixture drives the cosine LR
874
+ # schedule + streaming corpus loader (deps below); the recipe stays thin.
875
+ prep/smokes/smoke_recipe_warm_start: prep/smokes/smoke_recipe_warm_start.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
876
+ $(SPINEL) $< -o $@
877
+
878
+ # P2.6 gate — GGUF F32 mmap round-trip parity. Head-fuses a random_init
879
+ # model into the FUSED llama.cpp naming, writes a GGUF, reloads via
880
+ # realize_for_mmap, and asserts the reloaded forward is BIT-IDENTICAL to
881
+ # the in-memory forward. This is the behavioral gate for realize_for_mmap
882
+ # (previously only realize_for_random_init was gated). CPU-only: the GGUF
883
+ # WRITE half reads host data ptrs (tnn_gguf_w_add_tensor), which the CUDA
884
+ # writer doesn't implement — do NOT auto-mirror this to CUDA.
885
+ prep/smokes/smoke_gguf_roundtrip: prep/smokes/smoke_gguf_roundtrip.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/train/toy_gguf_writer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
886
+ $(SPINEL) $< -o $@
887
+
888
+ prep/smokes/smoke_full_finetune: prep/smokes/smoke_full_finetune.rb lib/toy/llm/adamw.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
889
+ $(SPINEL) $< -o $@
890
+
891
+ # P2.6 gate — qkv_bias mmap branch. Loads the real Qwen2.5-0.5B native GGUF
892
+ # (which DOES carry blk.N.attn_{q,k,v}.bias) and realizes via
893
+ # realize_for_mmap with qkv_bias=TRUE, untied=FALSE (output.weight absent =>
894
+ # tied), forcing the bias mmap branch (llama_seq_engine.rb:635-661) and
895
+ # its transformer_block tnn_add consumer — neither hit by smoke_gguf_roundtrip
896
+ # (qkv_bias=FALSE). Records a deterministic finite-logit baseline. CPU-only;
897
+ # DATA DEPENDENCY: data/qwen25-0.5b-native.gguf (not self-contained). MUST run
898
+ # from repo root. Do NOT auto-mirror to CUDA.
899
+ prep/smokes/smoke_gate_qkv_bias: prep/smokes/smoke_gate_qkv_bias.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
900
+ $(SPINEL) $< -o $@
901
+
902
+ # P2.6 gate — Q8-stays-Q8 realize_for_q8_copy branch. Loads the existing
903
+ # Q8 GGUF, asserts blk.0 attn_q weight stays Q8_0 in memory (NOT dequant
904
+ # to F32), deterministic forward x2 byte-identical baseline. Pure-Ruby
905
+ # fixture (no toy_drift_grad dep; seq_blocks_ffi directly).
906
+ prep/smokes/smoke_gate_q8_preserve: prep/smokes/smoke_gate_q8_preserve.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
907
+ $(SPINEL) $< -o $@
908
+
909
+ # P2.6 CUDA gate — GPU mirror of the projection-lens smoke. Exercises
910
+ # realize_for_random_init + seq forward on the CUDA backend so the
911
+ # realize-path refactor can be parity-gated on GPU (CUDA self-consistency
912
+ # before/after; CUDA floats don't bit-equal CPU). Mirror auto-generated
913
+ # by prep/gen_cuda_mirror.rb. Same force-link recipe as the 06 CUDA entry.
914
+ prep/smokes/smoke_projection_lens_cuda: prep/smokes/smoke_projection_lens_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
915
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
916
+
917
+ # E2.4 (towards GH#14) — streaming corpus loader + cosine LR smoke.
918
+ prep/smokes/smoke_corpus_loader: prep/smokes/smoke_corpus_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb tinynn/libtinynn_ggml.a
919
+ $(SPINEL) $< -o $@
920
+
921
+ # E2.5 (towards GH#14) — warm-start training driver.
922
+ examples/example_warm_start_train: examples/legacy/09_warm_start_train.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
923
+ $(SPINEL) $< -o $@
924
+ example_warm_start_train: examples/example_warm_start_train
925
+
926
+ # Auto-generated coverage matrix — ggml ops vs our FFI surface.
927
+ # Sources are vendor/ggml/include/ggml.h, tinynn/tinynn_ggml.c, and the
928
+ # two FFI binding files. See docs/coverage.md for the matrix.
929
+ coverage: docs/coverage.md
930
+ docs/coverage.md: prep/gen_coverage.rb vendor/ggml/include/ggml.h \
931
+ tinynn/tinynn_ggml.c lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb \
932
+ lib/toy/ffi/tinynn_metal.rb
933
+ ruby prep/gen_coverage.rb
934
+ coverage-check:
935
+ ruby prep/gen_coverage.rb --check
936
+ .PHONY: coverage coverage-check
937
+
938
+ examples/example_train: examples/legacy/02_train_custom_gpt.rb lib/toy/models/transformer.rb lib/toy/train/training.rb lib/toy/train/toy_trainer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
939
+ $(SPINEL) $< -o $@
940
+ example_train: examples/example_train
941
+
942
+ examples/example_finetune: examples/legacy/03_finetune_lora.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
943
+ $(SPINEL) $< -o $@
944
+ example_finetune: examples/example_finetune
945
+
946
+ # CUDA mirror — same source, swap TinyNN → TinyNNCuda by including
947
+ # both libs. The example source uses TinyNN; the CUDA build link-step
948
+ # carries CUDA symbols too (no source change). For real GPU speedup
949
+ # users typically write a `_cuda` variant; this mirror is for the
950
+ # build-recipe story.
951
+ examples/example_finetune_cuda: examples/legacy/03_finetune_lora_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
952
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
953
+ example_finetune_cuda: examples/example_finetune_cuda
954
+
955
+ # Metal mirror of example_inference (macOS only). Uses TinyNNMetal.
956
+ # Same -Wl,-u trick as CUDA so the Metal backend init survives
957
+ # weak-symbol resolution. macOS expects a leading underscore on
958
+ # external symbols, hence `-Wl,-u,_tnn_metal_force_link`.
959
+ # Frameworks (Foundation/Metal/MetalKit) are linked via -framework.
960
+ examples/example_inference_metal: examples/legacy/01_inference_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a
961
+ ifneq ($(UNAME_S),Darwin)
962
+ @echo "example_inference_metal: macOS-only"; exit 1
963
+ endif
964
+ $(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
965
+ example_inference_metal: examples/example_inference_metal
966
+
967
+ # DEVICE-aware entry point. Toy's per-backend Spinel binaries can't
968
+ # share a Ruby file (poly-dispatch landmines on LlamaSeqForwardFFICache
969
+ # vs *Cuda), so the entry point is a shell-script dispatcher.
970
+ #
971
+ # The generated script execs example_train_from_scratch_cpu for
971
+ # DEVICE=cpu (the default) and example_train_from_scratch_cuda for
972
+ # DEVICE=cuda; DEVICE=metal and unrecognised values exit 2 with a
973
+ # message on stderr. Only the cpu binary is a prerequisite of the
974
+ # dispatcher rule, so build example_train_from_scratch_cuda first; if
975
+ # it is missing, DEVICE=cuda fails at exec instead of silently emitting
976
+ # cpu data. (CUDA random-init: toy#train-device-select-cuda.)
978
+ examples/example_train_from_scratch_cpu: examples/legacy/06_train_from_scratch.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/dev/toy_tap.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
979
+ $(SPINEL) $< -o $@
980
+ examples/example_train_from_scratch_cuda: examples/legacy/06_train_from_scratch_cuda.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/dev/toy_tap.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
981
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
982
+ examples/example_train_from_scratch: examples/example_train_from_scratch_cpu
983
+ @printf '#!/bin/sh\n# Auto-generated by Makefile. DEVICE selects the backend binary.\n# Edit examples/legacy/06_train_from_scratch.rb (cpu) for behaviour; CUDA mirror is auto-generated by prep/gen_cuda_mirror.rb.\ncase "$${DEVICE:-cpu}" in\n cpu|"") exec "$$(dirname "$$0")/example_train_from_scratch_cpu" "$$@" ;;\n cuda) exec "$$(dirname "$$0")/example_train_from_scratch_cuda" "$$@" ;;\n metal) echo "DEVICE=metal not yet supported for training (inference only)" >&2; exit 2 ;;\n *) echo "DEVICE=$${DEVICE} not recognised (want cpu|cuda)" >&2; exit 2 ;;\nesac\n' > $@
984
+ @chmod +x $@
985
+ example_train_from_scratch: examples/example_train_from_scratch
986
+ example_train_from_scratch_cuda: examples/example_train_from_scratch_cuda
987
+
988
+ # GPT-2 from-scratch via the GPT2SeqEngine library API (the curated GPT-2 demo;
989
+ # CLI surface is `toy train from-scratch --arch gpt2`). Memorizes a synthetic
990
+ # sequence so CE visibly collapses; exercises the vendored LayerNorm/GELU kernels.
991
+ examples/gpt2_train: examples/legacy/gpt2_train.rb lib/toy.rb \
992
+ lib/toy/llm/engine/gpt2_seq_engine.rb lib/toy/models/transformer.rb \
993
+ lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
994
+ $(SPINEL) $< -o $@
995
+ gpt2_train: examples/gpt2_train
996
+ .PHONY: gpt2_train
997
+
998
+ examples: toy-infer example_train example_train_from_scratch gpt2_train
999
+
1000
+ # Phase 0.6 — CUDA-mirror generator. The CPU file is the source of
1001
+ # truth; the CUDA file is auto-generated by prep/gen_cuda_mirror.rb.
1002
+ # `make gen-mirrors` regenerates every mirror from its CPU source;
1003
+ # `make verify-mirrors` checks that the generator is idempotent.
1004
+ gen-mirrors:
1005
+ @ruby prep/gen_cuda_mirror.rb
1006
+
1007
+ # Mirrors are off-disk build artifacts (gitignored), so there is no committed
1008
+ # copy to drift against. verify-mirrors now regenerates every mirror (incl. the
1009
+ # Metal twins, which no Linux build consumes) and then re-runs the generator in
1010
+ # --verify mode: this asserts the generator is healthy and IDEMPOTENT (generate
1011
+ # == verify), the only invariant left once nothing is committed.
1012
+ verify-mirrors:
1013
+ @ruby prep/gen_cuda_mirror.rb
1014
+ @ruby prep/gen_cuda_mirror.rb --verify
1015
+
1016
+ # Mirrors generated at build time (off-disk; gitignored). Every runner rule
1017
+ # lists the mirror .rb as a prerequisite, so Make regenerates it on demand from
1018
+ # the CPU source of truth + the generator. `--backend` writes one backend, so
1019
+ # each target rebuilds exactly itself. This list mirrors MIRRORABLE in
1020
+ # prep/gen_cuda_mirror.rb — keep the two lists in sync. STATIC pattern rules
1021
+ # (targets restricted to this explicit list) so hand-written mirrors like
1022
+ # lib/toy/ffi/tinynn_cuda.rb / lib/toy/models/transformer_lm_cuda.rb are NOT captured.
1023
+ MIRROR_CUDA := \
1024
+ lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
1025
+ lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
1026
+ lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
1027
+ lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/gpt2_seq_engine_cuda.rb \
1028
+ lib/toy/llm/recipes/from_scratch_cuda.rb lib/toy/llm/recipes/lora_cuda.rb \
1029
+ lib/toy/llm/recipes/warm_start_cuda.rb \
1030
+ lib/toy/llm/engine/llama_kv_engine_cuda.rb \
1031
+ lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb \
1032
+ examples/legacy/06_train_from_scratch_cuda.rb prep/smokes/smoke_projection_lens_cuda.rb
1033
+ MIRROR_METAL := $(MIRROR_CUDA:_cuda.rb=_metal.rb)
1034
+
1035
+ $(MIRROR_CUDA): %_cuda.rb: %.rb prep/gen_cuda_mirror.rb
1036
+ @ruby prep/gen_cuda_mirror.rb --backend cuda $<
1037
+ $(MIRROR_METAL): %_metal.rb: %.rb prep/gen_cuda_mirror.rb
1038
+ @ruby prep/gen_cuda_mirror.rb --backend metal $<
1039
+
1040
+ # Parity-checks vs native TransformerLM.forward.
1041
+
1042
+ # Tep+Spinel HTTP server demos. See tep_demo/README.md. Builds bypass
1043
+ # tep's translator (we use spinel directly on the spinelgems-vendored
1044
+ # tep tree at vendor/spinel/tep/lib/, produced by `make vendor-tep`).
1045
+ tep_demo/hello: tep_demo/hello_api.rb vendor/spinel/tep/lib/tep.rb
1046
+ $(SPINEL) tep_demo/hello_api.rb -o tep_demo/hello
1047
+
1048
+ # Inference API: /generate?n=N runs greedy generation via FullForwardFFICache.
1049
+ tep_demo/api: tep_demo/legacy/inference_api.rb vendor/spinel/tep/lib/tep.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1050
+ $(SPINEL) tep_demo/legacy/inference_api.rb -o tep_demo/api
1051
+
1052
+ # --- ggml vendor ------------------------------------------------------------
1053
+ # Vendor patches that must land before any ggml build target. See
1054
+ # vendor-patches/README.md for the per-patch rationale.
1055
+ GGML_PATCHES := \
1056
+ vendor-patches/0001-cuda-buffer_from_ptr.patch \
1057
+ vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch \
1058
+ vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch \
1059
+ vendor-patches/0004-cuda-cpy-strided.patch \
1060
+ vendor-patches/0005-concat-backward.patch \
1061
+ vendor-patches/0006-getrows-back-large-vocab.patch \
1062
+ vendor-patches/0007-gpt2-backward-kernels.patch \
1063
+ vendor-patches/0008-mul-mat-backward-mixed-precision.patch \
1064
+ vendor-patches/0009-sched-unsupported-node-diagnostic.patch
1065
+
1066
+ # Sentinel file marking that all $(GGML_PATCHES) have been applied to
1067
+ # the vendored tree. The libggml.a build rules depend on this sentinel,
1068
+ # which depends on CMakeLists.txt, so a fresh clone applies the patches
1069
+ # exactly once, and re-runs of `make setup-ggml` are no-ops as long as
1070
+ # the patch set is unchanged.
1071
+ $(GGML_DIR)/.patched: $(GGML_DIR)/CMakeLists.txt $(GGML_PATCHES)
1072
+ @echo " reset vendor/ggml to upstream HEAD (build/ untouched)"
1073
+ @cd $(GGML_DIR) && git reset --hard HEAD >/dev/null
1074
+ @cd $(GGML_DIR) && for p in $(GGML_PATCHES); do \
1075
+ echo " apply $$p"; \
1076
+ git apply "$(CURDIR)/$$p" || { echo " FAILED"; exit 1; }; \
1077
+ done
1078
+ touch $@
1079
+
1080
+ $(GGML_DIR)/CMakeLists.txt:
1081
+ mkdir -p vendor
1082
+ git init -q $(GGML_DIR)
1083
+ cd $(GGML_DIR) && git remote add origin $(GGML_REPO) \
1084
+ && git fetch -q --depth 1 origin $(GGML_REV) \
1085
+ && git checkout -q FETCH_HEAD
1086
+
1087
+ # GGML_OPENMP=OFF: avoid the libgomp link dependency. On macOS clang
1088
+ # ships libomp (LLVM), not libgomp (GNU); ggml's own thread pool covers
1089
+ # CPU parallelism either way. Same setting used on Linux for build
1090
+ # parity (and so lib/toy/ffi/tinynn.rb doesn't need ffi_lib "gomp").
1091
+ #
1092
+ # Build output is routed through prep/progress, which:
1093
+ # - tees full cmake/build output to vendor/ggml/<dir>.build.log
1094
+ # - draws a one-line [NN%] progress bar on a TTY (plain "[NN%] msg"
1095
+ # lines on CI / non-tty stdout, no overdraw)
1096
+ # - on non-zero exit, dumps the last 40 lines of the log + exits
1097
+ # with the child's status. NEVER swallows errors.
1098
+ # Disable with QUIET=0 (passes through stdout unchanged).
1099
+ # (PROGRESS / QUIET / QUIETLY are defined near the top of this file
1100
+ # alongside SPINEL_BIN — see the DevEx polish knobs block.)
1101
+
1102
+ # Helper: run a `cd $(GGML_DIR) && cmake -B <DIR> <FLAGS>` configure
1103
+ # step. Routes output to a logfile when QUIET=1; on failure dumps the
1104
+ # log tail and propagates the exit code. QUIET=0 passes through.
1105
+ # Args: $(1) = build dir name (build / build-metal / build-cuda)
1106
+ # $(2) = cmake invocation (everything after the cd)
1107
+ define ggml_configure
1108
+ @if [ "$(QUIET)" = "1" ]; then \
1109
+ log="$(CURDIR)/$(GGML_DIR)/$(1).config.log"; \
1110
+ ( cd $(GGML_DIR) && $(2) ) >"$$log" 2>&1 || { \
1111
+ echo " ✗ cmake configure ($(1)) failed; tail of $$log:"; \
1112
+ tail -30 "$$log"; exit 1; }; \
1113
+ else \
1114
+ cd $(GGML_DIR) && $(2) ; \
1115
+ fi
1116
+ endef
1117
+
1118
+ # Helper: run a `cmake --build <DIR> -j<N>` step. Routes through
1119
+ # prep/progress when QUIET=1 (single-line [NN%] bar, log tee). QUIET=0
1120
+ # passes through.
1121
+ # Args: $(1) = build dir name; $(2) = label tag (cpu/metal/cuda);
1122
+ # $(3) = cmake --build command
1123
+ define ggml_build
1124
+ @if [ "$(QUIET)" = "1" ]; then \
1125
+ LOG="$(CURDIR)/$(GGML_DIR)/$(1).build.log" LABEL="ggml-$(2)" \
1126
+ $(PROGRESS) -- sh -c "cd $(GGML_DIR) && $(3)"; \
1127
+ else \
1128
+ cd $(GGML_DIR) && $(3) ; \
1129
+ fi
1130
+ endef
1131
+
1132
+ # setup-ggml-* targets are user-facing phonies; the real work happens
1133
+ # in the libggml.a sentinel rules below so re-running setup is a no-op
1134
+ # once the static archive is built. Lets `make setup` / `toy install`
1135
+ # chain through without redoing the ~5 s incremental cmake check on
1136
+ # every invocation.
1137
+ .PHONY: setup-ggml setup-ggml-cuda setup-ggml-metal
1138
+
1139
+ setup-ggml: $(GGML_DIR)/build/src/libggml.a
1140
+ setup-ggml-cuda: $(GGML_DIR)/build-cuda/src/libggml.a
1141
+ setup-ggml-metal: $(GGML_DIR)/build-metal/src/libggml.a
1142
+
1143
+ $(GGML_DIR)/build/src/libggml.a: $(GGML_DIR)/.patched
1144
+ @echo " → configure ggml (cpu)"
1145
+ $(call ggml_configure,build,$(CMAKE_ENV) cmake -B build \
1146
+ -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
1147
+ -DGGML_CUDA=OFF -DGGML_METAL=OFF -DGGML_VULKAN=OFF \
1148
+ -DGGML_OPENCL=OFF -DGGML_BLAS=OFF -DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
1149
+ -DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
1150
+ -DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
1151
+ @echo " → build ggml (cpu, $(NJOBS) jobs)"
1152
+ $(call ggml_build,build,cpu,$(CMAKE_ENV) cmake --build build -j$(NJOBS))
1153
+
1154
+ $(GGML_DIR)/build-cuda/src/libggml.a: $(GGML_DIR)/.patched
1155
+ @echo " → configure ggml (cuda, sm_$(GGML_CUDA_ARCH))"
1156
+ $(call ggml_configure,build-cuda,PATH=$(CUDA_DIR)/bin:$$PATH $(CMAKE_ENV) cmake -B build-cuda \
1157
+ -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
1158
+ -DGGML_CUDA=ON -DGGML_METAL=OFF -DGGML_VULKAN=OFF \
1159
+ -DGGML_OPENCL=OFF -DGGML_BLAS=OFF -DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
1160
+ -DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
1161
+ -DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
1162
+ -DCMAKE_CUDA_ARCHITECTURES=$(GGML_CUDA_ARCH) -DGGML_NATIVE=OFF)
1163
+ @echo " → build ggml (cuda, $(NJOBS) jobs)"
1164
+ $(call ggml_build,build-cuda,cuda,PATH=$(CUDA_DIR)/bin:$$PATH $(CMAKE_ENV) cmake --build build-cuda -j$(NJOBS))
1165
+
1166
+ # Metal build (macOS only). GGML_METAL_EMBED_LIBRARY=ON bakes the
1167
+ # .metal shader source into the static archive as raw bytes; the
1168
+ # Metal driver JIT-compiles it on first device load. This lets the
1169
+ # whole pipeline work with the Command Line Tools (xcrun metal /
1170
+ # metallib are full-Xcode-only). On a Mac with full Xcode you can
1171
+ # flip GGML_METAL_EMBED_LIBRARY=OFF for AOT-compiled kernels.
1172
+ $(GGML_DIR)/build-metal/src/libggml.a: $(GGML_DIR)/.patched
1173
+ ifneq ($(UNAME_S),Darwin)
1174
+ @echo "setup-ggml-metal: Metal is macOS-only (uname -s = $(UNAME_S))"; exit 1
1175
+ endif
1176
+ @echo " → configure ggml (metal)"
1177
+ $(call ggml_configure,build-metal,$(CMAKE_ENV) cmake -B build-metal \
1178
+ -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
1179
+ -DGGML_CUDA=OFF -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON \
1180
+ -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_BLAS=OFF \
1181
+ -DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
1182
+ -DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
1183
+ -DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
1184
+ @echo " → build ggml (metal, $(NJOBS) jobs)"
1185
+ $(call ggml_build,build-metal,metal,$(CMAKE_ENV) cmake --build build-metal -j$(NJOBS))
1186
+
1187
+ # --- tinynn shim (CPU build) ------------------------------------------------
1188
+ GGML_INC := -I$(GGML_DIR)/include -I$(GGML_DIR)/src
1189
+
1190
+ tinynn/tinynn_ggml.o: tinynn/tinynn_ggml.c tinynn/tinynn_ggml.h tinynn/tinynn_trace.h
1191
+ $(CC) $(CFLAGS) $(GGML_INC) -c $< -o $@
1192
+
1193
+ tinynn/tinynn_gguf.o: tinynn/tinynn_gguf.c tinynn/tinynn_gguf.h
1194
+ $(CC) $(CFLAGS) $(GGML_INC) -c $< -o $@
1195
+
1196
+ tinynn/tinynn_trace.o: tinynn/tinynn_trace.c tinynn/tinynn_trace.h
1197
+ $(CC) $(CFLAGS) -c $< -o $@
1198
+
1199
+ tinynn/tinynn_events.o: tinynn/tinynn_events.c tinynn/tinynn_events.h
1200
+ $(CC) $(CFLAGS) -c $< -o $@
1201
+
1202
+ tinynn/libtinynn_ggml.a: tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o
1203
+ ar $(ARFLAGS) $@ tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o
1204
+
1205
+ # --- toy#71 Stage B: the CRuby-oracle shared library ------------------------
1206
+ # tinynn objects + the static CPU ggml archives linked into ONE self-
1207
+ # contained shared object that plain MRI dlopens via Fiddle (lib/toy/mri.rb
1208
+ # native arm). PIC is already on everywhere (CFLAGS -fPIC for tinynn,
1209
+ # -DCMAKE_POSITION_INDEPENDENT_CODE=ON for ggml). -Wl,-Bsymbolic binds
1210
+ # ggml's intra-library references locally — without it the aarch64 link
1211
+ # rejects adrp relocations against ggml's C++ vtables ("may bind
1212
+ # externally"); no interposition is wanted anyway. --whole-archive keeps
1213
+ # every backend-registry object alive. Link order mirrors the Spinel
1214
+ # ffi_lib list in lib/toy/ffi/tinynn.rb (stdc++/pthread, -lm TRAILING; no
1215
+ # gomp — the CPU ggml build is -DGGML_OPENMP=OFF). CPU ONLY this stage:
1216
+ # the CUDA/Metal shims stay static-archive-only (follow-up — a
1217
+ # libtinynn_ggml_cuda_shared.so would whole-archive build-cuda/ + the CUDA
1218
+ # stub libs; Metal additionally needs a Mac to verify -dynamiclib +
1219
+ # -force_load). Artifact is gitignored (rebuild: make libtinynn_shared).
1220
+ .PHONY: libtinynn_shared
1221
+ libtinynn_shared: tinynn/libtinynn_ggml_shared.so
1222
+
1223
+ tinynn/libtinynn_ggml_shared.so: tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o $(GGML_DIR)/build/src/libggml.a
1224
+ ifeq ($(UNAME_S),Darwin)
1225
+ # macOS variant (toy#71 Stage B follow-up, Mac-verified 2026-06-12):
1226
+ # -dynamiclib for -shared; -force_load per archive for GNU ld's
1227
+ # --whole-archive (pulls every ggml object so the Fiddle backend
1228
+ # resolves all tnn_* symbols); -lc++ for libc++ (not libstdc++); no
1229
+ # -Bsymbolic (macOS two-level namespace already binds internally).
1230
+ # Output keeps the .so name the gate/Fiddle loader expects.
1231
+ $(CC) -dynamiclib -o $@ \
1232
+ tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o \
1233
+ -Wl,-force_load,$(GGML_DIR)/build/src/libggml.a \
1234
+ -Wl,-force_load,$(GGML_DIR)/build/src/libggml-cpu.a \
1235
+ -Wl,-force_load,$(GGML_DIR)/build/src/libggml-base.a \
1236
+ -lc++ -lpthread -lm
1237
+ else
1238
+ $(CC) -shared -Wl,-Bsymbolic -o $@ \
1239
+ tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o \
1240
+ -L$(GGML_DIR)/build/src \
1241
+ -Wl,--whole-archive -lggml -lggml-cpu -lggml-base -Wl,--no-whole-archive \
1242
+ -lstdc++ -lpthread -lm
1243
+ endif
1244
+
1245
+ # --- smoke test -------------------------------------------------------------
1246
+ # The `smoke` rules (defined after the gem-prep section below) build
1247
+ # tinynn/smoke.rb against the CPU shim. Run `setup-ggml` once first.
1248
+ # --- gem release prep (toy#45) ----------------------------------------------
1249
+ # The gem ships PRISTINE pinned ggml (patches apply at the consumer's vendor
1250
+ # step), so reset the working tree's ggml before `gem build`. Re-run setup-ggml
1251
+ # afterwards to restore the dev build. Also materialize the generated CUDA
1252
+ # mirrors (gitignored; toy.gemspec ships lib/toy/llm/*_cuda.rb explicitly) —
1253
+ # without them the gem's compute_cuda.rb requires point at missing files and
1254
+ # Spinel silently compiles them to nothing (toy#70 finding).
1255
+ # NB: reset to GGML_REV explicitly — NEVER FETCH_HEAD ("whatever was
1256
+ # fetched last"): a cold-fetch test moved FETCH_HEAD to ggml master and
1257
+ # this target silently staged UNVERIFIED ggml sources into the gem
1258
+ # (caught at the v0.8.0 wire). The assert keeps it loud.
1259
+ gem-prep: $(GGML_DIR)/CMakeLists.txt gen-mirrors
1260
+ cd $(GGML_DIR) && (git rev-parse --verify -q $(GGML_REV)^{commit} >/dev/null || git fetch -q --depth 1 origin $(GGML_REV)) && git reset --hard $(GGML_REV) >/dev/null
1261
+ rm -f $(GGML_DIR)/.patched
1262
+ @test "$$(cd $(GGML_DIR) && git rev-parse HEAD)" = "$(GGML_REV)" || { echo "FATAL: vendor/ggml HEAD != GGML_REV ($(GGML_REV)) after gem-prep"; exit 1; }
1263
+ @echo "ggml pristine at GGML_REV $$(cd $(GGML_DIR) && git rev-parse --short HEAD); now: gem build toy.gemspec"
1264
+ .PHONY: gem-prep
1265
+
1266
+ smoke: tinynn/smoke
1267
+ ./tinynn/smoke
1268
+
1269
+ tinynn/smoke: tinynn/smoke.rb tinynn/libtinynn_ggml.a
1270
+ $(SPINEL) $< -o $@
1271
+
1272
+ # A/B parity tests: native vs FFI (CPU) for one op each.
1273
+ ab-smoke: tinynn/ab_smoke
1274
+ ./tinynn/ab_smoke
1275
+
1276
+ ab-smoke-add: tinynn/ab_smoke_add
1277
+ ./tinynn/ab_smoke_add
1278
+
1279
+ ab-smoke-gelu: tinynn/ab_smoke_gelu
1280
+ ./tinynn/ab_smoke_gelu
1281
+
1282
+ # Llama-family ops (silu, mul, eventually rope) — added with the
1283
+ # Toy::SmolLM2 FFI mirror work.
1284
+ ab-smoke-silu: tinynn/ab_smoke_silu
1285
+ ./tinynn/ab_smoke_silu
1286
+
1287
+ tinynn/ab_smoke_silu: tinynn/ab_smoke_silu.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1288
+ $(SPINEL) $< -o $@
1289
+
1290
+ ab-smoke-mul: tinynn/ab_smoke_mul
1291
+ ./tinynn/ab_smoke_mul
1292
+
1293
+ tinynn/ab_smoke_mul: tinynn/ab_smoke_mul.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1294
+ $(SPINEL) $< -o $@
1295
+
1296
+ ab-smoke-rms-norm: tinynn/ab_smoke_rms_norm
1297
+ ./tinynn/ab_smoke_rms_norm
1298
+
1299
+ ab-smoke-softmax: tinynn/ab_smoke_softmax
1300
+ ./tinynn/ab_smoke_softmax
1301
+
1302
+ ab-smoke-transpose: tinynn/ab_smoke_transpose
1303
+ ./tinynn/ab_smoke_transpose
1304
+
1305
+ ab-smoke-scale: tinynn/ab_smoke_scale
1306
+ ./tinynn/ab_smoke_scale
1307
+
1308
+ # Chained-op pipeline: gelu(h·w1)·w2 in one ggml graph.
1309
+ ab-smoke-pipeline: tinynn/ab_smoke_pipeline
1310
+ ./tinynn/ab_smoke_pipeline
1311
+
1312
+ # Run the core CPU smoke set (not every ab-smoke-* target is listed). CUDA variants would need `make setup-ggml-cuda` first.
1313
+ # `ab-smoke-transpose` is omitted: ggml_cont(ggml_transpose(...)) trips
1314
+ # the scheduler's buffer allocation; we fold transposes into consuming
1315
+ # ops instead (see TinyNN.matmul's b-transposed upload).
1316
+ test: smoke ab-smoke ab-smoke-add ab-smoke-gelu ab-smoke-rms-norm \
1317
+ ab-smoke-softmax ab-smoke-scale ab-smoke-pipeline \
1318
+ ab-smoke-matmul-variants ab-smoke-back ab-smoke-embed ab-smoke-sgd \
1319
+ ab-smoke-gelu-back ab-smoke-cegrad ab-smoke-adam
1320
+
1321
+ tinynn/ab_smoke: tinynn/ab_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1322
+ $(SPINEL) $< -o $@
1323
+
1324
+ tinynn/ab_smoke_add: tinynn/ab_smoke_add.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1325
+ $(SPINEL) $< -o $@
1326
+
1327
+ # E1.1 / GH#13 — Conv2D smoke + JSON dump for PyTorch parity.
1328
+ tinynn/ab_smoke_conv2d: tinynn/ab_smoke_conv2d.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1329
+ $(SPINEL) $< -o $@
1330
+
1331
+ # E1.2 / GH#13 — patch_embed composite smoke + parity dump.
1332
+ tinynn/ab_smoke_patch_embed: tinynn/ab_smoke_patch_embed.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/models/toy_vit.rb tinynn/libtinynn_ggml.a
1333
+ $(SPINEL) $< -o $@
1334
+
1335
+ # E1.3 / GH#13 — ViT-Tiny forward + training smoke.
1336
+ prep/smokes/smoke_vit_tiny: prep/smokes/smoke_vit_tiny.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
1337
+ $(SPINEL) $< -o $@
1338
+
1339
+ # E1.5 / GH#13 — image-loader smoke.
1340
+ prep/smokes/smoke_image_loader: prep/smokes/smoke_image_loader.rb lib/toy/io/toy_image_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1341
+ $(SPINEL) $< -o $@
1342
+
1343
+ # E1.6 / GH#13 — ViT-Tiny training driver.
1344
+ examples/example_train_vit_tiny: examples/legacy/07_train_vit_tiny.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb lib/toy/io/toy_image_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
1345
+ $(SPINEL) $< -o $@
1346
+ example_train_vit_tiny: examples/example_train_vit_tiny
1347
+
1348
+ tinynn/ab_smoke_gelu: tinynn/ab_smoke_gelu.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1349
+ $(SPINEL) $< -o $@
1350
+
1351
+ tinynn/ab_smoke_rms_norm: tinynn/ab_smoke_rms_norm.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1352
+ $(SPINEL) $< -o $@
1353
+
1354
+ tinynn/ab_smoke_softmax: tinynn/ab_smoke_softmax.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1355
+ $(SPINEL) $< -o $@
1356
+
1357
+ tinynn/ab_smoke_flash_attn: tinynn/ab_smoke_flash_attn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1358
+ $(SPINEL) $< -o $@
1359
+
1360
+ tinynn/ab_smoke_q8_kv: tinynn/ab_smoke_q8_kv.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1361
+ $(SPINEL) $< -o $@
1362
+
1363
+ tinynn/ab_smoke_moe_ffn: tinynn/ab_smoke_moe_ffn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1364
+ $(SPINEL) $< -o $@
1365
+
1366
+ tinynn/ab_smoke_transpose: tinynn/ab_smoke_transpose.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1367
+ $(SPINEL) $< -o $@
1368
+
1369
+ tinynn/ab_smoke_scale: tinynn/ab_smoke_scale.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1370
+ $(SPINEL) $< -o $@
1371
+
1372
+ tinynn/ab_smoke_pipeline: tinynn/ab_smoke_pipeline.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1373
+ $(SPINEL) $< -o $@
1374
+
1375
+ # Chained FFNFFICache parity: pre, hidden, out vs hand-rolled native.
1376
+ ab-smoke-ffncache: tinynn/ab_smoke_ffncache
1377
+ ./tinynn/ab_smoke_ffncache
1378
+
1379
+ tinynn/ab_smoke_ffncache: tinynn/ab_smoke_ffncache.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1380
+ $(SPINEL) $< -o $@
1381
+
1382
+ # ggml-native AdamW step (opt_step_adamw) parity vs project's plain-Adam.
1383
+ ab-smoke-adamw-op: tinynn/ab_smoke_adamw_op
1384
+ ./tinynn/ab_smoke_adamw_op
1385
+
1386
+ tinynn/ab_smoke_adamw_op: tinynn/ab_smoke_adamw_op.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1387
+ $(SPINEL) $< -o $@
1388
+
1389
+ # Persistent-tensor architecture check: data uploaded to a ctx_w tensor
1390
+ # survives a compute cycle.
1391
+ ab-smoke-persistent: tinynn/ab_smoke_persistent
1392
+ ./tinynn/ab_smoke_persistent
1393
+
1394
+ tinynn/ab_smoke_persistent: tinynn/ab_smoke_persistent.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1395
+ $(SPINEL) $< -o $@
1396
+
1397
+ # Dual-cgraph + persistent-weights design check: forward reads t_w;
1398
+ # adam mutates t_w in place; forward sees the new value.
1399
+ ab-smoke-dual-graph: tinynn/ab_smoke_dual_graph
1400
+ ./tinynn/ab_smoke_dual_graph
1401
+
1402
+ tinynn/ab_smoke_dual_graph: tinynn/ab_smoke_dual_graph.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1403
+ $(SPINEL) $< -o $@
1404
+
1405
+ # M2 foundation: view_2d + cpy to write a single row into a persistent
1406
+ # (max_T, d_head) KV buffer at a runtime-baked position.
1407
+ ab-smoke-kv-write: tinynn/ab_smoke_kv_write
1408
+ ./tinynn/ab_smoke_kv_write
1409
+
1410
+ tinynn/ab_smoke_kv_write: tinynn/ab_smoke_kv_write.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1411
+ $(SPINEL) $< -o $@
1412
+
1413
+ # M2 prototype: single-step decode through a KV cache. Pre-fills K/V
1414
+ # for positions 0..POS-1, writes k_new/v_new at POS, computes scores
1415
+ # + soft_max_ext + head_out. Parity vs hand-rolled native.
1416
+ ab-smoke-kv-attn: tinynn/ab_smoke_kv_attn
1417
+ ./tinynn/ab_smoke_kv_attn
1418
+
1419
+ tinynn/ab_smoke_kv_attn: tinynn/ab_smoke_kv_attn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1420
+ $(SPINEL) $< -o $@
1421
+
1422
+ # M1.2: full single-block forward through the persistent graph.
1423
+ # Parity vs native TransformerLM.forward() at n_layers=1, n_heads=2.
1424
+ ab-smoke-full-forward-block: tinynn/ab_smoke_full_forward_block
1425
+ ./tinynn/ab_smoke_full_forward_block
1426
+
1427
+ tinynn/ab_smoke_full_forward_block: tinynn/ab_smoke_full_forward_block.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1428
+ $(SPINEL) $< -o $@
1429
+
1430
+ # Wallclock bench: native TransformerLM.forward vs FullForwardFFICache.
1431
+ full-forward-bench: tinynn/full_forward_bench
1432
+ ./tinynn/full_forward_bench
1433
+
1434
+ tinynn/full_forward_bench: tinynn/full_forward_bench.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1435
+ $(SPINEL) $< -o $@
1436
+
1437
+ full-forward-bench-cuda: tinynn/full_forward_bench_cuda
1438
+ ./tinynn/full_forward_bench_cuda
1439
+
1440
+ tinynn/full_forward_bench_cuda: tinynn/full_forward_bench_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
1441
+ $(SPINEL) $< -o $@
1442
+
1443
+ ab-smoke-dual-graph-cuda: tinynn/ab_smoke_dual_graph_cuda
1444
+ ./tinynn/ab_smoke_dual_graph_cuda
1445
+
1446
+ tinynn/ab_smoke_dual_graph_cuda: tinynn/ab_smoke_dual_graph_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
1447
+ $(SPINEL) $< -o $@
1448
+
1449
+ ab-smoke-adamw-op-cuda: tinynn/ab_smoke_adamw_op_cuda
1450
+ ./tinynn/ab_smoke_adamw_op_cuda
1451
+
1452
+ tinynn/ab_smoke_adamw_op_cuda: tinynn/ab_smoke_adamw_op_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
1453
+ $(SPINEL) $< -o $@
1454
+
1455
+ # A/B harness for the "fuse-or-not" question: N_HEADS small matmuls vs
1456
+ # 1 batched matmul at LoRA-Q shape. Override D_MODEL / N_HEADS / R / T
1457
+ # via env to sweep launch-overhead vs compute-bound regimes. See
1458
+ # docs/heavy-train-attribution-2026-05-24.md.
1459
+ ab-smoke-lora-fused-cuda: tinynn/ab_smoke_lora_fused_cuda
1460
+ ./tinynn/ab_smoke_lora_fused_cuda
1461
+
1462
+ tinynn/ab_smoke_lora_fused_cuda: tinynn/ab_smoke_lora_fused_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
1463
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1464
+
1465
+ # Transformer-shape sized parity + wallclock comparison.
1466
+ ab-smoke-big: tinynn/ab_smoke_big
1467
+ ./tinynn/ab_smoke_big
1468
+
1469
+ tinynn/ab_smoke_big: tinynn/ab_smoke_big.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1470
+ $(SPINEL) $< -o $@
1471
+
1472
+ ab-smoke-matmul-variants: tinynn/ab_smoke_matmul_variants
1473
+ ./tinynn/ab_smoke_matmul_variants
1474
+
1475
+ tinynn/ab_smoke_matmul_variants: tinynn/ab_smoke_matmul_variants.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1476
+ $(SPINEL) $< -o $@
1477
+
1478
+ ab-smoke-back: tinynn/ab_smoke_back
1479
+ ./tinynn/ab_smoke_back
1480
+
1481
+ tinynn/ab_smoke_back: tinynn/ab_smoke_back.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1482
+ $(SPINEL) $< -o $@
1483
+
1484
+ ab-smoke-gelu-back: tinynn/ab_smoke_gelu_back
1485
+ ./tinynn/ab_smoke_gelu_back
1486
+
1487
+ tinynn/ab_smoke_gelu_back: tinynn/ab_smoke_gelu_back.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1488
+ $(SPINEL) $< -o $@
1489
+
1490
+ ab-smoke-cegrad: tinynn/ab_smoke_cegrad
1491
+ ./tinynn/ab_smoke_cegrad
1492
+
1493
+ tinynn/ab_smoke_cegrad: tinynn/ab_smoke_cegrad.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1494
+ $(SPINEL) $< -o $@
1495
+
1496
+ ab-smoke-adam: tinynn/ab_smoke_adam
1497
+ ./tinynn/ab_smoke_adam
1498
+
1499
+ tinynn/ab_smoke_adam: tinynn/ab_smoke_adam.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1500
+ $(SPINEL) $< -o $@
1501
+
1502
+ gguf-smoke: tinynn/gguf_smoke
1503
+ ./tinynn/gguf_smoke
1504
+
1505
+ tinynn/gguf_smoke: tinynn/gguf_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1506
+ $(SPINEL) $< -o $@
1507
+
1508
+ # Walks every tensor in data/distilgpt2-f32.gguf via tnn_gguf_*. Used to
1509
+ # confirm large HF-converted GGUFs roundtrip through the project FFI.
1510
+ gguf-inspect: tinynn/gguf_inspect
1511
+ ./tinynn/gguf_inspect
1512
+
1513
+ tinynn/gguf_inspect: tinynn/gguf_inspect.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1514
+ $(SPINEL) $< -o $@
1515
+
1516
+ # GPT2LM build smoke: confirm lib/toy/models/gpt2.rb Spinel-compiles and the
1517
+ # forward shapes line up. Toy dims, random weights — values mean nothing.
1518
+ gpt2-build-smoke: tinynn/gpt2_build_smoke
1519
+ ./tinynn/gpt2_build_smoke
1520
+
1521
+ tinynn/gpt2_build_smoke: tinynn/gpt2_build_smoke.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb
1522
+ $(SPINEL) $< -o $@
1523
+
1524
+ # Load distilgpt2-f32.gguf into a GPT2LM and print sentinel weights
1525
+ # per category. Verifies name mapping + per-head split before forward.
1526
+ gpt2-load-smoke: tinynn/gpt2_load_smoke
1527
+ ./tinynn/gpt2_load_smoke
1528
+
1529
+ tinynn/gpt2_load_smoke: tinynn/gpt2_load_smoke.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1530
+ $(SPINEL) $< -o $@
1531
+
1532
+ # Reads prompt token IDs from data/prompt_ids.txt, loads weights from
1533
+ # data/distilgpt2-f32.gguf, greedy-generates N_NEW tokens via native Mat
1534
+ # forward, and writes the full ID sequence back. Decode with prep/tokens.py decode.
1535
+
1536
+ # Native Mat GPT-2 inference (DistilGPT2 / GPT-2 family).
1537
+ #
1538
+ gpt2: demos/gpt2
1539
+ demos/gpt2: demos/gpt2.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_gpt2.rb lib/toy/io/loaders/toy_gpt2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1540
+ $(SPINEL) $< -o $@
1541
+
1542
+ # SmolLM2-135M (llama-family) inference via Toy::SmolLM2.
1543
+ # Tokenization is host-side: ./prep/smollm2_tokens.py encode "..."
1544
+ smollm2: demos/smollm2
1545
+ demos/smollm2: demos/smollm2.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1546
+ $(SPINEL) $< -o $@
1547
+
1548
+ # SmolLM2-135M FFI KV-cache (CPU).
1549
+ smollm2_kv: demos/smollm2_kv
1550
+ demos/smollm2_kv: demos/smollm2_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1551
+ $(SPINEL) $< -o $@
1552
+
1553
+ # Qwen2.5 Mat-mediated KV-cache (CPU). The slow, correct reference path.
1554
+ # Run with `GGUF=data/qwen25-1.5b-f32.gguf ./demos/qwen25_kv` etc.
1555
+ qwen25_kv: demos/qwen25_kv
1556
+ demos/qwen25_kv: demos/qwen25_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1557
+ $(SPINEL) $< -o $@
1558
+
1559
+ # Qwen2.5 Phase-2 mmap inference (CPU). Canonical performance path.
1560
+ qwen25_native_mmap: demos/qwen25_native_mmap
1561
+ demos/qwen25_native_mmap: demos/qwen25_native_mmap.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1562
+ $(SPINEL) $< -o $@
1563
+
1564
+ # Phase 0.7 acceptance gates: 0.5B (f32 + Q8) + 1.5B + 3B greedy-decode
1565
+ # parity against locked-in golden token-ID sequences. Run before tagging
1566
+ # a release; see docs/design/phase-07-acceptance.md.
1567
+ qwen25_acceptance: demos/qwen25_acceptance
1568
+ demos/qwen25_acceptance: demos/qwen25_acceptance.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1569
+ $(SPINEL) $< -o $@
1570
+
1571
+ # Inference bench (CPU). Long warmup + long prefill + per-token stats.
1572
+ # Pick model via GGUF env; see docs/design/bench-cuda-2026-05-21.md.
1573
+ qwen25_bench_cpu: demos/qwen25_bench_cpu
1574
+ demos/qwen25_bench_cpu: demos/qwen25_bench_cpu.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1575
+ $(SPINEL) $< -o $@
1576
+
1577
+ # Inference bench (CUDA). Same shape as the CPU bench for side-by-side.
1578
+ qwen25_bench_cuda: demos/qwen25_bench_cuda
1579
+ demos/qwen25_bench_cuda: demos/qwen25_bench_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
1580
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1581
+
1582
+ # F1.2 step 2: LoRA-Q forward-parity gate. Loads SmolLM2-135M twice
1583
+ # (baseline + LoRA r=16 B=0), asserts bit-identical generated IDs.
1584
+ smollm2_lora_forward: demos/smollm2_lora_forward
1585
+ demos/smollm2_lora_forward: demos/smollm2_lora_forward.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1586
+ $(SPINEL) $< -o $@
1587
+
1588
+ # F1.2 step 3: backward through the full SmolLM2 decode graph,
1589
+ # layer-0 LoRA-Q updated via SGD. Requires the vendored CONCAT
1590
+ # backward in vendor/ggml/src/ggml.c.
1591
+ smollm2_lora_train_step: demos/smollm2_lora_train_step
1592
+ demos/smollm2_lora_train_step: demos/smollm2_lora_train_step.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1593
+ $(SPINEL) $< -o $@
1594
+
1595
+ # F1.2 step 4: all-layers LoRA-Q SGD on real CE loss against a rare
1596
+ # target token. 540 opt_step nodes (30 layers × 9 heads × 2 params).
1597
+ # Acceptance: monotonic decrease over 20 steps.
1598
+ smollm2_lora_train_ce: demos/smollm2_lora_train_ce
1599
+ demos/smollm2_lora_train_ce: demos/smollm2_lora_train_ce.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1600
+ $(SPINEL) $< -o $@
1601
+
1602
+ # F2 step 1: CUDA mirror of the LoRA forward parity gate.
1603
+ smollm2_lora_forward_cuda: demos/smollm2_lora_forward_cuda
1604
+ demos/smollm2_lora_forward_cuda: demos/smollm2_lora_forward_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1605
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1606
+
1607
+ # F2 step 2: CUDA mirror of the multi-layer SGD CE training smoke.
1608
+ smollm2_lora_train_ce_cuda: demos/smollm2_lora_train_ce_cuda
1609
+ demos/smollm2_lora_train_ce_cuda: demos/smollm2_lora_train_ce_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1610
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1611
+
1612
+ # Task #70 diagnostic — same CE smoke but with every graph_b node
1613
+ # pinned. Confirms sched intermediate-grad aliasing is the CPU
1614
+ # divergence's root cause. See docs/design/task70-root-cause-2026-05-21.md.
1615
+ smollm2_lora_train_ce_pinned: demos/smollm2_lora_train_ce_pinned
1616
+ demos/smollm2_lora_train_ce_pinned: demos/smollm2_lora_train_ce_pinned.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1617
+ $(SPINEL) $< -o $@
1618
+
1619
+ # F1.2 step 5: AdamW training with per-step m/v preservation via
1620
+ # tnn_graph_reset_grads_only. Converges 7.5 → 0.09 in 20 optimizer steps
1621
+ # at LR=1e-3 — proper SFT-shaped learning curve.
1622
+ smollm2_lora_train_adamw_cuda: demos/smollm2_lora_train_adamw_cuda
1623
+ demos/smollm2_lora_train_adamw_cuda: demos/smollm2_lora_train_adamw_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1624
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1625
+
1626
+ # F1.2 step 6a: multi-target AdamW SFT-shaped training. Cycles through
1627
+ # 5 target tokens × 10 epochs at the same prefix; expects loss to
1628
+ # drop on average + per-target. 10.8 → 3.6 in 10 epochs. Foundation
1629
+ # for step 6b (multi-position) and step 7 (real alpaca dataset).
1630
+ smollm2_lora_sft_multi_cuda: demos/smollm2_lora_sft_multi_cuda
1631
+ demos/smollm2_lora_sft_multi_cuda: demos/smollm2_lora_sft_multi_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1632
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1633
+
1634
+ # F1.2 step 6b — multi-position SFT (cycle pos4 / pos5). Validates
1635
+ # that persistent Adam m/v (allocated by enable_lora_q_adamw! +
1636
+ # realize_for_mmap) survive tnn_reset_for_rebuild between cycles.
1637
+ smollm2_lora_sft_multipos_cuda: demos/smollm2_lora_sft_multipos_cuda
1638
+ demos/smollm2_lora_sft_multipos_cuda: demos/smollm2_lora_sft_multipos_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1639
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1640
+
1641
+ # M3 step 1 — sequence-mode forward parity at T=1.
1642
+ # LlamaSeqForwardFFICache.forward([id], [0]) must match
1643
+ # SmolLM2KVFFICache + decode_step(id, 0). See
1644
+ # docs/design/m3-seq-forward-2026-05-21.md.
1645
+ smollm2_seq_parity: demos/smollm2_seq_parity
1646
+ demos/smollm2_seq_parity: demos/smollm2_seq_parity.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1647
+ $(SPINEL) $< -o $@
1648
+
1649
+ # M3 step 2 — T=4 trajectory parity (CPU). Per-position seq logits must
1650
+ # match the decode_step trajectory; proves causal-mask + multi-pos RoPE.
1651
+ smollm2_seq_parity_t4: demos/smollm2_seq_parity_t4
1652
+ demos/smollm2_seq_parity_t4: demos/smollm2_seq_parity_t4.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1653
+ $(SPINEL) $< -o $@
1654
+
1655
+ # M3 step 2 — CUDA mirror. T=1 and T=4 vs CPU decode_step trajectory.
1656
+ smollm2_seq_parity_cuda: demos/smollm2_seq_parity_cuda
1657
+ demos/smollm2_seq_parity_cuda: demos/smollm2_seq_parity_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1658
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1659
+
1660
+ smollm2_seq_parity_t4_cuda: demos/smollm2_seq_parity_t4_cuda
1661
+ demos/smollm2_seq_parity_t4_cuda: demos/smollm2_seq_parity_t4_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1662
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1663
+
1664
+ # M3 step 3 — seq-mode LoRA training smoke (CPU). One forward + backward
1665
+ # + opt_step over T positions; loss should decrease over N steps.
1666
+ smollm2_seq_train: demos/smollm2_seq_train
1667
+ demos/smollm2_seq_train: demos/smollm2_seq_train.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1668
+ $(SPINEL) $< -o $@
1669
+
1670
+ smollm2_seq_train_cuda: demos/smollm2_seq_train_cuda
1671
+ demos/smollm2_seq_train_cuda: demos/smollm2_seq_train_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1672
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1673
+
1674
+ # F3 — full fine-tune on CUDA. Every per-block weight tensor is
1675
+ # writable F32 + AdamW state; opt_step on each. See
1676
+ # docs/roadmap/f3-full-finetune-2026-05-21.md.
1677
+ smollm2_seq_full_finetune_cuda: demos/smollm2_seq_full_finetune_cuda
1678
+ demos/smollm2_seq_full_finetune_cuda: demos/smollm2_seq_full_finetune_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1679
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1680
+
1681
+ # F4 (QLoRA) on CUDA via realize_for_q8_copy. Q8 base in standard
1682
+ # CUDA buffer + F32 LoRA adapter; bypasses the BYO-pointer padding bug.
1683
+ smollm2_seq_qlora_cuda: demos/smollm2_seq_qlora_cuda
1684
+ demos/smollm2_seq_qlora_cuda: demos/smollm2_seq_qlora_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1685
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1686
+
1687
+ # Training step-time bench. MODE=lora|ft; STEPS=N; GGUF=path.
1688
+ # toy#77: the seq-engine mirror requires the primitives/blocks/archs
1689
+ # mirrors; without them in the dep list a FRESH checkout generates only
1690
+ # llama_seq_engine_cuda.rb, its require_relatives dangle (Spinel ignores
1691
+ # them with a warning), every engine type degrades to int and .new
1692
+ # returns nil — the demo then segfaults in the first attr setter.
1693
+ seq_train_bench_cuda: demos/seq_train_bench_cuda
1694
+ demos/seq_train_bench_cuda: demos/seq_train_bench_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
1695
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1696
+
1697
+ # Per-phase training-step bench (CPU + CUDA). Times graph_reset /
1698
+ # uploads / compute_backward / download separately. Doc:
1699
+ # docs/design/bench-train-2026-05-21.md.
1700
+ smollm2_lora_train_bench: demos/smollm2_lora_train_bench
1701
+ demos/smollm2_lora_train_bench: demos/smollm2_lora_train_bench.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1702
+ $(SPINEL) $< -o $@
1703
+
1704
+ smollm2_lora_train_bench_cuda: demos/smollm2_lora_train_bench_cuda
1705
+ demos/smollm2_lora_train_bench_cuda: demos/smollm2_lora_train_bench_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1706
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1707
+
1708
+ # Task #70 grad-magnitude probes (per-layer maxabs(grad_A), maxabs(grad_B)).
1709
+ smollm2_lora_grad_probe: demos/smollm2_lora_grad_probe
1710
+ demos/smollm2_lora_grad_probe: demos/smollm2_lora_grad_probe.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1711
+ $(SPINEL) $< -o $@
1712
+
1713
+ smollm2_lora_grad_probe_cuda: demos/smollm2_lora_grad_probe_cuda
1714
+ demos/smollm2_lora_grad_probe_cuda: demos/smollm2_lora_grad_probe_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1715
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1716
+
1717
+ # Qwen2.5 Phase-2 mmap inference (CUDA). Requires `make setup-ggml-cuda`.
1718
+ qwen25_native_mmap_cuda: demos/qwen25_native_mmap_cuda
1719
+ demos/qwen25_native_mmap_cuda: demos/qwen25_native_mmap_cuda.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1720
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1721
+
1722
+ # SmolLM2-135M FFI KV-cache (CUDA). Links with -u tnn_cuda_force_link like the other demos/*_cuda rules.
1723
+ smollm2_kv_cuda: demos/smollm2_kv_cuda
1724
+ demos/smollm2_kv_cuda: demos/smollm2_kv_cuda.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1725
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1726
+
1727
+ # TinyLlama-1.1B demo. Uses the same Toy::SmolLM2 / FFI KV CUDA stack
1728
+ # (llama-family architecture); just configured for the larger shape. Links with -u tnn_cuda_force_link like the other demos/*_cuda rules.
1729
+ tinyllama_kv_cuda: demos/tinyllama_kv_cuda
1730
+ demos/tinyllama_kv_cuda: demos/tinyllama_kv_cuda.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1731
+ $(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
1732
+
1733
+ tinyllama: demos/tinyllama
1734
+ demos/tinyllama: demos/tinyllama.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1735
+ $(SPINEL) $< -o $@
1736
+
1737
+ tinyllama_kv: demos/tinyllama_kv
1738
+ demos/tinyllama_kv: demos/tinyllama_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1739
+ $(SPINEL) $< -o $@
1740
+
1741
+ # Print the Phuong–Hutter algorithm cards for both models. No
1742
+ # inference — just emit the structured pseudocode. Source-of-truth
1743
+ # for the round-trip work (task #33).
1744
+ algorithm_cards: demos/algorithm_cards
1745
+ demos/algorithm_cards: demos/algorithm_cards.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_gpt2.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_gpt2_loader.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1746
+ $(SPINEL) $< -o $@
1747
+
1748
+ # TinyStories from-scratch training via Toy::Trainer.
1749
+ #
1750
+ train: demos/train
1751
+ demos/train: demos/train.rb lib/toy/train/toy_trainer.rb lib/toy/models/transformer.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1752
+ $(SPINEL) $< -o $@
1753
+
1754
+ # Parity probe: one forward at distilgpt2 shape, dump last-row logits
1755
+ # to data/ours_logits.txt. Pair with prep/parity.py for the HF reference.
1756
+ gpt2-parity: tinynn/gpt2_parity
1757
+ ./tinynn/gpt2_parity
1758
+
1759
+ tinynn/gpt2_parity: tinynn/gpt2_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1760
+ $(SPINEL) tinynn/gpt2_parity.rb -o tinynn/gpt2_parity
1761
+
1762
+ # FFI parity probe: persistent ggml graph with LayerNorm + biases.
1763
+ # Dumps last-row logits to data/ours_ffi_logits.txt.
1764
+ gpt2-ffi-parity: tinynn/gpt2_ffi_parity
1765
+ ./tinynn/gpt2_ffi_parity
1766
+
1767
+ tinynn/gpt2_ffi_parity: tinynn/gpt2_ffi_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_fwd_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1768
+ $(SPINEL) tinynn/gpt2_ffi_parity.rb -o tinynn/gpt2_ffi_parity
1769
+
1770
+ # Apples-to-apples bench: native Mat vs FFI on the same forward.
1771
+ # Re-encode data/prompt_ids.txt first so prompt length matches T_SEQ=5.
1772
+ gpt2-bench: tinynn/gpt2_bench
1773
+ ./tinynn/gpt2_bench
1774
+
1775
+ tinynn/gpt2_bench: tinynn/gpt2_bench.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_fwd_engine.rb lib/toy/llm/engine/gpt2_kv_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1776
+ $(SPINEL) tinynn/gpt2_bench.rb -o tinynn/gpt2_bench
1777
+
1778
+ # Ruby BPE smoke: load vocab/merges, encode + roundtrip-decode some
1779
+ # fixed prompts. Compare against prep/tokens.py output.
1780
+ bpe-smoke: tinynn/bpe_smoke
1781
+ ./tinynn/bpe_smoke
1782
+
1783
+ tinynn/bpe_smoke: tinynn/bpe_smoke.rb lib/toy/models/transformer.rb lib/toy/io/bpe.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1784
+ $(SPINEL) tinynn/bpe_smoke.rb -o tinynn/bpe_smoke
1785
+
1786
+ # KV-cache parity probe: prefill the prompt one token at a time through
1787
+ # GPT2KVFFICache, dump last-position logits.
1788
+ gpt2-kv-parity: tinynn/gpt2_kv_parity
1789
+ ./tinynn/gpt2_kv_parity
1790
+
1791
+ tinynn/gpt2_kv_parity: tinynn/gpt2_kv_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_kv_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1792
+ $(SPINEL) tinynn/gpt2_kv_parity.rb -o tinynn/gpt2_kv_parity
1793
+
1794
+ # --- CUDA mirrors of the GPT-2 demos / parity / bench --------------
1795
+ # All require `make setup-ggml-cuda` to have produced
1796
+ # vendor/ggml/build-cuda first. Built on the gx10 (NVIDIA GB10);
1797
+ # the Mac build doesn't have CUDA.
1798
+
1799
+ CUDA_GPT2_DEPS = lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb \
1800
+ lib/toy/train/training.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb \
1801
+ tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1802
+
1803
+ gpt2-ffi-parity-cuda: tinynn/gpt2_ffi_parity_cuda
1804
+ ./tinynn/gpt2_ffi_parity_cuda
1805
+
1806
+ tinynn/gpt2_ffi_parity_cuda: tinynn/gpt2_ffi_parity_cuda.rb lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb $(CUDA_GPT2_DEPS)
1807
+ $(SPINEL) tinynn/gpt2_ffi_parity_cuda.rb -o tinynn/gpt2_ffi_parity_cuda
1808
+
1809
+ gpt2-kv-parity-cuda: tinynn/gpt2_kv_parity_cuda
1810
+ ./tinynn/gpt2_kv_parity_cuda
1811
+
1812
+ tinynn/gpt2_kv_parity_cuda: tinynn/gpt2_kv_parity_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb $(CUDA_GPT2_DEPS)
1813
+ $(SPINEL) tinynn/gpt2_kv_parity_cuda.rb -o tinynn/gpt2_kv_parity_cuda
1814
+
1815
+ gpt2-bench-cuda: tinynn/gpt2_bench_cuda
1816
+ ./tinynn/gpt2_bench_cuda
1817
+
1818
+ tinynn/gpt2_bench_cuda: tinynn/gpt2_bench_cuda.rb lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb $(CUDA_GPT2_DEPS)
1819
+ $(SPINEL) tinynn/gpt2_bench_cuda.rb -o tinynn/gpt2_bench_cuda
1820
+
1821
+ ab-smoke-embed: tinynn/ab_smoke_embed
1822
+ ./tinynn/ab_smoke_embed
1823
+
1824
+ tinynn/ab_smoke_embed: tinynn/ab_smoke_embed.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1825
+ $(SPINEL) tinynn/ab_smoke_embed.rb -o tinynn/ab_smoke_embed
1826
+
1827
+ ab-smoke-sgd: tinynn/ab_smoke_sgd
1828
+ ./tinynn/ab_smoke_sgd
1829
+
1830
+ tinynn/ab_smoke_sgd: tinynn/ab_smoke_sgd.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1831
+ $(SPINEL) tinynn/ab_smoke_sgd.rb -o tinynn/ab_smoke_sgd
1832
+
1833
+ # F1.2 step 1: multi-step LoRA convergence via the F1.1 in-graph
1834
+ # optimizer. Toy shape; SGD; 60 steps; asserts final loss < 10% of
1835
+ # initial (passes at ~10e-13 of initial).
1836
+ ab-smoke-lora-train: tinynn/ab_smoke_lora_train
1837
+ ./tinynn/ab_smoke_lora_train
1838
+
1839
+ tinynn/ab_smoke_lora_train: tinynn/ab_smoke_lora_train.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1840
+ $(SPINEL) tinynn/ab_smoke_lora_train.rb -o tinynn/ab_smoke_lora_train
1841
+
1842
+ # Forward-only smoke: does TransformerLM#forward run at current Spinel
1843
+ # master? (The #473 SIGBUS is in backward; forward might be OK.)
1844
+ forward-smoke: tinynn/forward_smoke
1845
+ ./tinynn/forward_smoke
1846
+
1847
+ tinynn/forward_smoke: tinynn/forward_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1848
+ $(SPINEL) tinynn/forward_smoke.rb -o tinynn/forward_smoke
1849
+
1850
+ persistent-bench: tinynn/persistent_bench
1851
+ ./tinynn/persistent_bench
1852
+
1853
+ tinynn/persistent_bench: tinynn/persistent_bench.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1854
+ $(SPINEL) tinynn/persistent_bench.rb -o tinynn/persistent_bench
1855
+
1856
+ persistent-bench-cuda: tinynn/persistent_bench_cuda
1857
+ ./tinynn/persistent_bench_cuda
1858
+
1859
+ tinynn/persistent_bench_cuda: tinynn/persistent_bench_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1860
+ $(SPINEL) tinynn/persistent_bench_cuda.rb -o tinynn/persistent_bench_cuda
1861
+
1862
+ persistent-bench-big: tinynn/persistent_bench_big
1863
+ ./tinynn/persistent_bench_big
1864
+
1865
+ tinynn/persistent_bench_big: tinynn/persistent_bench_big.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
1866
+ $(SPINEL) tinynn/persistent_bench_big.rb -o tinynn/persistent_bench_big
1867
+
1868
+ # A/B parity test against CUDA backend on the local GPU (sm_121 / GB10).
1869
+ # Requires `make setup-ggml-cuda` to have produced vendor/ggml/build-cuda.
1870
+ ab-smoke-cuda: tinynn/ab_smoke_cuda
1871
+ ./tinynn/ab_smoke_cuda
1872
+
1873
+ tinynn/tinynn_backend_cuda.o: tinynn/tinynn_backend_cuda.c
1874
+ $(CC) $(CFLAGS) $(GGML_INC) -I$(CUDA_DIR)/include -c $< -o $@
1875
+
1876
+ # Only the CUDA backend init goes into the CUDA archive. Common
1877
+ # wrappers stay in tinynn_ggml.o (CPU archive), referenced from CUDA
1878
+ # programs via a weak link. Avoids the multi-archive multi-definition
1879
+ # linker conflict that the older two-fat-archive layout had.
1880
+ tinynn/libtinynn_ggml_cuda.a: tinynn/tinynn_backend_cuda.o
1881
+ ar $(ARFLAGS) $@ $<
1882
+
1883
+ # Metal backend mirror — same archive-isolation pattern as CUDA. The
1884
+ # source is .m (Objective-C) since the Metal frameworks are ObjC; we
1885
+ # compile with -fobjc-arc off (the file holds no ObjC objects of its
1886
+ # own, just a C function calling into ggml-metal). Header search adds
1887
+ # the Metal build dir so ggml-metal.h is reachable.
1888
+ tinynn/tinynn_backend_metal.o: tinynn/tinynn_backend_metal.m
1889
+ ifneq ($(UNAME_S),Darwin)
1890
+ @echo "tinynn_backend_metal.o: macOS-only (Objective-C + Metal frameworks); uname -s = $(UNAME_S)"; exit 1
1891
+ endif
1892
+ $(CC) $(CFLAGS) -x objective-c $(GGML_INC) -c $< -o $@
1893
+
1894
+ tinynn/libtinynn_ggml_metal.a: tinynn/tinynn_backend_metal.o
1895
+ ar $(ARFLAGS) $@ $<
1896
+
1897
+ tinynn/ab_smoke_cuda: tinynn/ab_smoke_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1898
+ $(SPINEL) tinynn/ab_smoke_cuda.rb -o tinynn/ab_smoke_cuda
1899
+
1900
+ # Consolidated CUDA parity test: matmul + add + gelu + rms_norm + softmax + scale + ffn_pipeline.
1901
+ ab-smoke-all-cuda: tinynn/ab_smoke_all_cuda
1902
+ ./tinynn/ab_smoke_all_cuda
1903
+
1904
+ tinynn/ab_smoke_all_cuda: tinynn/ab_smoke_all_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1905
+ $(SPINEL) tinynn/ab_smoke_all_cuda.rb -o tinynn/ab_smoke_all_cuda
1906
+
1907
+ # Transformer-shape parity + wallclock bench on CUDA (GB10).
1908
+ ab-smoke-big-cuda: tinynn/ab_smoke_big_cuda
1909
+ ./tinynn/ab_smoke_big_cuda
1910
+
1911
+ tinynn/ab_smoke_big_cuda: tinynn/ab_smoke_big_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
1912
+ $(SPINEL) tinynn/ab_smoke_big_cuda.rb -o tinynn/ab_smoke_big_cuda
1913
+
1914
+ # --- maintenance ------------------------------------------------------------
1915
+ clean:
1916
+ rm -f demos/train_minimal demos/train_tinystories \
1917
+ demos/inference_demo demos/inference_demo_cuda \
1918
+ demos/distilgpt2_demo demos/distilgpt2_demo_ffi \
1919
+ demos/distilgpt2_demo_kv demos/distilgpt2_demo_text \
1920
+ demos/distilgpt2_demo_ffi_cuda demos/distilgpt2_demo_kv_cuda demos/smollm2_seq_parity_cuda demos/smollm2_seq_parity_t4_cuda demos/smollm2_seq_train demos/smollm2_seq_train_cuda demos/smollm2_seq_full_finetune_cuda demos/smollm2_seq_qlora_cuda demos/seq_train_bench_cuda demos/smollm2_lora_train_bench demos/smollm2_lora_train_bench_cuda demos/smollm2_lora_grad_probe demos/smollm2_lora_grad_probe_cuda demos/qwen25_native_mmap_cuda demos/smollm2_kv_cuda demos/tinyllama_kv_cuda demos/tinyllama demos/tinyllama_kv demos/algorithm_cards demos/train \
1921
+ tinynn/tinynn_ggml.o tinynn/libtinynn_ggml.a \
1922
+ tinynn/tinynn_backend_cuda.o tinynn/libtinynn_ggml_cuda.a \
1923
+ tinynn/tinynn_backend_metal.o tinynn/libtinynn_ggml_metal.a \
1924
+ examples/example_inference_metal \
1925
+ tinynn/smoke tinynn/ab_smoke tinynn/ab_smoke_cuda tinynn/ab_smoke_all_cuda \
1926
+ tinynn/ab_smoke_add tinynn/ab_smoke_gelu tinynn/ab_smoke_rms_norm \
1927
+ tinynn/ab_smoke_softmax tinynn/ab_smoke_transpose tinynn/ab_smoke_scale \
1928
+ tinynn/ab_smoke_pipeline tinynn/ab_smoke_big tinynn/ab_smoke_big_cuda \
1929
+ tinynn/ab_smoke_matmul_variants tinynn/ab_smoke_back tinynn/ab_smoke_embed \
1930
+ tinynn/ab_smoke_sgd tinynn/ab_smoke_gelu_back tinynn/ab_smoke_cegrad \
1931
+ tinynn/ab_smoke_adam tinynn/forward_smoke tinynn/persistent_bench \
1932
+ tinynn/persistent_bench_cuda tinynn/persistent_bench_big tinynn/ab_smoke_lora_train tinynn/gpt2_parity tinynn/gpt2_ffi_parity tinynn/gpt2_kv_parity tinynn/gpt2_bench tinynn/bpe_smoke tinynn/gpt2_ffi_parity_cuda tinynn/gpt2_kv_parity_cuda tinynn/gpt2_bench_cuda \
1933
+ examples/example_train_from_scratch \
1934
+ examples/example_train_from_scratch_cpu \
1935
+ examples/example_train_from_scratch_cuda \
1936
+ examples/example_finetune examples/example_finetune_cuda \
1937
+ libexec/toy-infer libexec/toy-train libexec/toy-train-cuda libexec/toy-train-lora-cuda libexec/toy-eval libexec/toy-eval-lmc libexec/toy-serve examples/example_train \
1938
+ libexec/toy-infer-metal libexec/toy-eval-metal libexec/toy-train-metal
1939
+
1940
+ distclean: clean
1941
+ rm -rf $(GGML_DIR)/build $(GGML_DIR)/build-cuda $(GGML_DIR)/build-metal
1942
+
1943
+ # --- Algorithm-card drift gate -----------------------------------------------
1944
+ # Sanity-check that every Toy:: class with both `def forward` and
1945
+ # `def algorithm` keeps the two in lock-step. Catches the common
1946
+ # drift case where someone changes the forward without updating the
1947
+ # card (or vice versa). Pure-Ruby, runs in a fraction of a second.
1948
+ check-cards:
1949
+ ruby prep/card_drift_check.rb
1950
+
1951
+ # --- Perf regression gate -----------------------------------------------------
1952
+ # Runs each bench/*.rb (LoRA step, inference, tokenizer) and compares the
1953
+ # emitted BENCH lines against bench/baselines.csv. Exit 1 on any metric that
1954
+ # regresses past its per-metric tolerance. `bench-update` re-records the
1955
+ # current values as the new baseline.
1956
+ #
1957
+ # Run before pushing perf-sensitive changes; baselines.csv lives in the repo
1958
+ # so anyone can re-run on the same hardware and compare.
1959
+ bench: tinynn/libtinynn_ggml.a
1960
+ ruby bench/check.rb
1961
+
1962
+ bench-update: tinynn/libtinynn_ggml.a
1963
+ ruby bench/check.rb --update
1964
+
1965
+ bench-report: tinynn/libtinynn_ggml.a
1966
+ ruby bench/check.rb --report
1967
+
1968
+ # Routine comparison vs PyTorch — the "old-stable" yardstick — in the
1969
+ # single-machine single-GPU case. Runs ON gx10: toy CUDA benches run
1970
+ # native, the PyTorch reference (bench/ref_pytorch.py) runs in the
1971
+ # dev-pytorch container. Gates the toy/PyTorch *ratio* (not absolute
1972
+ # ms, which is machine-dependent) so a design change that quietly
1973
+ # widens the gap fails. Budget in bench/baselines_vs_pytorch.csv;
1974
+ # `--update` re-records it. Override the torch invocation with PT_CMD.
1975
+ bench-vs-pytorch: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1976
+ ruby bench/check_vs_pytorch.rb
1977
+
1978
+ bench-vs-pytorch-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1979
+ ruby bench/check_vs_pytorch.rb --update
1980
+
1981
+ bench-vs-pytorch-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1982
+ ruby bench/check_vs_pytorch.rb --report
1983
+
1984
+ # Heavy bench — ambitious workloads that exercise the libs (LoRA on
1985
+ # Qwen2.5-1.5B at seq=256, decode on Qwen2.5-7B-Q8 with KV_Q8+FLASH).
1986
+ # ~3-5 min wallclock; meant as a yardstick for choosing between
1987
+ # optimization strategies, not for every-commit gating.
1988
+ # bench-heavy — toy-only, fast iteration loop (no PyTorch)
1989
+ # bench-vs-pytorch-heavy — same workloads + PyTorch ratio gate
1990
+ bench-heavy: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1991
+ ruby bench/check_heavy.rb
1992
+
1993
+ bench-heavy-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1994
+ ruby bench/check_heavy.rb --update
1995
+
1996
+ bench-heavy-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
1997
+ ruby bench/check_heavy.rb --report
1998
+
1999
+ bench-vs-pytorch-heavy: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
2000
+ ruby bench/check_vs_pytorch.rb --heavy
2001
+
2002
+ bench-vs-pytorch-heavy-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
2003
+ ruby bench/check_vs_pytorch.rb --heavy --update
2004
+
2005
+ bench-vs-pytorch-heavy-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
2006
+ ruby bench/check_vs_pytorch.rb --heavy --report
2007
+
2008
+ .PHONY: all clean distclean setup-ggml setup-ggml-cuda setup-ggml-metal smoke \
2009
+ example_inference_metal \
2010
+ toy-infer-metal toy-eval-metal toy-train-metal gate-metal \
2011
+ ab-smoke ab-smoke-add ab-smoke-gelu ab-smoke-rms-norm \
2012
+ ab-smoke-softmax ab-smoke-transpose ab-smoke-scale ab-smoke-silu \
2013
+ ab-smoke-mul ab-smoke-pipeline ab-smoke-big ab-smoke-cuda \
2014
+ ab-smoke-all-cuda ab-smoke-big-cuda ab-smoke-embed ab-smoke-sgd ab-smoke-lora-train test \
2015
+ gpt2 smollm2 smollm2_kv smollm2_kv_cuda \
2016
+ tinyllama tinyllama_kv tinyllama_kv_cuda \
2017
+ train algorithm_cards \
2018
+ examples gen-mirrors verify-mirrors \
2019
+ bench bench-update bench-report check-cards \
2020
+ bench-vs-pytorch bench-vs-pytorch-update bench-vs-pytorch-report \
2021
+ bench-heavy bench-heavy-update bench-heavy-report \
2022
+ bench-vs-pytorch-heavy bench-vs-pytorch-heavy-update bench-vs-pytorch-heavy-report gpt2-parity gpt2-ffi-parity gpt2-kv-parity gpt2-bench bpe-smoke gpt2-ffi-parity-cuda gpt2-kv-parity-cuda gpt2-bench-cuda forward-smoke persistent-bench persistent-bench-cuda persistent-bench-big