toy 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +1124 -0
- data/LICENSE +21 -0
- data/Makefile +2022 -0
- data/README.md +154 -0
- data/bin/toy +10 -0
- data/lib/toy/compute.rb +135 -0
- data/lib/toy/compute_cuda.rb +104 -0
- data/lib/toy/compute_metal.rb +97 -0
- data/lib/toy/core/cli/describe.rb +188 -0
- data/lib/toy/core/cli/eval.rb +385 -0
- data/lib/toy/core/cli/exit_codes.rb +15 -0
- data/lib/toy/core/cli/fetch.rb +238 -0
- data/lib/toy/core/cli/infer.rb +268 -0
- data/lib/toy/core/cli/install.rb +228 -0
- data/lib/toy/core/cli/list.rb +86 -0
- data/lib/toy/core/cli/manifest.rb +49 -0
- data/lib/toy/core/cli/new.rb +594 -0
- data/lib/toy/core/cli/serve.rb +237 -0
- data/lib/toy/core/cli/train.rb +471 -0
- data/lib/toy/core/cli.rb +165 -0
- data/lib/toy/core/config.rb +64 -0
- data/lib/toy/core/gguf_meta.rb +161 -0
- data/lib/toy/core/model_scan.rb +221 -0
- data/lib/toy/core/run_log.rb +94 -0
- data/lib/toy/core/toy_root.rb +95 -0
- data/lib/toy/dev/toy_card.rb +299 -0
- data/lib/toy/dev/toy_describe_flow.rb +412 -0
- data/lib/toy/dev/toy_logprobs.rb +86 -0
- data/lib/toy/dev/toy_tap.rb +183 -0
- data/lib/toy/dev/toy_token_drift.rb +121 -0
- data/lib/toy/ffi/tinynn.rb +1491 -0
- data/lib/toy/ffi/tinynn_cuda.rb +1124 -0
- data/lib/toy/ffi/tinynn_metal.rb +359 -0
- data/lib/toy/ffi_manifest.rb +84 -0
- data/lib/toy/io/bpe.rb +325 -0
- data/lib/toy/io/gguf_kv.rb +35 -0
- data/lib/toy/io/gguf_load.rb +331 -0
- data/lib/toy/io/loaders/toy_gpt2_loader.rb +70 -0
- data/lib/toy/io/loaders/toy_smollm2_loader.rb +754 -0
- data/lib/toy/io/model_index.rb +206 -0
- data/lib/toy/io/run_bundle.rb +280 -0
- data/lib/toy/io/tokenizer.rb +613 -0
- data/lib/toy/io/toy_corpus_loader.rb +52 -0
- data/lib/toy/io/toy_events.rb +56 -0
- data/lib/toy/io/toy_image_loader.rb +48 -0
- data/lib/toy/llm/adamw.rb +169 -0
- data/lib/toy/llm/archs/llama_arch.rb +233 -0
- data/lib/toy/llm/archs/llama_arch_cuda.rb +237 -0
- data/lib/toy/llm/archs/llama_arch_metal.rb +237 -0
- data/lib/toy/llm/blocks/transformer_block.rb +876 -0
- data/lib/toy/llm/blocks/transformer_block_cuda.rb +880 -0
- data/lib/toy/llm/blocks/transformer_block_metal.rb +880 -0
- data/lib/toy/llm/classify_batch.rb +88 -0
- data/lib/toy/llm/engine/gpt2_fwd_engine.rb +360 -0
- data/lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb +362 -0
- data/lib/toy/llm/engine/gpt2_fwd_engine_metal.rb +362 -0
- data/lib/toy/llm/engine/gpt2_kv_engine.rb +346 -0
- data/lib/toy/llm/engine/gpt2_kv_engine_cuda.rb +348 -0
- data/lib/toy/llm/engine/gpt2_kv_engine_metal.rb +348 -0
- data/lib/toy/llm/engine/gpt2_seq_engine.rb +289 -0
- data/lib/toy/llm/engine/gpt2_seq_engine_cuda.rb +293 -0
- data/lib/toy/llm/engine/gpt2_seq_engine_metal.rb +293 -0
- data/lib/toy/llm/engine/llama_kv_engine.rb +1593 -0
- data/lib/toy/llm/engine/llama_kv_engine_cuda.rb +1526 -0
- data/lib/toy/llm/engine/llama_kv_engine_metal.rb +1526 -0
- data/lib/toy/llm/engine/llama_seq_engine.rb +1233 -0
- data/lib/toy/llm/engine/llama_seq_engine_cuda.rb +1238 -0
- data/lib/toy/llm/engine/llama_seq_engine_metal.rb +1238 -0
- data/lib/toy/llm/engine/vit_tiny_engine.rb +467 -0
- data/lib/toy/llm/labels.rb +142 -0
- data/lib/toy/llm/primitives/gqa.rb +62 -0
- data/lib/toy/llm/primitives/gqa_cuda.rb +66 -0
- data/lib/toy/llm/primitives/gqa_metal.rb +66 -0
- data/lib/toy/llm/primitives/rms_norm.rb +39 -0
- data/lib/toy/llm/primitives/rms_norm_cuda.rb +43 -0
- data/lib/toy/llm/primitives/rms_norm_metal.rb +43 -0
- data/lib/toy/llm/primitives/rope.rb +68 -0
- data/lib/toy/llm/primitives/rope_cuda.rb +72 -0
- data/lib/toy/llm/primitives/rope_metal.rb +72 -0
- data/lib/toy/llm/primitives/swiglu.rb +41 -0
- data/lib/toy/llm/primitives/swiglu_cuda.rb +45 -0
- data/lib/toy/llm/primitives/swiglu_metal.rb +45 -0
- data/lib/toy/llm/recipe_options.rb +71 -0
- data/lib/toy/llm/recipes/from_scratch.rb +105 -0
- data/lib/toy/llm/recipes/from_scratch_cuda.rb +109 -0
- data/lib/toy/llm/recipes/from_scratch_metal.rb +109 -0
- data/lib/toy/llm/recipes/lora.rb +110 -0
- data/lib/toy/llm/recipes/lora_cuda.rb +114 -0
- data/lib/toy/llm/recipes/lora_metal.rb +114 -0
- data/lib/toy/llm/recipes/vit_tiny.rb +75 -0
- data/lib/toy/llm/recipes/warm_start.rb +235 -0
- data/lib/toy/llm/recipes/warm_start_cuda.rb +239 -0
- data/lib/toy/llm/recipes/warm_start_metal.rb +239 -0
- data/lib/toy/llm/training_batch.rb +133 -0
- data/lib/toy/models/arch.rb +253 -0
- data/lib/toy/models/gpt2.rb +311 -0
- data/lib/toy/models/toy_gpt2.rb +177 -0
- data/lib/toy/models/toy_smollm2.rb +393 -0
- data/lib/toy/models/toy_vit.rb +83 -0
- data/lib/toy/models/transformer.rb +1494 -0
- data/lib/toy/models/transformer_lm.rb +298 -0
- data/lib/toy/models/transformer_lm_cuda.rb +159 -0
- data/lib/toy/models/transformer_lm_metal.rb +142 -0
- data/lib/toy/mri.rb +300 -0
- data/lib/toy/run/eval.rb +76 -0
- data/lib/toy/run/eval_cuda.rb +66 -0
- data/lib/toy/run/eval_lmc.rb +334 -0
- data/lib/toy/run/eval_metal.rb +67 -0
- data/lib/toy/run/infer.rb +130 -0
- data/lib/toy/run/infer_cuda.rb +118 -0
- data/lib/toy/run/infer_metal.rb +119 -0
- data/lib/toy/run/infer_trace.rb +37 -0
- data/lib/toy/run/serve.rb +144 -0
- data/lib/toy/run/train.rb +404 -0
- data/lib/toy/run/train_cuda.rb +397 -0
- data/lib/toy/run/train_gpt2.rb +103 -0
- data/lib/toy/run/train_gpt2_cuda.rb +85 -0
- data/lib/toy/run/train_gpt2_metal.rb +85 -0
- data/lib/toy/run/train_lora.rb +207 -0
- data/lib/toy/run/train_lora_cuda.rb +219 -0
- data/lib/toy/run/train_metal.rb +227 -0
- data/lib/toy/run/train_vit.rb +251 -0
- data/lib/toy/serve/openai/embeddings_handler.rb +92 -0
- data/lib/toy/serve/openai/handlers.rb +143 -0
- data/lib/toy/serve/openai/server.rb +159 -0
- data/lib/toy/train/sampler.rb +314 -0
- data/lib/toy/train/toy_chat_template.rb +179 -0
- data/lib/toy/train/toy_drift_grad.rb +176 -0
- data/lib/toy/train/toy_gguf_fuse.rb +428 -0
- data/lib/toy/train/toy_gguf_writer.rb +100 -0
- data/lib/toy/train/toy_lr_schedule.rb +39 -0
- data/lib/toy/train/toy_sample.rb +125 -0
- data/lib/toy/train/toy_trainer.rb +86 -0
- data/lib/toy/train/training.rb +160 -0
- data/lib/toy/version.rb +11 -0
- data/lib/toy.rb +902 -0
- data/prep/progress +118 -0
- data/prep/quietly +64 -0
- data/sig/toy.rbs +397 -0
- data/sig/toy_compute.rbs +450 -0
- data/spinel-ext.json +122 -0
- data/tinynn/Makefile +71 -0
- data/tinynn/tinynn_backend_cuda.c +99 -0
- data/tinynn/tinynn_backend_metal.m +75 -0
- data/tinynn/tinynn_events.c +122 -0
- data/tinynn/tinynn_events.h +83 -0
- data/tinynn/tinynn_ggml.c +2460 -0
- data/tinynn/tinynn_ggml.h +545 -0
- data/tinynn/tinynn_gguf.c +783 -0
- data/tinynn/tinynn_gguf.h +167 -0
- data/tinynn/tinynn_trace.c +180 -0
- data/tinynn/tinynn_trace.h +85 -0
- data/vendor/ggml/AUTHORS +335 -0
- data/vendor/ggml/CMakeLists.txt +505 -0
- data/vendor/ggml/CONTRIBUTING.md +3 -0
- data/vendor/ggml/LICENSE +21 -0
- data/vendor/ggml/README.md +50 -0
- data/vendor/ggml/ci/run.sh +395 -0
- data/vendor/ggml/cmake/FindNCCL.cmake +36 -0
- data/vendor/ggml/cmake/GitVars.cmake +22 -0
- data/vendor/ggml/cmake/common.cmake +50 -0
- data/vendor/ggml/cmake/ggml-config.cmake.in +191 -0
- data/vendor/ggml/docs/gguf.md +828 -0
- data/vendor/ggml/examples/CMakeLists.txt +34 -0
- data/vendor/ggml/examples/common-ggml.cpp +244 -0
- data/vendor/ggml/examples/common-ggml.h +18 -0
- data/vendor/ggml/examples/common.cpp +675 -0
- data/vendor/ggml/examples/common.h +322 -0
- data/vendor/ggml/examples/gpt-2/CMakeLists.txt +32 -0
- data/vendor/ggml/examples/gpt-2/README.md +225 -0
- data/vendor/ggml/examples/gpt-2/convert-cerebras-to-ggml.py +183 -0
- data/vendor/ggml/examples/gpt-2/convert-ckpt-to-ggml.py +159 -0
- data/vendor/ggml/examples/gpt-2/convert-h5-to-ggml.py +195 -0
- data/vendor/ggml/examples/gpt-2/download-ggml-model.sh +69 -0
- data/vendor/ggml/examples/gpt-2/download-model.sh +48 -0
- data/vendor/ggml/examples/gpt-2/main-alloc.cpp +880 -0
- data/vendor/ggml/examples/gpt-2/main-backend.cpp +946 -0
- data/vendor/ggml/examples/gpt-2/main-batched.cpp +1210 -0
- data/vendor/ggml/examples/gpt-2/main-ctx.cpp +840 -0
- data/vendor/ggml/examples/gpt-2/main-sched.cpp +1079 -0
- data/vendor/ggml/examples/gpt-2/quantize.cpp +184 -0
- data/vendor/ggml/examples/gpt-j/CMakeLists.txt +13 -0
- data/vendor/ggml/examples/gpt-j/README.md +239 -0
- data/vendor/ggml/examples/gpt-j/convert-h5-to-ggml.py +173 -0
- data/vendor/ggml/examples/gpt-j/download-ggml-model.sh +69 -0
- data/vendor/ggml/examples/gpt-j/download-model.sh +11 -0
- data/vendor/ggml/examples/gpt-j/main.cpp +755 -0
- data/vendor/ggml/examples/gpt-j/quantize.cpp +182 -0
- data/vendor/ggml/examples/magika/CMakeLists.txt +17 -0
- data/vendor/ggml/examples/magika/README.md +23 -0
- data/vendor/ggml/examples/magika/convert.py +32 -0
- data/vendor/ggml/examples/magika/main.cpp +374 -0
- data/vendor/ggml/examples/mnist/CMakeLists.txt +58 -0
- data/vendor/ggml/examples/mnist/README.md +206 -0
- data/vendor/ggml/examples/mnist/mnist-common.cpp +496 -0
- data/vendor/ggml/examples/mnist/mnist-common.h +166 -0
- data/vendor/ggml/examples/mnist/mnist-eval.cpp +67 -0
- data/vendor/ggml/examples/mnist/mnist-train-cnn.py +91 -0
- data/vendor/ggml/examples/mnist/mnist-train-fc.py +131 -0
- data/vendor/ggml/examples/mnist/mnist-train.cpp +39 -0
- data/vendor/ggml/examples/mnist/server.py +36 -0
- data/vendor/ggml/examples/mnist/web/index.html +178 -0
- data/vendor/ggml/examples/perf-metal/CMakeLists.txt +7 -0
- data/vendor/ggml/examples/perf-metal/perf-metal.cpp +152 -0
- data/vendor/ggml/examples/prompts/dolly-v2.txt +100 -0
- data/vendor/ggml/examples/prompts/gpt-2-chinese.txt +1 -0
- data/vendor/ggml/examples/prompts/gpt-2.txt +100 -0
- data/vendor/ggml/examples/prompts/gpt-j.txt +100 -0
- data/vendor/ggml/examples/prompts/gpt-neox-japanese.txt +1 -0
- data/vendor/ggml/examples/prompts/gpt-neox.txt +100 -0
- data/vendor/ggml/examples/prompts/polyglot-ko.txt +3 -0
- data/vendor/ggml/examples/prompts/replit.txt +100 -0
- data/vendor/ggml/examples/prompts/starcoder.txt +100 -0
- data/vendor/ggml/examples/prompts/test-cases.txt +110 -0
- data/vendor/ggml/examples/prompts/tokenize_huggingface.py +65 -0
- data/vendor/ggml/examples/prompts/whisper.txt +100 -0
- data/vendor/ggml/examples/python/README.md +115 -0
- data/vendor/ggml/examples/python/api.h +14 -0
- data/vendor/ggml/examples/python/example_add_quant.py +25 -0
- data/vendor/ggml/examples/python/example_test_all_quants.py +68 -0
- data/vendor/ggml/examples/python/ggml/__init__.py +58 -0
- data/vendor/ggml/examples/python/ggml/__init__.pyi +2406 -0
- data/vendor/ggml/examples/python/ggml/cffi.py +11 -0
- data/vendor/ggml/examples/python/ggml/ffi/__init__.pyi +7 -0
- data/vendor/ggml/examples/python/ggml/utils.py +182 -0
- data/vendor/ggml/examples/python/regenerate.py +42 -0
- data/vendor/ggml/examples/python/stubs.py +128 -0
- data/vendor/ggml/examples/python/test_tensor.py +258 -0
- data/vendor/ggml/examples/sam/CMakeLists.txt +13 -0
- data/vendor/ggml/examples/sam/README.md +95 -0
- data/vendor/ggml/examples/sam/convert-pth-to-ggml.py +147 -0
- data/vendor/ggml/examples/sam/example.jpg +0 -0
- data/vendor/ggml/examples/sam/sam.cpp +2370 -0
- data/vendor/ggml/examples/simple/CMakeLists.txt +21 -0
- data/vendor/ggml/examples/simple/README.md +61 -0
- data/vendor/ggml/examples/simple/simple-backend.cpp +153 -0
- data/vendor/ggml/examples/simple/simple-ctx.cpp +127 -0
- data/vendor/ggml/examples/stb_image.h +7987 -0
- data/vendor/ggml/examples/stb_image_write.h +1724 -0
- data/vendor/ggml/examples/test-cmake/CMakeLists.txt +10 -0
- data/vendor/ggml/examples/test-cmake/README.md +3 -0
- data/vendor/ggml/examples/test-cmake/test-cmake.cpp +6 -0
- data/vendor/ggml/examples/yolo/CMakeLists.txt +6 -0
- data/vendor/ggml/examples/yolo/README.md +59 -0
- data/vendor/ggml/examples/yolo/convert-yolov3-tiny.py +53 -0
- data/vendor/ggml/examples/yolo/data/coco.names +80 -0
- data/vendor/ggml/examples/yolo/data/labels/100_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/100_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/101_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/102_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/103_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/104_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/105_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/106_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/107_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/108_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/109_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/110_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/111_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/112_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/113_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/114_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/115_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/116_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/117_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/118_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/119_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/120_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/121_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/122_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/123_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/124_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/125_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/126_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/32_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/33_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/34_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/35_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/36_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/37_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/38_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/39_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/40_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/41_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/42_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/43_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/44_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/45_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/46_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/47_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/48_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/49_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/50_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/51_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/52_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/53_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/54_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/55_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/56_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/57_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/58_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/59_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/60_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/61_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/62_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/63_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/64_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/65_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/66_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/67_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/68_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/69_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/70_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/71_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/72_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/73_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/74_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/75_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/76_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/77_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/78_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/79_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/80_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/81_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/82_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/83_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/84_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/85_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/86_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/87_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/88_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/89_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/90_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/91_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/92_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/93_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/94_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/95_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/96_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/97_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/98_7.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_0.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_1.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_2.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_3.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_4.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_5.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_6.png +0 -0
- data/vendor/ggml/examples/yolo/data/labels/99_7.png +0 -0
- data/vendor/ggml/examples/yolo/yolo-image.cpp +210 -0
- data/vendor/ggml/examples/yolo/yolo-image.h +39 -0
- data/vendor/ggml/examples/yolo/yolov3-tiny.cpp +661 -0
- data/vendor/ggml/ggml.pc.in +10 -0
- data/vendor/ggml/include/ggml-alloc.h +85 -0
- data/vendor/ggml/include/ggml-backend.h +431 -0
- data/vendor/ggml/include/ggml-blas.h +25 -0
- data/vendor/ggml/include/ggml-cann.h +123 -0
- data/vendor/ggml/include/ggml-cpp.h +39 -0
- data/vendor/ggml/include/ggml-cpu.h +151 -0
- data/vendor/ggml/include/ggml-cuda.h +50 -0
- data/vendor/ggml/include/ggml-hexagon.h +19 -0
- data/vendor/ggml/include/ggml-metal.h +61 -0
- data/vendor/ggml/include/ggml-opencl.h +26 -0
- data/vendor/ggml/include/ggml-openvino.h +37 -0
- data/vendor/ggml/include/ggml-opt.h +256 -0
- data/vendor/ggml/include/ggml-rpc.h +35 -0
- data/vendor/ggml/include/ggml-sycl.h +49 -0
- data/vendor/ggml/include/ggml-virtgpu.h +14 -0
- data/vendor/ggml/include/ggml-vulkan.h +29 -0
- data/vendor/ggml/include/ggml-webgpu.h +19 -0
- data/vendor/ggml/include/ggml-zdnn.h +17 -0
- data/vendor/ggml/include/ggml-zendnn.h +22 -0
- data/vendor/ggml/include/ggml.h +2845 -0
- data/vendor/ggml/include/gguf.h +204 -0
- data/vendor/ggml/requirements.txt +12 -0
- data/vendor/ggml/scripts/gen-authors.sh +9 -0
- data/vendor/ggml/scripts/release.sh +296 -0
- data/vendor/ggml/scripts/sync-llama-am.sh +167 -0
- data/vendor/ggml/scripts/sync-llama.last +1 -0
- data/vendor/ggml/scripts/sync-llama.sh +21 -0
- data/vendor/ggml/scripts/sync-whisper-am.sh +138 -0
- data/vendor/ggml/scripts/sync-whisper.last +1 -0
- data/vendor/ggml/scripts/sync-whisper.sh +17 -0
- data/vendor/ggml/src/CMakeLists.txt +493 -0
- data/vendor/ggml/src/ggml-alloc.c +1248 -0
- data/vendor/ggml/src/ggml-backend-dl.cpp +48 -0
- data/vendor/ggml/src/ggml-backend-dl.h +45 -0
- data/vendor/ggml/src/ggml-backend-impl.h +275 -0
- data/vendor/ggml/src/ggml-backend-meta.cpp +2144 -0
- data/vendor/ggml/src/ggml-backend-reg.cpp +586 -0
- data/vendor/ggml/src/ggml-backend.cpp +2371 -0
- data/vendor/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- data/vendor/ggml/src/ggml-blas/ggml-blas.cpp +522 -0
- data/vendor/ggml/src/ggml-cann/CMakeLists.txt +89 -0
- data/vendor/ggml/src/ggml-cann/acl_tensor.cpp +195 -0
- data/vendor/ggml/src/ggml-cann/acl_tensor.h +349 -0
- data/vendor/ggml/src/ggml-cann/aclnn_ops.cpp +4436 -0
- data/vendor/ggml/src/ggml-cann/aclnn_ops.h +1190 -0
- data/vendor/ggml/src/ggml-cann/common.h +651 -0
- data/vendor/ggml/src/ggml-cann/ggml-cann.cpp +3062 -0
- data/vendor/ggml/src/ggml-common.h +1900 -0
- data/vendor/ggml/src/ggml-cpu/CMakeLists.txt +731 -0
- data/vendor/ggml/src/ggml-cpu/amx/amx.cpp +249 -0
- data/vendor/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/vendor/ggml/src/ggml-cpu/amx/common.h +115 -0
- data/vendor/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- data/vendor/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/vendor/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- data/vendor/ggml/src/ggml-cpu/arch/arm/quants.c +4245 -0
- data/vendor/ggml/src/ggml-cpu/arch/arm/repack.cpp +5156 -0
- data/vendor/ggml/src/ggml-cpu/arch/loongarch/quants.c +2158 -0
- data/vendor/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/vendor/ggml/src/ggml-cpu/arch/powerpc/quants.c +2304 -0
- data/vendor/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/vendor/ggml/src/ggml-cpu/arch/riscv/quants.c +4553 -0
- data/vendor/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1703 -0
- data/vendor/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/vendor/ggml/src/ggml-cpu/arch/s390/quants.c +1465 -0
- data/vendor/ggml/src/ggml-cpu/arch/wasm/quants.c +1220 -0
- data/vendor/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- data/vendor/ggml/src/ggml-cpu/arch/x86/quants.c +3970 -0
- data/vendor/ggml/src/ggml-cpu/arch/x86/repack.cpp +6407 -0
- data/vendor/ggml/src/ggml-cpu/arch-fallback.h +348 -0
- data/vendor/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- data/vendor/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/vendor/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/vendor/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
- data/vendor/ggml/src/ggml-cpu/common.h +95 -0
- data/vendor/ggml/src/ggml-cpu/ggml-cpu-impl.h +539 -0
- data/vendor/ggml/src/ggml-cpu/ggml-cpu.c +3835 -0
- data/vendor/ggml/src/ggml-cpu/ggml-cpu.cpp +703 -0
- data/vendor/ggml/src/ggml-cpu/hbm.cpp +55 -0
- data/vendor/ggml/src/ggml-cpu/hbm.h +8 -0
- data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.cpp +939 -0
- data/vendor/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1513 -0
- data/vendor/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4051 -0
- data/vendor/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- data/vendor/ggml/src/ggml-cpu/ops.cpp +11373 -0
- data/vendor/ggml/src/ggml-cpu/ops.h +119 -0
- data/vendor/ggml/src/ggml-cpu/quants.c +1288 -0
- data/vendor/ggml/src/ggml-cpu/quants.h +103 -0
- data/vendor/ggml/src/ggml-cpu/repack.cpp +4836 -0
- data/vendor/ggml/src/ggml-cpu/repack.h +245 -0
- data/vendor/ggml/src/ggml-cpu/simd-gemm.h +226 -0
- data/vendor/ggml/src/ggml-cpu/simd-mappings.h +1319 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime.cpp +1740 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime.h +21 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +1027 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/ime_kernels.h +189 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
- data/vendor/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
- data/vendor/ggml/src/ggml-cpu/traits.cpp +36 -0
- data/vendor/ggml/src/ggml-cpu/traits.h +38 -0
- data/vendor/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- data/vendor/ggml/src/ggml-cpu/unary-ops.h +35 -0
- data/vendor/ggml/src/ggml-cpu/vec.cpp +629 -0
- data/vendor/ggml/src/ggml-cpu/vec.h +1588 -0
- data/vendor/ggml/src/ggml-cuda/CMakeLists.txt +268 -0
- data/vendor/ggml/src/ggml-cuda/acc.cu +61 -0
- data/vendor/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/vendor/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/allreduce.cu +971 -0
- data/vendor/ggml/src/ggml-cuda/allreduce.cuh +29 -0
- data/vendor/ggml/src/ggml-cuda/arange.cu +34 -0
- data/vendor/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/vendor/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/argsort.cu +266 -0
- data/vendor/ggml/src/ggml-cuda/argsort.cuh +19 -0
- data/vendor/ggml/src/ggml-cuda/binbcast.cu +534 -0
- data/vendor/ggml/src/ggml-cuda/binbcast.cuh +12 -0
- data/vendor/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/vendor/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/common.cuh +1489 -0
- data/vendor/ggml/src/ggml-cuda/concat.cu +204 -0
- data/vendor/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cu +86 -0
- data/vendor/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/vendor/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cu +115 -0
- data/vendor/ggml/src/ggml-cuda/conv2d-transpose.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/vendor/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/convert.cu +892 -0
- data/vendor/ggml/src/ggml-cuda/convert.cuh +66 -0
- data/vendor/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/vendor/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/vendor/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/vendor/ggml/src/ggml-cuda/cpy.cu +558 -0
- data/vendor/ggml/src/ggml-cuda/cpy.cuh +7 -0
- data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cu +177 -0
- data/vendor/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/vendor/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/vendor/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/dequantize.cuh +99 -0
- data/vendor/ggml/src/ggml-cuda/diag.cu +77 -0
- data/vendor/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/vendor/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/fattn-common.cuh +1212 -0
- data/vendor/ggml/src/ggml-cuda/fattn-mma-f16.cuh +2020 -0
- data/vendor/ggml/src/ggml-cuda/fattn-tile.cu +61 -0
- data/vendor/ggml/src/ggml-cuda/fattn-tile.cuh +1347 -0
- data/vendor/ggml/src/ggml-cuda/fattn-vec.cuh +600 -0
- data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cu +696 -0
- data/vendor/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +51 -0
- data/vendor/ggml/src/ggml-cuda/fattn.cu +562 -0
- data/vendor/ggml/src/ggml-cuda/fattn.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/fill.cu +37 -0
- data/vendor/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/gated_delta_net.cu +311 -0
- data/vendor/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/vendor/ggml/src/ggml-cuda/getrows.cu +300 -0
- data/vendor/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/vendor/ggml/src/ggml-cuda/ggml-cuda.cu +5684 -0
- data/vendor/ggml/src/ggml-cuda/gla.cu +93 -0
- data/vendor/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/im2col.cu +267 -0
- data/vendor/ggml/src/ggml-cuda/im2col.cuh +6 -0
- data/vendor/ggml/src/ggml-cuda/mean.cu +75 -0
- data/vendor/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/mma.cuh +1456 -0
- data/vendor/ggml/src/ggml-cuda/mmf.cu +191 -0
- data/vendor/ggml/src/ggml-cuda/mmf.cuh +908 -0
- data/vendor/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/vendor/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/mmq.cu +372 -0
- data/vendor/ggml/src/ggml-cuda/mmq.cuh +4176 -0
- data/vendor/ggml/src/ggml-cuda/mmvf.cu +862 -0
- data/vendor/ggml/src/ggml-cuda/mmvf.cuh +14 -0
- data/vendor/ggml/src/ggml-cuda/mmvq.cu +1161 -0
- data/vendor/ggml/src/ggml-cuda/mmvq.cuh +16 -0
- data/vendor/ggml/src/ggml-cuda/norm.cu +672 -0
- data/vendor/ggml/src/ggml-cuda/norm.cuh +18 -0
- data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/vendor/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/vendor/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/out-prod.cu +84 -0
- data/vendor/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/pad.cu +106 -0
- data/vendor/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/vendor/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/vendor/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/quantize.cu +443 -0
- data/vendor/ggml/src/ggml-cuda/quantize.cuh +41 -0
- data/vendor/ggml/src/ggml-cuda/reduce_rows.cuh +39 -0
- data/vendor/ggml/src/ggml-cuda/roll.cu +67 -0
- data/vendor/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/rope.cu +665 -0
- data/vendor/ggml/src/ggml-cuda/rope.cuh +9 -0
- data/vendor/ggml/src/ggml-cuda/scale.cu +34 -0
- data/vendor/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/set-rows.cu +330 -0
- data/vendor/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/vendor/ggml/src/ggml-cuda/set.cu +39 -0
- data/vendor/ggml/src/ggml-cuda/set.cuh +7 -0
- data/vendor/ggml/src/ggml-cuda/snake.cu +72 -0
- data/vendor/ggml/src/ggml-cuda/snake.cuh +8 -0
- data/vendor/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/vendor/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/softmax.cu +472 -0
- data/vendor/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/vendor/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/vendor/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/ssm-conv.cu +197 -0
- data/vendor/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/ssm-scan.cu +342 -0
- data/vendor/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/sum.cu +41 -0
- data/vendor/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/sumrows.cu +43 -0
- data/vendor/ggml/src/ggml-cuda/sumrows.cuh +4 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +6 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +6 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +6 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +6 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +6 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +12 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +110 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/vendor/ggml/src/ggml-cuda/top-k.cu +95 -0
- data/vendor/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/vendor/ggml/src/ggml-cuda/topk-moe.cu +415 -0
- data/vendor/ggml/src/ggml-cuda/topk-moe.cuh +27 -0
- data/vendor/ggml/src/ggml-cuda/tri.cu +136 -0
- data/vendor/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/vendor/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/unary.cu +640 -0
- data/vendor/ggml/src/ggml-cuda/unary.cuh +114 -0
- data/vendor/ggml/src/ggml-cuda/upscale.cu +293 -0
- data/vendor/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/vendor/ggml/src/ggml-cuda/vecdotq.cuh +1317 -0
- data/vendor/ggml/src/ggml-cuda/vendors/cuda.h +28 -0
- data/vendor/ggml/src/ggml-cuda/vendors/hip.h +304 -0
- data/vendor/ggml/src/ggml-cuda/vendors/musa.h +150 -0
- data/vendor/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/vendor/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/vendor/ggml/src/ggml-hexagon/CMakeLists.txt +118 -0
- data/vendor/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3680 -0
- data/vendor/ggml/src/ggml-hexagon/htp/CMakeLists.txt +78 -0
- data/vendor/ggml/src/ggml-hexagon/htp/act-ops.c +782 -0
- data/vendor/ggml/src/ggml-hexagon/htp/argsort-ops.c +293 -0
- data/vendor/ggml/src/ggml-hexagon/htp/binary-ops.c +872 -0
- data/vendor/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/vendor/ggml/src/ggml-hexagon/htp/cpy-ops.c +275 -0
- data/vendor/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
- data/vendor/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
- data/vendor/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
- data/vendor/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +727 -0
- data/vendor/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +955 -0
- data/vendor/ggml/src/ggml-hexagon/htp/get-rows-ops.c +124 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hex-dma.h +372 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hex-dump.h +86 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hex-utils.h +137 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1841 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +1785 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-ops.h +71 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
- data/vendor/ggml/src/ggml-hexagon/htp/htp-ctx.h +111 -0
- data/vendor/ggml/src/ggml-hexagon/htp/htp-ops.h +181 -0
- data/vendor/ggml/src/ggml-hexagon/htp/htp_iface.idl +22 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-base.h +308 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-copy.h +262 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-div.h +291 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-exp.h +216 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +142 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/vendor/ggml/src/ggml-hexagon/htp/hvx-utils.h +19 -0
- data/vendor/ggml/src/ggml-hexagon/htp/main.c +880 -0
- data/vendor/ggml/src/ggml-hexagon/htp/matmul-ops.c +3173 -0
- data/vendor/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
- data/vendor/ggml/src/ggml-hexagon/htp/rope-ops.c +494 -0
- data/vendor/ggml/src/ggml-hexagon/htp/set-rows-ops.c +184 -0
- data/vendor/ggml/src/ggml-hexagon/htp/softmax-ops.c +407 -0
- data/vendor/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
- data/vendor/ggml/src/ggml-hexagon/htp/ssm-conv.c +340 -0
- data/vendor/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/vendor/ggml/src/ggml-hexagon/htp/unary-ops.c +657 -0
- data/vendor/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
- data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- data/vendor/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/vendor/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/vendor/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/vendor/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/vendor/ggml/src/ggml-hexagon/libggml-htp.inf +40 -0
- data/vendor/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/vendor/ggml/src/ggml-hip/CMakeLists.txt +157 -0
- data/vendor/ggml/src/ggml-impl.h +783 -0
- data/vendor/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-context.m +739 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-device.cpp +2053 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-device.h +296 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-device.m +1829 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-impl.h +1175 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-ops.cpp +4606 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal-ops.h +97 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal.cpp +950 -0
- data/vendor/ggml/src/ggml-metal/ggml-metal.metal +10679 -0
- data/vendor/ggml/src/ggml-musa/CMakeLists.txt +124 -0
- data/vendor/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/vendor/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/vendor/ggml/src/ggml-opencl/CMakeLists.txt +189 -0
- data/vendor/ggml/src/ggml-opencl/ggml-opencl.cpp +16374 -0
- data/vendor/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- data/vendor/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/vendor/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/vendor/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/vendor/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- data/vendor/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/vendor/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/vendor/ggml/src/ggml-opencl/kernels/cpy.cl +229 -0
- data/vendor/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/vendor/ggml/src/ggml-opencl/kernels/cvt.cl +1471 -0
- data/vendor/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/vendor/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/vendor/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/vendor/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/vendor/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/vendor/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- data/vendor/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- data/vendor/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +302 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +252 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +254 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +256 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +258 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl +139 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl +129 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +161 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +116 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +119 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +119 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +121 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl +274 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl +268 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
- data/vendor/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl +195 -0
- data/vendor/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- data/vendor/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/vendor/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/vendor/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/vendor/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/vendor/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- data/vendor/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
- data/vendor/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/vendor/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/vendor/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/vendor/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- data/vendor/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- data/vendor/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/vendor/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- data/vendor/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- data/vendor/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- data/vendor/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- data/vendor/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- data/vendor/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/vendor/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- data/vendor/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- data/vendor/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- data/vendor/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- data/vendor/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- data/vendor/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/vendor/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/vendor/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/vendor/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/vendor/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/vendor/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- data/vendor/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- data/vendor/ggml/src/ggml-opencl/kernels/transpose.cl +143 -0
- data/vendor/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/vendor/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/vendor/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/vendor/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/vendor/ggml/src/ggml-openvino/ggml-decoder.cpp +985 -0
- data/vendor/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +380 -0
- data/vendor/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/vendor/ggml/src/ggml-openvino/ggml-openvino.cpp +1132 -0
- data/vendor/ggml/src/ggml-openvino/ggml-quants.cpp +956 -0
- data/vendor/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/vendor/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/vendor/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/vendor/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/vendor/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/vendor/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/vendor/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/rope.cpp +149 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op_table.cpp +47 -0
- data/vendor/ggml/src/ggml-openvino/openvino/op_table.h +40 -0
- data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/vendor/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/vendor/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/vendor/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/vendor/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
- data/vendor/ggml/src/ggml-openvino/openvino/translate_session.cpp +317 -0
- data/vendor/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/vendor/ggml/src/ggml-openvino/openvino/utils.cpp +257 -0
- data/vendor/ggml/src/ggml-openvino/openvino/utils.h +86 -0
- data/vendor/ggml/src/ggml-openvino/utils.cpp +880 -0
- data/vendor/ggml/src/ggml-openvino/utils.h +143 -0
- data/vendor/ggml/src/ggml-opt.cpp +1094 -0
- data/vendor/ggml/src/ggml-quants.c +5491 -0
- data/vendor/ggml/src/ggml-quants.h +112 -0
- data/vendor/ggml/src/ggml-rpc/CMakeLists.txt +33 -0
- data/vendor/ggml/src/ggml-rpc/ggml-rpc.cpp +1974 -0
- data/vendor/ggml/src/ggml-rpc/transport.cpp +683 -0
- data/vendor/ggml/src/ggml-rpc/transport.h +34 -0
- data/vendor/ggml/src/ggml-sycl/CMakeLists.txt +207 -0
- data/vendor/ggml/src/ggml-sycl/add-id.cpp +81 -0
- data/vendor/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/vendor/ggml/src/ggml-sycl/backend.hpp +48 -0
- data/vendor/ggml/src/ggml-sycl/binbcast.cpp +346 -0
- data/vendor/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/vendor/ggml/src/ggml-sycl/common.cpp +155 -0
- data/vendor/ggml/src/ggml-sycl/common.hpp +1002 -0
- data/vendor/ggml/src/ggml-sycl/concat.cpp +202 -0
- data/vendor/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/vendor/ggml/src/ggml-sycl/conv.cpp +101 -0
- data/vendor/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/vendor/ggml/src/ggml-sycl/convert.cpp +825 -0
- data/vendor/ggml/src/ggml-sycl/convert.hpp +64 -0
- data/vendor/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/vendor/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/vendor/ggml/src/ggml-sycl/cpy.cpp +602 -0
- data/vendor/ggml/src/ggml-sycl/cpy.hpp +223 -0
- data/vendor/ggml/src/ggml-sycl/cumsum.cpp +148 -0
- data/vendor/ggml/src/ggml-sycl/cumsum.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/dequantize.hpp +975 -0
- data/vendor/ggml/src/ggml-sycl/diag.cpp +67 -0
- data/vendor/ggml/src/ggml-sycl/diag.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/dmmv.cpp +1579 -0
- data/vendor/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/vendor/ggml/src/ggml-sycl/dpct/helper.hpp +3774 -0
- data/vendor/ggml/src/ggml-sycl/element_wise.cpp +1124 -0
- data/vendor/ggml/src/ggml-sycl/element_wise.hpp +94 -0
- data/vendor/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
- data/vendor/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
- data/vendor/ggml/src/ggml-sycl/fattn-common.hpp +1181 -0
- data/vendor/ggml/src/ggml-sycl/fattn-tile.cpp +59 -0
- data/vendor/ggml/src/ggml-sycl/fattn-tile.hpp +1246 -0
- data/vendor/ggml/src/ggml-sycl/fattn-vec.hpp +674 -0
- data/vendor/ggml/src/ggml-sycl/fattn.cpp +227 -0
- data/vendor/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/vendor/ggml/src/ggml-sycl/fill.cpp +55 -0
- data/vendor/ggml/src/ggml-sycl/fill.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/gated_delta_net.cpp +307 -0
- data/vendor/ggml/src/ggml-sycl/gated_delta_net.hpp +9 -0
- data/vendor/ggml/src/ggml-sycl/gemm.hpp +93 -0
- data/vendor/ggml/src/ggml-sycl/getrows.cpp +219 -0
- data/vendor/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/vendor/ggml/src/ggml-sycl/ggml-sycl.cpp +5520 -0
- data/vendor/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/vendor/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/vendor/ggml/src/ggml-sycl/im2col.cpp +400 -0
- data/vendor/ggml/src/ggml-sycl/im2col.hpp +23 -0
- data/vendor/ggml/src/ggml-sycl/mmq.cpp +3030 -0
- data/vendor/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/vendor/ggml/src/ggml-sycl/mmvq.cpp +1380 -0
- data/vendor/ggml/src/ggml-sycl/mmvq.hpp +43 -0
- data/vendor/ggml/src/ggml-sycl/norm.cpp +656 -0
- data/vendor/ggml/src/ggml-sycl/norm.hpp +28 -0
- data/vendor/ggml/src/ggml-sycl/outprod.cpp +47 -0
- data/vendor/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/vendor/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/vendor/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/vendor/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/vendor/ggml/src/ggml-sycl/presets.hpp +79 -0
- data/vendor/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/vendor/ggml/src/ggml-sycl/quants.hpp +156 -0
- data/vendor/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/vendor/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/vendor/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/vendor/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/vendor/ggml/src/ggml-sycl/rope.cpp +641 -0
- data/vendor/ggml/src/ggml-sycl/rope.hpp +26 -0
- data/vendor/ggml/src/ggml-sycl/set.cpp +73 -0
- data/vendor/ggml/src/ggml-sycl/set.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/set_rows.cpp +240 -0
- data/vendor/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/vendor/ggml/src/ggml-sycl/softmax.cpp +426 -0
- data/vendor/ggml/src/ggml-sycl/softmax.hpp +24 -0
- data/vendor/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
- data/vendor/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
- data/vendor/ggml/src/ggml-sycl/ssm_conv.cpp +132 -0
- data/vendor/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
- data/vendor/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
- data/vendor/ggml/src/ggml-sycl/sycl_hw.cpp +67 -0
- data/vendor/ggml/src/ggml-sycl/sycl_hw.hpp +38 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +8 -0
- data/vendor/ggml/src/ggml-sycl/tsembd.cpp +73 -0
- data/vendor/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/vendor/ggml/src/ggml-sycl/type.hpp +112 -0
- data/vendor/ggml/src/ggml-sycl/upscale.cpp +410 -0
- data/vendor/ggml/src/ggml-sycl/upscale.hpp +9 -0
- data/vendor/ggml/src/ggml-sycl/vecdotq.hpp +1508 -0
- data/vendor/ggml/src/ggml-sycl/wkv.cpp +293 -0
- data/vendor/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/vendor/ggml/src/ggml-threading.cpp +12 -0
- data/vendor/ggml/src/ggml-threading.h +14 -0
- data/vendor/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/vendor/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/vendor/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +123 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +160 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-backend.cpp +71 -0
- data/vendor/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/vendor/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/vendor/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/vendor/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +99 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu.cpp +545 -0
- data/vendor/ggml/src/ggml-virtgpu/virtgpu.h +115 -0
- data/vendor/ggml/src/ggml-vulkan/CMakeLists.txt +220 -0
- data/vendor/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/vendor/ggml/src/ggml-vulkan/ggml-vulkan.cpp +17208 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +320 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +653 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +768 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +28 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +756 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +255 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +626 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +427 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +123 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +190 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +65 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +28 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +39 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +93 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +124 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +134 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +503 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +600 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +93 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +194 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +127 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +19 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +31 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +42 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1846 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1183 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/vendor/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/vendor/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- data/vendor/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +3231 -0
- data/vendor/ggml/src/ggml-webgpu/ggml-webgpu.cpp +4461 -0
- data/vendor/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +139 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +905 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +81 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +89 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +706 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +351 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +720 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +132 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +773 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +747 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +1210 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +55 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +149 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +200 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +133 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1433 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/rope.wgsl +224 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.wgsl +245 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +210 -0
- data/vendor/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
- data/vendor/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/vendor/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/vendor/ggml/src/ggml-zdnn/ggml-zdnn.cpp +637 -0
- data/vendor/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/vendor/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/vendor/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/vendor/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/vendor/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
- data/vendor/ggml/src/ggml-zendnn/ggml-zendnn.cpp +669 -0
- data/vendor/ggml/src/ggml.c +7777 -0
- data/vendor/ggml/src/ggml.cpp +26 -0
- data/vendor/ggml/src/gguf.cpp +1556 -0
- data/vendor/ggml/tests/CMakeLists.txt +356 -0
- data/vendor/ggml/tests/test-arange.cpp +100 -0
- data/vendor/ggml/tests/test-backend-ops.cpp +9786 -0
- data/vendor/ggml/tests/test-cont.c +170 -0
- data/vendor/ggml/tests/test-conv-transpose-1d.cpp +691 -0
- data/vendor/ggml/tests/test-conv-transpose.c +248 -0
- data/vendor/ggml/tests/test-conv1d-dw-c1.cpp +243 -0
- data/vendor/ggml/tests/test-conv1d-dw-c2.cpp +243 -0
- data/vendor/ggml/tests/test-conv1d.cpp +289 -0
- data/vendor/ggml/tests/test-conv2d-dw.cpp +153 -0
- data/vendor/ggml/tests/test-conv2d.cpp +391 -0
- data/vendor/ggml/tests/test-customop.c +300 -0
- data/vendor/ggml/tests/test-dup.c +111 -0
- data/vendor/ggml/tests/test-interpolate.cpp +166 -0
- data/vendor/ggml/tests/test-opt.cpp +1003 -0
- data/vendor/ggml/tests/test-pad-reflect-1d.cpp +213 -0
- data/vendor/ggml/tests/test-pool.c +274 -0
- data/vendor/ggml/tests/test-quantize-fns.cpp +196 -0
- data/vendor/ggml/tests/test-quantize-perf.cpp +356 -0
- data/vendor/ggml/tests/test-rel-pos.c +87 -0
- data/vendor/ggml/tests/test-roll.cpp +128 -0
- data/vendor/ggml/tests/test-timestep_embedding.cpp +180 -0
- data/vendor-patches/0001-cuda-buffer_from_ptr.patch +253 -0
- data/vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch +117 -0
- data/vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch +128 -0
- data/vendor-patches/0004-cuda-cpy-strided.patch +61 -0
- data/vendor-patches/0005-concat-backward.patch +36 -0
- data/vendor-patches/0006-getrows-back-large-vocab.patch +69 -0
- data/vendor-patches/0007-gpt2-backward-kernels.patch +438 -0
- data/vendor-patches/0008-mul-mat-backward-mixed-precision.patch +50 -0
- data/vendor-patches/0009-sched-unsupported-node-diagnostic.patch +26 -0
- metadata +2161 -0
data/Makefile
ADDED
|
@@ -0,0 +1,2022 @@
|
|
|
1
|
+
# toy build system.
|
|
2
|
+
#
|
|
3
|
+
# Demo / training Ruby drivers live in demos/ and compile to native
|
|
4
|
+
# binaries via Spinel. GPU acceleration is opt-in:
|
|
5
|
+
#
|
|
6
|
+
# make # prints help (.DEFAULT_GOAL is help); `make all` builds demos/train + demos/smollm2
|
|
7
|
+
# make setup-ggml # one-time clone + CPU build of vendored ggml
|
|
8
|
+
# make setup-ggml-cuda # one-time clone + CUDA build (needs CUDA toolkit)
|
|
9
|
+
# make setup-ggml-metal # one-time Metal build (macOS only)
|
|
10
|
+
# make smoke # tinynn FFI smoke test (4x3 ggml matmul demo)
|
|
11
|
+
# make distilgpt2_demo_text # → demos/distilgpt2_demo_text
|
|
12
|
+
#
|
|
13
|
+
# Vendored ggml lives at vendor/ggml/ (gitignored).
|
|
14
|
+
# The CUDA build expects sm_121 (NVIDIA GB10); override with
|
|
15
|
+
# GGML_CUDA_ARCH=NN on the command line.
|
|
16
|
+
# The Metal build uses GGML_METAL_EMBED_LIBRARY=ON so it works with
|
|
17
|
+
# Command Line Tools only (the xcrun metal / metallib compilers ship
|
|
18
|
+
# only with full Xcode). Kernels get JIT-compiled at first device load.
|
|
19
|
+
|
|
20
|
+
SPINEL_DIR ?= $(HOME)/sites/spinel
|
|
21
|
+
SPINEL_BIN ?= $(SPINEL_DIR)/spinel
|
|
22
|
+
|
|
23
|
+
# toy#69 — sig/*.rbs type roots. Every Spinel compile seeds the analyzer
|
|
24
|
+
# with toy's shipped RBS tree (`--rbs sig`): uncalled public methods
|
|
25
|
+
# keep their DECLARED param/return/ivar types instead of widening to
|
|
26
|
+
# poly (the spinel-dev#11/#12 facet family). Seeds are ADVISORY —
|
|
27
|
+
# inference runs on top and widens on observed contradiction (spinel
|
|
28
|
+
# docs/RBS-EXTRACT.md), and the full gate sweep was byte-exact at
|
|
29
|
+
# adoption. Vendored gems' sig roots ride along through the gitignored
|
|
30
|
+
# sig/vendor symlink -> ../vendor/spinel/sig, refreshed by `make
|
|
31
|
+
# vendor-tep` (spinel takes ONE --rbs dir; spinel_rbs_extract walks it
|
|
32
|
+
# recursively and follows symlinks). Set SPINEL_RBS= (empty) to compile
|
|
33
|
+
# without seeds when chasing an analyzer issue.
|
|
34
|
+
SPINEL_RBS ?= --rbs $(CURDIR)/sig
|
|
35
|
+
|
|
36
|
+
# --- DevEx polish knobs (cosmetic, never gate correctness) ----------------
|
|
37
|
+
# QUIET=1 (default) routes known-harmless build chatter through the
|
|
38
|
+
# prep/quietly + prep/progress helpers so the terminal stays readable
|
|
39
|
+
# on a fresh clone. QUIET=0 disables all filtering (useful when chasing
|
|
40
|
+
# a Spinel codegen issue or a cmake misconfig).
|
|
41
|
+
# - prep/quietly silences exact-substring patterns; exit code is
|
|
42
|
+
# ALWAYS the child's, so real errors still propagate.
|
|
43
|
+
# - prep/progress draws a single-line [NN%] bar over cmake/make's
|
|
44
|
+
# own progress markers; full output is tee'd to a .log file in
|
|
45
|
+
# vendor/ggml/. On non-zero exit it dumps the log tail to stderr.
|
|
46
|
+
QUIET ?= 1
|
|
47
|
+
QUIETLY := $(CURDIR)/prep/quietly
|
|
48
|
+
PROGRESS := $(CURDIR)/prep/progress
|
|
49
|
+
ifeq ($(QUIET),0)
|
|
50
|
+
SPINEL = $(SPINEL_BIN) $(SPINEL_RBS)
|
|
51
|
+
else
|
|
52
|
+
SPINEL = $(QUIETLY) \
|
|
53
|
+
'cannot resolve call to' \
|
|
54
|
+
'ignoring duplicate libraries' \
|
|
55
|
+
-- $(SPINEL_BIN) $(SPINEL_RBS)
|
|
56
|
+
endif
|
|
57
|
+
# Sentinel deps so example/demo Spinel-compiled binaries get re-spun
|
|
58
|
+
# when the Spinel compiler itself changes. Without this, stale .o /
|
|
59
|
+
# .a in tinynn/ combined with newer Spinel C codegen can produce
|
|
60
|
+
# misaligned binaries that segfault at init (Tao hit this 2026-05-26
|
|
61
|
+
# after pulling Spinel 2183a92 — the lib archives weren't rebuilt).
|
|
62
|
+
SPINEL_DEPS := $(SPINEL_DIR)/spinel_analyze $(SPINEL_DIR)/spinel_codegen
|
|
63
|
+
|
|
64
|
+
CC ?= cc
|
|
65
|
+
CFLAGS ?= -O2 -fPIC -Wall -Wextra
|
|
66
|
+
ARFLAGS = rcs
|
|
67
|
+
|
|
68
|
+
# macOS Command Line Tools (as of 26.x) keep stale 2023 C++ stub headers
|
|
69
|
+
# at /Library/Developer/CommandLineTools/usr/include/c++/v1 which shadow
|
|
70
|
+
# the real headers in the SDK. Prepend the SDK's libc++ include path so
|
|
71
|
+
# ggml's C++ files can find <mutex>, <array>, etc. No-op on Linux.
|
|
72
|
+
UNAME_S := $(shell uname -s)
|
|
73
|
+
ifeq ($(UNAME_S),Darwin)
|
|
74
|
+
CMAKE_ENV := CPLUS_INCLUDE_PATH=$(shell xcrun --show-sdk-path)/usr/include/c++/v1
|
|
75
|
+
NJOBS := $(shell sysctl -n hw.logicalcpu)
|
|
76
|
+
else
|
|
77
|
+
CMAKE_ENV :=
|
|
78
|
+
NJOBS := $(shell nproc)
|
|
79
|
+
endif
|
|
80
|
+
|
|
81
|
+
# --- vendored ggml ----------------------------------------------------------
|
|
82
|
+
GGML_DIR := vendor/ggml
|
|
83
|
+
GGML_REPO := https://github.com/ggml-org/ggml.git
|
|
84
|
+
# Pinned upstream rev: what the vendor-patches/ set is proven against, and
|
|
85
|
+
# what ships inside the gem (toy#45). Bump deliberately, re-proving patches.
|
|
86
|
+
#
|
|
87
|
+
# MUST be the FULL 40-char SHA (toy#60 item 5): the clone rule does a
|
|
88
|
+
# shallow `git fetch origin $(GGML_REV)`, and GitHub only serves
|
|
89
|
+
# fetch-by-SHA for FULL SHAs (allowReachableSHA1InWant — a short SHA
|
|
90
|
+
# gets "fatal: couldn't find remote ref", which broke every pristine
|
|
91
|
+
# clone during #45). Full-SHA shallow fetch verified cold against
|
|
92
|
+
# github.com/ggml-org/ggml on 2026-06-11.
|
|
93
|
+
GGML_REV := 41e7949d705fd5dfeac33f3804e1af2a136cebd9
|
|
94
|
+
GGML_CUDA_ARCH ?= 121
|
|
95
|
+
CUDA_DIR ?= /usr/local/cuda
|
|
96
|
+
|
|
97
|
+
# --- Tep dependency (spinelgems convention) ---------------------------------
|
|
98
|
+
# Tep is consumed as a RELEASED gem from RubyGems (Gemfile: `gem "tep",
|
|
99
|
+
# "~> 0.11"`; published at https://rubygems.org/gems/tep) via the
|
|
100
|
+
# bundler-spinel / spinelgems convention. Two steps:
|
|
101
|
+
#
|
|
102
|
+
# 1. `bundle lock` (Gemfile → Gemfile.lock)
|
|
103
|
+
# 2. `../spinelgems/exe/spinel-compat vendor` (lock → vendor/spinel/:
|
|
104
|
+
# copies tep lib/ AND
|
|
105
|
+
# natively compiles+wires
|
|
106
|
+
# its C-exts from tep's
|
|
107
|
+
# spinel-ext.json, AND
|
|
108
|
+
# writes vendor/spinel/deps.rb)
|
|
109
|
+
#
|
|
110
|
+
# No step 3: the old `prep/post_vendor_tep.rb` @TEP_*@ substitution is
|
|
111
|
+
# RETIRED — spinel-compat vendor owns C-ext wiring now (tep#98). Spinel
|
|
112
|
+
# entrypoints do `require_relative "vendor/spinel/deps"`.
|
|
113
|
+
#
|
|
114
|
+
# Precheck: ../spinelgems (the vendor tool) must be present. tep itself
|
|
115
|
+
# comes from RubyGems (bundler fetches the released gem), so ../tep is NOT
|
|
116
|
+
# required for the vendor flow.
|
|
117
|
+
#
|
|
118
|
+
# `bundle` env note: use a user-managed Ruby (rbenv / rv / ruby-install
|
|
119
|
+
# with --user-install gems). With system-owned gems (e.g. Debian's
|
|
120
|
+
# /var/lib/gems), `bundle lock` can't write the git cache without sudo —
|
|
121
|
+
# that's an env-setup concern, not a toy bug.
|
|
122
|
+
#
|
|
123
|
+
# SPINEL_EXT_DISABLE=pg: tep's optional pg C-ext currently fails to
|
|
124
|
+
# compile under spinel-compat (its libpq pkg-config cflags aren't wired
|
|
125
|
+
# to the source .o compile — spinelgems#8). toy only uses tep for HTTP
|
|
126
|
+
# serving, not its pg adapter, so we opt out. Drop this once #8 lands.
|
|
127
|
+
vendor-tep:
|
|
128
|
+
@if [ ! -d ../spinelgems ]; then \
|
|
129
|
+
echo ""; \
|
|
130
|
+
echo " ✗ vendor-tep needs the spinelgems sibling checkout (the vendor tool):"; \
|
|
131
|
+
echo " missing: ../spinelgems"; \
|
|
132
|
+
echo ""; \
|
|
133
|
+
echo " From this directory's parent ($$(cd .. && pwd)):"; \
|
|
134
|
+
echo " git clone https://github.com/OriPekelman/spinelgems"; \
|
|
135
|
+
echo " Or symlink an existing checkout:"; \
|
|
136
|
+
echo " ln -s ~/sites/spinelgems ../spinelgems"; \
|
|
137
|
+
echo ""; \
|
|
138
|
+
exit 1; \
|
|
139
|
+
fi
|
|
140
|
+
bundle lock
|
|
141
|
+
SPINEL_EXT_DISABLE=pg SPINEL_DIR=$(HOME)/sites/spinel ../spinelgems/exe/spinel-compat vendor
|
|
142
|
+
@# toy#69 — fold the vendored gems' aggregated sig root (advertised
|
|
143
|
+
@# by vendor/spinel/deps.rb, spinelgems#13) into toy's own --rbs
|
|
144
|
+
@# root via a gitignored symlink: spinel accepts ONE --rbs dir and
|
|
145
|
+
@# spinel_rbs_extract follows symlinks. Removed when no gem ships
|
|
146
|
+
@# sig (a dangling link would warn on every compile).
|
|
147
|
+
@if [ -d vendor/spinel/sig ]; then \
|
|
148
|
+
ln -sfn ../vendor/spinel/sig sig/vendor; \
|
|
149
|
+
echo " sig/vendor -> ../vendor/spinel/sig (rbs ride-along)"; \
|
|
150
|
+
else \
|
|
151
|
+
rm -f sig/vendor; \
|
|
152
|
+
fi
|
|
153
|
+
|
|
154
|
+
# Build vendor/spinel/tep/lib/tep.rb on demand for tep_demo/* targets.
|
|
155
|
+
# Triggers vendor-tep, which gates on the ../spinelgems sibling checkout.
|
|
156
|
+
vendor/spinel/tep/lib/tep.rb:
|
|
157
|
+
@$(MAKE) vendor-tep
|
|
158
|
+
|
|
159
|
+
# SpinelKit (toy#44) is vendored by the SAME `vendor-tep` step (both gems are in
|
|
160
|
+
# the Gemfile/lock; `spinel-compat vendor` copies all of them). This rule lets
|
|
161
|
+
# any runner/example list the vendored spinel_kit/git.rb (toy_events' git
|
|
162
|
+
# provenance) as a build prereq and have it produced on demand. Pure Ruby, no
|
|
163
|
+
# C-ext — the vendor copy is just lib/ files.
|
|
164
|
+
vendor/spinel/spinel_kit/lib/spinel_kit/git.rb:
|
|
165
|
+
@$(MAKE) vendor-tep
|
|
166
|
+
|
|
167
|
+
# SpinelKit JSON builder (toy#44) — the run_start/events JSON emitter, vendored
|
|
168
|
+
# by the same `vendor-tep` step. Replaces the retired lib/toy/io/toy_json.rb
|
|
169
|
+
# (Toy::Json → SpinelKit::Json::Builder; byte-identical output).
|
|
170
|
+
vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb:
|
|
171
|
+
@$(MAKE) vendor-tep
|
|
172
|
+
|
|
173
|
+
# --- pure-Spinel drivers ----------------------------------------------------
|
|
174
|
+
# Source lives in demos/. We expose short top-level target names
|
|
175
|
+
# (`make train_minimal`, `make distilgpt2_demo_text`) that build into
|
|
176
|
+
# demos/. Run the resulting binaries from the repo root.
|
|
177
|
+
# `make` with no args = `make help`. Previously it ran `all` which
|
|
178
|
+
# triggered vendor-tep and failed on machines without ../tep checked
|
|
179
|
+
# out (DevEx footgun a fresh-clone-on-Mac hit 2026-05-28). `make help`
|
|
180
|
+
# is always safe + discoverable; `make all` still works for the
|
|
181
|
+
# original behaviour.
|
|
182
|
+
.DEFAULT_GOAL := help
|
|
183
|
+
|
|
184
|
+
all: demos/train demos/smollm2
|
|
185
|
+
|
|
186
|
+
# `make setup` auto-detects the best backend for this host and runs
|
|
187
|
+
# the right setup-ggml-* variant(s). macOS → CPU + metal; nvcc on PATH → CPU + cuda;
|
|
188
|
+
# else CPU. Sentinels in setup-ggml-* make this a no-op if already
|
|
189
|
+
# done. Saves new users from picking the wrong setup target.
|
|
190
|
+
.PHONY: setup
|
|
191
|
+
|
|
192
|
+
setup:
|
|
193
|
+
@uname_s="$$(uname -s)"; \
|
|
194
|
+
if [ "$$uname_s" = "Darwin" ]; then \
|
|
195
|
+
echo "[setup] macOS detected → setup-ggml + setup-ggml-metal"; \
|
|
196
|
+
echo " (CPU examples link against vendor/ggml/build/;"; \
|
|
197
|
+
echo " Metal examples link against vendor/ggml/build-metal/.)"; \
|
|
198
|
+
$(MAKE) setup-ggml; \
|
|
199
|
+
$(MAKE) setup-ggml-metal; \
|
|
200
|
+
elif command -v nvcc >/dev/null 2>&1; then \
|
|
201
|
+
echo "[setup] nvcc on PATH → setup-ggml + setup-ggml-cuda"; \
|
|
202
|
+
$(MAKE) setup-ggml; \
|
|
203
|
+
$(MAKE) setup-ggml-cuda; \
|
|
204
|
+
else \
|
|
205
|
+
echo "[setup] CPU only → setup-ggml"; \
|
|
206
|
+
$(MAKE) setup-ggml; \
|
|
207
|
+
fi; \
|
|
208
|
+
echo ""; \
|
|
209
|
+
echo "Done. Next: run 'make help' for the entry points."
|
|
210
|
+
|
|
211
|
+
# --- help / time-to-joy entry points --------------------------------------
|
|
212
|
+
# `make help` is the discoverable index for someone who just cloned.
|
|
213
|
+
# Keep it short — pointers to the heavier docs (examples/README.md,
|
|
214
|
+
# tep_demo/README.md, docs/INDEX.md) for the details.
|
|
215
|
+
|
|
216
|
+
.PHONY: help
|
|
217
|
+
|
|
218
|
+
help:
|
|
219
|
+
@echo ""
|
|
220
|
+
@echo " toy — a transformer LM in Ruby, Spinel-compiled."
|
|
221
|
+
@echo " Full docs: README.md, examples/README.md, docs/INDEX.md."
|
|
222
|
+
@echo ""
|
|
223
|
+
@echo " NEW HERE? Scaffold a project + discover models with the toy CLI:"
|
|
224
|
+
@echo " toy new <dir> scaffold a conventional toy project tree"
|
|
225
|
+
@echo " toy install build/verify the CPU backend"
|
|
226
|
+
@echo " toy list find GGUFs in caches + project data/"
|
|
227
|
+
@echo " toy fetch <repo> <file> download a GGUF from HuggingFace"
|
|
228
|
+
@echo ""
|
|
229
|
+
@echo " ONE-TIME SETUP"
|
|
230
|
+
@echo " make setup auto-detect platform; pick CUDA/Metal/CPU"
|
|
231
|
+
@echo " make setup-ggml force CPU build (~2 min)"
|
|
232
|
+
@echo " make setup-ggml-cuda force CUDA backend"
|
|
233
|
+
@echo " make setup-ggml-metal force Metal backend (macOS)"
|
|
234
|
+
@echo ""
|
|
235
|
+
@if [ "$$(uname -s)" = "Darwin" ]; then \
|
|
236
|
+
echo " ⚡ macOS detected — for GPU acceleration use the _metal example"; \
|
|
237
|
+
echo " variants below (they link against libggml-metal + KV kernels)."; \
|
|
238
|
+
echo " The plain CPU runner (\`toy infer\`) still works but is CPU-only."; \
|
|
239
|
+
echo ""; \
|
|
240
|
+
fi
|
|
241
|
+
@echo " GETTING STARTED — examples/"
|
|
242
|
+
@echo " toy list list GGUFs cached locally / in HF / Ollama / LM Studio"
|
|
243
|
+
@echo " toy infer <model.gguf> load a GGUF, generate 16 tokens (CPU)"
|
|
244
|
+
@if [ "$$(uname -s)" = "Darwin" ]; then \
|
|
245
|
+
echo " make example_inference_metal same, Metal-accelerated (macOS) — use this on Mac"; \
|
|
246
|
+
fi
|
|
247
|
+
@echo " Most tasks are the CLI now: toy train|infer|eval|serve (see 'toy --help')."
|
|
248
|
+
@echo " Curated examples (narrated; examples/README.md is the tour):"
|
|
249
|
+
@echo " make example_01 train a tiny Llama from scratch (start here; ~2 s)"
|
|
250
|
+
@echo " make example_02 warm-start fine-tune from a real GGUF's embeddings"
|
|
251
|
+
@echo " make example_03 LoRA adapters over a frozen mmap'd base"
|
|
252
|
+
@echo " make example_04 load a GGUF, KV decode, print text"
|
|
253
|
+
@echo " make example_05 per-token logprobs (the eval building block)"
|
|
254
|
+
@echo " make example_06 compare your runs/ (CRuby, no build)"
|
|
255
|
+
@echo " make example_07 ViT-Tiny image classifier (same recipe shape)"
|
|
256
|
+
@echo " (Superseded tutorials live on in examples/legacy/ — they still build.)"
|
|
257
|
+
@echo ""
|
|
258
|
+
@echo " HTTP SERVING — tep_demo/"
|
|
259
|
+
@echo " make tep_demo/hello minimal Tep HTTP smoke"
|
|
260
|
+
@if [ ! -f vendor/spinel/tep/lib/tep.rb ]; then \
|
|
261
|
+
printf " (prereq: run %s first — needs ../tep + ../spinelgems checkouts)\n" "'make vendor-tep'"; \
|
|
262
|
+
fi
|
|
263
|
+
@echo ""
|
|
264
|
+
@echo " BENCH + CHECKS"
|
|
265
|
+
@echo " make bench routine perf regression gate (vs bench/baselines.csv)"
|
|
266
|
+
@echo " make bench-vs-pytorch same workloads, gated vs PyTorch (ratio, not absolute ms)"
|
|
267
|
+
@echo " make coverage regenerate the ggml-op coverage matrix"
|
|
268
|
+
@echo " make coverage-check CI form (no diff means in sync)"
|
|
269
|
+
@echo " make test all tinynn FFI smoke binaries"
|
|
270
|
+
@echo ""
|
|
271
|
+
@echo " COMMON MAKE FLAGS"
|
|
272
|
+
@echo " DEVICE=cuda on example_train_from_scratch / example_finetune_cuda"
|
|
273
|
+
@echo " GGUF=path/to/model.gguf on example_finetune (toy infer takes a positional path)"
|
|
274
|
+
@echo ""
|
|
275
|
+
|
|
276
|
+
# --- examples/ getting-started entry points --------------------------------
|
|
277
|
+
# Compact, one-file demos covering the main use cases. See
|
|
278
|
+
# examples/README.md.
|
|
279
|
+
# `toy infer` COMPUTE runner — lib-side Spinel binary the CLI shells to.
|
|
280
|
+
# Lifted from the retired examples/01_inference.rb. Target name MUST equal
|
|
281
|
+
# the output path string: lib/toy/core/cli/infer.rb uses RUNNER_TARGET both
|
|
282
|
+
# as the make target (ensure_built) AND the joined binary path. CPU-only;
|
|
283
|
+
# NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
|
|
284
|
+
libexec:
|
|
285
|
+
mkdir -p libexec
|
|
286
|
+
libexec/toy-infer: lib/toy/run/infer.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a | libexec
|
|
287
|
+
$(SPINEL) $< -o $@
|
|
288
|
+
toy-infer: libexec/toy-infer
|
|
289
|
+
|
|
290
|
+
# Diagnostic sibling of toy-infer: enables the cache trace and dumps per-tap
|
|
291
|
+
# min/max/|mean|/nan for every layer (used to localize ggml#1506 — the K-quant
|
|
292
|
+
# MoE attention head_nbytes collapse). See docs/notes/mul_mat_id_quants.md.
|
|
293
|
+
libexec/toy-infer-trace: lib/toy/run/infer_trace.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a | libexec
|
|
294
|
+
$(SPINEL) $< -o $@
|
|
295
|
+
toy-infer-trace: libexec/toy-infer-trace
|
|
296
|
+
|
|
297
|
+
# P4 — `toy eval` COMPUTE runner (CRuby→runner COMPUTE BRIDGE, same shape as
|
|
298
|
+
# toy-infer). Spinel source lib/toy/run/eval.rb; the binary path EQUALS the
|
|
299
|
+
# make target so ToyRoot.ensure_built("libexec/toy-eval") both builds and
|
|
300
|
+
# locates it. Deps = infer's deps + lib/toy/dev/toy_logprobs.rb (a transitive require
|
|
301
|
+
# of transformer_lm; listed explicitly so a touch of it rebuilds the runner).
|
|
302
|
+
# CPU-only; NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
|
|
303
|
+
libexec/toy-eval: lib/toy/run/eval.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a | libexec
|
|
304
|
+
$(SPINEL) $< -o $@
|
|
305
|
+
toy-eval: libexec/toy-eval
|
|
306
|
+
|
|
307
|
+
# LMC (Linear Mode Connectivity) eval runner — `toy eval lmc --ckpt A --other B`.
|
|
308
|
+
# Interpolates two checkpoints θ_α = (1-α)·θ_A + α·θ_B and evals CE per α.
|
|
309
|
+
# Spinel source lib/toy/run/eval_lmc.rb; the binary path EQUALS the make target
|
|
310
|
+
# so ToyRoot.ensure_built("libexec/toy-eval-lmc") both builds and locates it.
|
|
311
|
+
# Deps mirror the example_lmc rule (NOT toy-eval); order-only | libexec (no
|
|
312
|
+
# $(SPINEL_DEPS)) like the CPU toy-eval runner. CPU-only; NOT in MIRRORABLE (see
|
|
313
|
+
# prep/gen_cuda_mirror.rb); a cuda LMC twin is a later slice.
|
|
314
|
+
libexec/toy-eval-lmc: lib/toy/run/eval_lmc.rb lib/toy/llm/adamw.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/train/toy_drift_grad.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
|
|
315
|
+
$(SPINEL) $< -o $@
|
|
316
|
+
toy-eval-lmc: libexec/toy-eval-lmc
|
|
317
|
+
|
|
318
|
+
# CUDA siblings of toy-infer / toy-eval — selected by the CRuby CLI shell when
|
|
319
|
+
# invoked with `--device cuda` (lib/toy/core/cli/{infer,eval}.rb derive the
|
|
320
|
+
# target). PER-DEVICE binaries (not one polymorphic runner): a single source
|
|
321
|
+
# requiring BOTH ToyLM and ToyLMCuda would force the CUDA archive onto the CPU
|
|
322
|
+
# binary's link line, changing it. Keeping separate binaries leaves
|
|
323
|
+
# libexec/toy-infer / toy-eval link lines BYTE-UNCHANGED. Source is the
|
|
324
|
+
# hand-written lib/toy/run/{infer,eval}_cuda.rb (ToyLMCuda ctor arity 1 →
|
|
325
|
+
# NOT mechanically mirrorable → ABSENT from MIRRORABLE, like the CPU runners).
|
|
326
|
+
# Force-link recipe matches every other cuda target (-Wl,-u,tnn_cuda_force_link).
|
|
327
|
+
libexec/toy-infer-cuda: lib/toy/run/infer_cuda.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
|
|
328
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
329
|
+
toy-infer-cuda: libexec/toy-infer-cuda
|
|
330
|
+
|
|
331
|
+
libexec/toy-eval-cuda: lib/toy/run/eval_cuda.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
|
|
332
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
333
|
+
toy-eval-cuda: libexec/toy-eval-cuda
|
|
334
|
+
|
|
335
|
+
# Metal twins of the infer/eval cuda runners (macOS ONLY). Same single-type
|
|
336
|
+
# binary discipline (landmine #16): TinyNNMetal is the only compute module.
|
|
337
|
+
# Source is the hand-written lib/toy/run/{infer,eval}_metal.rb (ToyLMMetal ctor
|
|
338
|
+
# arity 1 -> NOT mechanically mirrorable -> ABSENT from MIRRORABLE, like the
|
|
339
|
+
# cuda/CPU runners). The macOS guard MUST come first so Linux/gx10 never touches
|
|
340
|
+
# the Apple frameworks; the metal --cc recipe links Foundation/Metal/MetalKit
|
|
341
|
+
# with the leading-underscore force-link symbol (_tnn_metal_force_link, macOS
|
|
342
|
+
# symbol convention) vs cuda's tnn_cuda_force_link. libtinynn_ggml.a (CPU
|
|
343
|
+
# archive) stays in deps for the base ggml symbols. gx10 RUNTIME-UNVERIFIED.
|
|
344
|
+
libexec/toy-infer-metal: lib/toy/run/infer_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
|
|
345
|
+
ifneq ($(UNAME_S),Darwin)
|
|
346
|
+
@echo "toy-infer-metal: macOS-only"; exit 1
|
|
347
|
+
endif
|
|
348
|
+
$(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
|
|
349
|
+
toy-infer-metal: libexec/toy-infer-metal
|
|
350
|
+
|
|
351
|
+
libexec/toy-eval-metal: lib/toy/run/eval_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
|
|
352
|
+
ifneq ($(UNAME_S),Darwin)
|
|
353
|
+
@echo "toy-eval-metal: macOS-only"; exit 1
|
|
354
|
+
endif
|
|
355
|
+
$(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
|
|
356
|
+
toy-eval-metal: libexec/toy-eval-metal
|
|
357
|
+
|
|
358
|
+
# Convenience: run both functional gates with the CUDA parity arm enabled.
|
|
359
|
+
.PHONY: gate-cuda
|
|
360
|
+
gate-cuda:
|
|
361
|
+
TOY_GATE_CUDA=1 ruby prep/infer_gate.rb
|
|
362
|
+
TOY_GATE_CUDA=1 ruby prep/eval_gate.rb
|
|
363
|
+
|
|
364
|
+
# GPT-2 minimal inline training proof (toy#12 part-b foundation). Builds a
|
|
365
|
+
# self-contained forward+CE+backward+AdamW loop over the GPT-2-distinctive
|
|
366
|
+
# structure (wte+wpe learned embeddings, composite LayerNorm, GELU FFN, tied
|
|
367
|
+
# output) — exercising the two vendored backward kernels (ggml_gelu_back,
|
|
368
|
+
# ggml_norm_back; vendor-patches/0007) end-to-end. Attention is the next
|
|
369
|
+
# increment; this proves the kernels train. Asserts CE decreases (exit 1 if
|
|
370
|
+
# not). CPU-only. "record-from-inline-first" reference for prep/gpt2_train_gate.
|
|
371
|
+
libexec/gpt2-train-min: prep/gpt2_train_min.rb lib/toy.rb lib/toy/ffi/tinynn.rb \
|
|
372
|
+
lib/toy/models/transformer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
|
|
373
|
+
$(SPINEL) $< -o $@
|
|
374
|
+
gpt2-train-min: libexec/gpt2-train-min
|
|
375
|
+
.PHONY: gpt2-train-min
|
|
376
|
+
gate-gpt2-min: libexec/gpt2-train-min
|
|
377
|
+
./libexec/gpt2-train-min
|
|
378
|
+
.PHONY: gate-gpt2-min
|
|
379
|
+
# Byte-exact GPT-2 train gate: assert the CE loss curve is byte-identical to
|
|
380
|
+
# prep/fixtures/gpt2_train_baseline.txt (record-from-inline reference for the
|
|
381
|
+
# eventual `toy train --arch gpt2`). Re-record with `ruby prep/gpt2_train_gate.rb --record`.
|
|
382
|
+
gate-gpt2: libexec/gpt2-train-min
|
|
383
|
+
ruby prep/gpt2_train_gate.rb
|
|
384
|
+
.PHONY: gate-gpt2
|
|
385
|
+
# Byte-exact gate for the GPT-2 ENGINE runner (libexec/toy-train-gpt2 →
|
|
386
|
+
# Toy::LLM::Engine::GPT2SeqEngine, the `toy train --arch gpt2` compute). Asserts
|
|
387
|
+
# the from-scratch loss curve is byte-identical + decreasing. Re-record with
|
|
388
|
+
# `ruby prep/gpt2_train_engine_gate.rb --record`.
|
|
389
|
+
gate-gpt2-train: libexec/toy-train-gpt2
|
|
390
|
+
ruby prep/gpt2_train_engine_gate.rb
|
|
391
|
+
.PHONY: gate-gpt2-train
|
|
392
|
+
# CUDA arm: `toy train --arch gpt2 --device cuda`. Forward + most backward on
|
|
393
|
+
# CUDA; GELU/LayerNorm backward fall back to CPU (no GPU kernel). CUDA-vs-CUDA
|
|
394
|
+
# byte-exact (empirical on GB10) + decreasing. Re-record with
|
|
395
|
+
# `ruby prep/gpt2_train_cuda_gate.rb --record`.
|
|
396
|
+
gate-gpt2-train-cuda: libexec/toy-train-gpt2-cuda
|
|
397
|
+
ruby prep/gpt2_train_cuda_gate.rb
|
|
398
|
+
.PHONY: gate-gpt2-train-cuda
|
|
399
|
+
|
|
400
|
+
# Deterministic train→infer ROUND-TRIP gate: train from-scratch --steps 5
|
|
401
|
+
# --seed 0, then infer a fixed numeric prompt greedily from the written
|
|
402
|
+
# checkpoint and assert the generated ids byte-equal the recorded fixture.
|
|
403
|
+
# Proves the from-scratch checkpoint is a standard fused-llama GGUF that
|
|
404
|
+
# `toy infer` loads. CPU-only (no CUDA arm); bin/toy auto-builds the runners.
|
|
405
|
+
.PHONY: gate-ckpt-roundtrip
|
|
406
|
+
gate-ckpt-roundtrip:
|
|
407
|
+
ruby prep/ckpt_roundtrip_gate.rb
|
|
408
|
+
|
|
409
|
+
# Deterministic LMC gate: `toy eval lmc` interpolates two PINNED from-scratch
|
|
410
|
+
# checkpoints and evals CE per α. The curve is ggml-internal CE (no Ruby libm)
|
|
411
|
+
# → byte-exact everywhere. Run twice (determinism) and assert byte-identical to
|
|
412
|
+
# prep/fixtures/lmc_baseline.txt. CPU-only (no CUDA arm this slice).
|
|
413
|
+
.PHONY: gate-lmc
|
|
414
|
+
gate-lmc:
|
|
415
|
+
ruby prep/lmc_gate.rb
|
|
416
|
+
|
|
417
|
+
# The 6th realize-gate (F3 full fine-tune) — past P2's accepted ceiling.
|
|
418
|
+
# Records the engine's full_finetune CE curve and re-verifies it byte-for-byte
|
|
419
|
+
# so the per-block alloc lift onto TransformerBlock is provably behavior-
|
|
420
|
+
# preserving. MODEL-GATED: needs data/smollm2-135m-native.gguf (gitignored dev
|
|
421
|
+
# artifact); SKIPs loudly when absent. Train losses are ggml-internal → byte-
|
|
422
|
+
# exact. Re-record with `ruby prep/full_finetune_gate.rb --record`.
|
|
423
|
+
.PHONY: gate-full-finetune
|
|
424
|
+
gate-full-finetune:
|
|
425
|
+
ruby prep/full_finetune_gate.rb
|
|
426
|
+
|
|
427
|
+
# Mixed-precision training gate (GH#9, f16, CPU). Drives the from-scratch
|
|
428
|
+
# example at WEIGHT_DTYPE=1 vs =0 and asserts: f16 runs to completion (needs the
|
|
429
|
+
# 0008 mul_mat-backward-mixed-precision ggml patch — without it backward aborts),
|
|
430
|
+
# run_start.model.weight_type surfaces the dtype, and the f16 final loss lands
|
|
431
|
+
# within tolerance of the f32 baseline. TOLERANCE arm (dtype changes numerics),
|
|
432
|
+
# not byte-exact. bf16 is the CUDA/GB10 follow-up. Builds the example itself.
|
|
433
|
+
.PHONY: gate-mixed-precision
|
|
434
|
+
gate-mixed-precision:
|
|
435
|
+
ruby prep/mixed_precision_gate.rb
|
|
436
|
+
|
|
437
|
+
# toy#64 item 6 — Toy::RunLog unit gate (CRuby-only, no Spinel build).
|
|
438
|
+
# Self-contained synthetic fixture + integration sniff of repo runs/.
|
|
439
|
+
.PHONY: gate-run-log
|
|
440
|
+
gate-run-log:
|
|
441
|
+
ruby prep/run_log_gate.rb
|
|
442
|
+
|
|
443
|
+
# toy#71 — the MRI dev-run gate, BOTH arms (plain `ruby`, NO Spinel
|
|
444
|
+
# build, NO SPINEL_DIR). Stub leg (Stage A): `require "toy/mri"` loads
|
|
445
|
+
# the full compute surface under CRuby, the pure-Ruby teaching path
|
|
446
|
+
# genuinely trains, and crossing the native boundary raises the NAMED
|
|
447
|
+
# Toy::MRI::NativeCallError. Native leg (Stage B, the CRuby oracle;
|
|
448
|
+
# needs `make libtinynn_shared`, loud SKIP otherwise — MRI_GATE_STRICT=1
|
|
449
|
+
# turns the skip into a failure): MRI+Fiddle reproduces the recorded
|
|
450
|
+
# Spinel from-scratch gate curve BIT-EXACT (train_baseline.txt) and the
|
|
451
|
+
# smollm2-135m greedy decode ids byte-equal infer_baseline.txt.
|
|
452
|
+
.PHONY: gate-mri
|
|
453
|
+
gate-mri:
|
|
454
|
+
ruby prep/mri_gate.rb
|
|
455
|
+
|
|
456
|
+
# toy#60 item 4 — the COLD-START consumer gate: `toy new` scaffold →
|
|
457
|
+
# hello.rb compiles + runs (default ENV, then D_MODEL override without
|
|
458
|
+
# recompiling) → `toy train` prints losses + writes runs/<id>/ → the
|
|
459
|
+
# missing-corpus guard fails loud; PLUS the `toy new --lib` leg
|
|
460
|
+
# (bundle lock → spinel-compat vendor → ./build.sh cpu → run; skips
|
|
461
|
+
# loudly when bundler/spinel-compat are absent). Structural, not
|
|
462
|
+
# byte-exact. ~4 min (the lib leg builds ggml inside the tmp project).
|
|
463
|
+
.PHONY: gate-consumer
|
|
464
|
+
gate-consumer:
|
|
465
|
+
ruby prep/consumer_gate.rb
|
|
466
|
+
|
|
467
|
+
# toy#42 full-API require gate. Builds prep/smokes/smoke_compute_surface (which
|
|
468
|
+
# requires ONLY lib/toy/compute.rb) and asserts it realizes a live engine —
|
|
469
|
+
# proving the one-require compute surface co-compiles + works for a library
|
|
470
|
+
# consumer. Builds the smoke itself.
|
|
471
|
+
.PHONY: gate-compute-surface
|
|
472
|
+
gate-compute-surface: prep/smokes/smoke_compute_surface
|
|
473
|
+
@out="$$(./prep/smokes/smoke_compute_surface 2>&1)"; \
|
|
474
|
+
echo "$$out" | tail -2; \
|
|
475
|
+
echo "$$out" | grep -q "compute-surface: ok" \
|
|
476
|
+
&& echo "GATE PASS [compute-surface]: lib/toy/compute.rb one-require surface is live" \
|
|
477
|
+
|| { echo "GATE FAIL [compute-surface]"; exit 1; }
|
|
478
|
+
|
|
479
|
+
# toy#64 item 8 — CUDA twin of gate-compute-surface: build + run the
|
|
480
|
+
# consumer-ish CUDA entry smoke on the GPU (GB10 sm_121).
|
|
481
|
+
.PHONY: gate-compute-surface-cuda
|
|
482
|
+
gate-compute-surface-cuda: prep/smokes/smoke_compute_surface_cuda
|
|
483
|
+
@out="$$(./prep/smokes/smoke_compute_surface_cuda 2>&1)"; \
|
|
484
|
+
echo "$$out" | tail -2; \
|
|
485
|
+
echo "$$out" | grep -q "compute-surface-cuda: ok" \
|
|
486
|
+
&& echo "GATE PASS [compute-surface-cuda]: lib/toy/compute_cuda.rb device entry is live" \
|
|
487
|
+
|| { echo "GATE FAIL [compute-surface-cuda]"; exit 1; }
|
|
488
|
+
|
|
489
|
+
# K-quant MoE attention regression gate (the bug long misfiled as ggml#1506):
|
|
490
|
+
# head_nbytes returned 0 for K-quant attention weights → per-head mmap stride
|
|
491
|
+
# collapsed every head onto head 0 → degenerate repeating decode on OLMoE
|
|
492
|
+
# Q4_K_M. Structural assertion (distinct-count + max single-token run), not
|
|
493
|
+
# byte-exact, so it survives benign K-quant drift. MODEL-GATED: needs the ~4 GB
|
|
494
|
+
# data/OLMoE-1b-7b-0924-Instruct-Q4_K_M.gguf (gitignored); SKIPs loudly when
|
|
495
|
+
# absent. bin/toy auto-builds the infer runner. See docs/notes/mul_mat_id_quants.md.
|
|
496
|
+
.PHONY: gate-moe-kquant
|
|
497
|
+
gate-moe-kquant:
|
|
498
|
+
ruby prep/moe_kquant_gate.rb
|
|
499
|
+
|
|
500
|
+
# Silent poly-degradation gate (#32): compiles the canonical compute entrypoints
|
|
501
|
+
# with spinel and fails if a NEW `cannot resolve … on poly … (emitting 0)` warning
|
|
502
|
+
# appears vs the frozen baseline — i.e. a refactor just silently compiled a literal
|
|
503
|
+
# 0 into a numerical path (compiled != correct). Re-record the known-benign set with
|
|
504
|
+
# `ruby prep/poly_degrade_gate.rb --record`. See feedback_spinel_type_inference_landmines.
|
|
505
|
+
.PHONY: gate-poly-degrade
|
|
506
|
+
gate-poly-degrade:
|
|
507
|
+
ruby prep/poly_degrade_gate.rb
|
|
508
|
+
|
|
509
|
+
# CUDA from-scratch TRAINING gate (STRONG arm, no epsilon): train
|
|
510
|
+
# from-scratch --device cuda --steps 5 --seed 0, assert the "step N: loss="
|
|
511
|
+
# curve byte-equals prep/fixtures/train_cuda_baseline.txt, loss decreases,
|
|
512
|
+
# and the CUDA checkpoint round-trips through CPU `toy infer`. Determinism is
|
|
513
|
+
# EMPIRICAL on this GB10 — see the fixture header. bin/toy auto-builds.
|
|
514
|
+
.PHONY: gate-train-cuda
|
|
515
|
+
gate-train-cuda:
|
|
516
|
+
ruby prep/train_cuda_gate.rb
|
|
517
|
+
|
|
518
|
+
# Metal RUNTIME parity gate (macOS ONLY). Builds the three metal runners then
|
|
519
|
+
# runs prep/metal_gate.rb: infer (cpu-vs-metal byte-equal ids), eval (top-k id
|
|
520
|
+
# ORDER equality), train-from-scratch (run-twice byte-determinism OR a Mac-
|
|
521
|
+
# pinned baseline, loss-decrease, ckpt round-trip vs the SHARED fixture,
|
|
522
|
+
# events.jsonl run_start/run_end). On Linux/gx10 this SKIPS GREEN (exit 0) so
|
|
523
|
+
# umbrella `make gate-*` runs do not false-fail — Metal cannot build or run
|
|
524
|
+
# here. THIS is the gate that actually validates metal numerics; run it on the
|
|
525
|
+
# Mac. (The metal BUILD targets exit 1 on Linux — a gate that can't run skips
|
|
526
|
+
# green, a build target that can't build errors red.)
|
|
527
|
+
.PHONY: gate-metal
|
|
528
|
+
gate-metal:
|
|
529
|
+
ifneq ($(UNAME_S),Darwin)
|
|
530
|
+
@echo "gate-metal: Metal is macOS-only (uname -s = $(UNAME_S)) — skipping"; exit 0
|
|
531
|
+
else
|
|
532
|
+
$(MAKE) libexec/toy-infer-metal libexec/toy-eval-metal libexec/toy-train-metal
|
|
533
|
+
ruby prep/metal_gate.rb
|
|
534
|
+
endif
|
|
535
|
+
|
|
536
|
+
# STRUCTURAL serving-telemetry gate: boot libexec/toy-serve with TAO_RUN_DIR
|
|
537
|
+
# set, POST /v1/completions, SIGTERM, then assert runs/<id>/events.jsonl carries
|
|
538
|
+
# the toy/v1 run_start(serve) + eval/serve/request + run_end stream (Tao #6).
|
|
539
|
+
# Honest STRUCTURAL (NOT byte-identical): t/latency_us/request_id are
|
|
540
|
+
# wall-clock/counter and cannot be byte-stable. Self-builds the runner.
|
|
541
|
+
.PHONY: gate-serve-events
|
|
542
|
+
gate-serve-events:
|
|
543
|
+
ruby prep/serve_events_gate.rb
|
|
544
|
+
|
|
545
|
+
# Umbrella: the byte-baseline serve gate THEN the structural events gate.
|
|
546
|
+
.PHONY: gate-serve
|
|
547
|
+
gate-serve:
|
|
548
|
+
ruby prep/serve_gate.rb
|
|
549
|
+
ruby prep/serve_events_gate.rb
|
|
550
|
+
|
|
551
|
+
# P4 — from-scratch TRAINING compute runner (CRuby→runner COMPUTE BRIDGE,
|
|
552
|
+
# same shape as toy-infer). Spinel source lib/toy/run/train.rb; the binary
|
|
553
|
+
# path EQUALS the make target so ToyRoot.ensure_built("libexec/toy-train")
|
|
554
|
+
# both builds and locates it. Deps list every transitive require the runner
|
|
555
|
+
# pulls (the recipe → llama_seq_engine → transformer + toy + smollm2 +
|
|
556
|
+
# tinynn + the L1-L3 primitives/blocks/archs; plus gguf_writer + drift_grad
|
|
557
|
+
# for the checkpoint). CPU-only; NOT in MIRRORABLE (see prep/gen_cuda_mirror.rb).
|
|
558
|
+
libexec/toy-train: lib/toy/run/train.rb lib/toy/dev/toy_describe_flow.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
|
|
559
|
+
vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb \
|
|
560
|
+
lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb \
|
|
561
|
+
lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb \
|
|
562
|
+
lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb \
|
|
563
|
+
lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
|
|
564
|
+
lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
|
|
565
|
+
lib/toy/llm/primitives/rms_norm.rb lib/toy/llm/primitives/rope.rb \
|
|
566
|
+
lib/toy/llm/primitives/swiglu.rb lib/toy/llm/primitives/gqa.rb \
|
|
567
|
+
lib/toy/llm/blocks/transformer_block.rb lib/toy/llm/archs/llama_arch.rb \
|
|
568
|
+
lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
|
|
569
|
+
$(SPINEL) $< -o $@
|
|
570
|
+
toy-train: libexec/toy-train
|
|
571
|
+
|
|
572
|
+
# `toy train lora` DEDICATED runner. Separate binary from toy-train: the
|
|
573
|
+
# LoRA realize_for_mmap path cannot share a Spinel compilation unit with the
|
|
574
|
+
# random-init path (cfg type-merge miscompile; see lib/toy/run/train_lora.rb
|
|
575
|
+
# header). CPU-only; NOT in MIRRORABLE.
|
|
576
|
+
libexec/toy-train-lora: lib/toy/run/train_lora.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
|
|
577
|
+
lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb \
|
|
578
|
+
lib/toy/llm/adamw.rb \
|
|
579
|
+
lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
|
|
580
|
+
lib/toy/llm/primitives/rms_norm.rb lib/toy/llm/primitives/rope.rb \
|
|
581
|
+
lib/toy/llm/primitives/swiglu.rb lib/toy/llm/primitives/gqa.rb \
|
|
582
|
+
lib/toy/llm/blocks/transformer_block.rb lib/toy/llm/archs/llama_arch.rb \
|
|
583
|
+
lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
|
|
584
|
+
$(SPINEL) $< -o $@
|
|
585
|
+
toy-train-lora: libexec/toy-train-lora
|
|
586
|
+
|
|
587
|
+
# `toy train from-scratch --arch gpt2` DEDICATED runner. Separate binary from
|
|
588
|
+
# toy-train (landmine #16: the GPT-2 realize path can't share a Spinel unit with
|
|
589
|
+
# the llama random-init path). Self-contained GPT2SeqEngine (no llama engine /
|
|
590
|
+
# primitives dep), so it also can't churn the llama gates. CPU-only this slice.
|
|
591
|
+
libexec/toy-train-gpt2: lib/toy/run/train_gpt2.rb lib/toy.rb \
|
|
592
|
+
lib/toy/llm/engine/gpt2_seq_engine.rb lib/toy/llm/labels.rb lib/toy/llm/adamw.rb \
|
|
593
|
+
lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a | libexec
|
|
594
|
+
$(SPINEL) $< -o $@
|
|
595
|
+
toy-train-gpt2: libexec/toy-train-gpt2
|
|
596
|
+
.PHONY: toy-train-gpt2
|
|
597
|
+
|
|
598
|
+
# CUDA twin of toy-train-gpt2 (`--arch gpt2 --device cuda`). SEPARATE single-type
|
|
599
|
+
# binary (landmine #16): links the generated CUDA engine mirror + the CUDA TinyNN
|
|
600
|
+
# shim; the GELU/LayerNorm backward ops fall back to the CPU backend via the
|
|
601
|
+
# scheduler (no CUDA kernel). lib/toy/ffi/tinynn.rb + transformer.rb stay in deps (Mat /
|
|
602
|
+
# CPU-TinyNN seam). NOT in MIRRORABLE (the engine mirror IS; the runner is hand-written).
|
|
603
|
+
libexec/toy-train-gpt2-cuda: lib/toy/run/train_gpt2_cuda.rb lib/toy.rb \
|
|
604
|
+
lib/toy/llm/engine/gpt2_seq_engine_cuda.rb lib/toy/models/transformer.rb \
|
|
605
|
+
lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
|
|
606
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
607
|
+
toy-train-gpt2-cuda: libexec/toy-train-gpt2-cuda
|
|
608
|
+
.PHONY: toy-train-gpt2-cuda
|
|
609
|
+
|
|
610
|
+
# Metal twin (`--arch gpt2 --device metal`), macOS ONLY. Same structure; links
|
|
611
|
+
# the generated Metal engine mirror + the Metal TinyNN shim + Apple frameworks.
|
|
612
|
+
# gx10 RUNTIME-UNVERIFIED (codegen + structural parity here; runtime-gate on Mac).
|
|
613
|
+
libexec/toy-train-gpt2-metal: lib/toy/run/train_gpt2_metal.rb lib/toy.rb \
|
|
614
|
+
lib/toy/llm/engine/gpt2_seq_engine_metal.rb lib/toy/models/transformer.rb \
|
|
615
|
+
lib/toy/ffi/tinynn_metal.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
|
|
616
|
+
ifneq ($(UNAME_S),Darwin)
|
|
617
|
+
@echo "toy-train-gpt2-metal: macOS-only"; exit 1
|
|
618
|
+
endif
|
|
619
|
+
$(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
|
|
620
|
+
toy-train-gpt2-metal: libexec/toy-train-gpt2-metal
|
|
621
|
+
.PHONY: toy-train-gpt2-metal
|
|
622
|
+
|
|
623
|
+
# P4/vit — ViT-Tiny from-scratch CPU TRAINING runner. SEPARATE binary
|
|
624
|
+
# (landmine #16): ViTTinyConfig must NOT share a Spinel compilation unit
|
|
625
|
+
# with SmolLM2Config. Source lib/toy/run/train_vit.rb; binary path EQUALS
|
|
626
|
+
# the make target. Reads STEPS/SEED/IMG_DIR/TAO_RUN_DIR/TOY_RUN_ID from ENV;
|
|
627
|
+
# trains random-init on the COMMITTED data/vit_smoke corpus. NO toy_gguf_writer
|
|
628
|
+
# dep (cfg.vocab/d_ff poly-collide with ViTTinyConfig — #169 checkpoint
|
|
629
|
+
# follow-up). CPU-only; absent from MIRRORABLE (no CUDA/Metal twin this slice).
|
|
630
|
+
libexec/toy-train-vit: lib/toy/run/train_vit.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/vit_tiny.rb \
|
|
631
|
+
lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb \
|
|
632
|
+
lib/toy/io/toy_image_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/train/toy_drift_grad.rb \
|
|
633
|
+
lib/toy/llm/adamw.rb \
|
|
634
|
+
lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
|
|
635
|
+
$(SPINEL) $< -o $@
|
|
636
|
+
toy-train-vit: libexec/toy-train-vit
|
|
637
|
+
|
|
638
|
+
# P4/GPU — from-scratch CUDA TRAINING runner. CUDA twin of libexec/toy-train,
|
|
639
|
+
# from-scratch ONLY (warm_start dropped — though the prereq list below still names recipes/warm_start_cuda.rb). SINGLE-TYPE binary (landmine #16):
|
|
640
|
+
# TinyNNCuda is the compute path; lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in
|
|
641
|
+
# deps because transformer.rb requires tinynn -> defines CPU TinyNN for the
|
|
642
|
+
# checkpoint write/fuse/drift seam (dropping them breaks the writer). Links
|
|
643
|
+
# the CUDA ggml backend via -Wl,-u,tnn_cuda_force_link (every cuda target).
|
|
644
|
+
# NOT in MIRRORABLE (the runner is hand-written; see prep/gen_cuda_mirror.rb).
|
|
645
|
+
libexec/toy-train-cuda: lib/toy/run/train_cuda.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
|
|
646
|
+
lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb \
|
|
647
|
+
lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch_cuda.rb \
|
|
648
|
+
lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start_cuda.rb \
|
|
649
|
+
lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
|
|
650
|
+
lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/models/transformer.rb \
|
|
651
|
+
lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
|
|
652
|
+
lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
|
|
653
|
+
lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
|
|
654
|
+
lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
|
|
655
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
656
|
+
toy-train-cuda: libexec/toy-train-cuda
|
|
657
|
+
|
|
658
|
+
# P4/GPU — LoRA CUDA TRAINING runner. CUDA twin of libexec/toy-train-lora.
|
|
659
|
+
# SEPARATE binary from libexec/toy-train-cuda: the LoRA realize_for_mmap path
|
|
660
|
+
# cannot share a Spinel compilation unit with the random-init path (cfg
|
|
661
|
+
# type-merge miscompile; landmine #16 — same reason toy-train-lora is split
|
|
662
|
+
# from toy-train). SINGLE-TYPE binary: TinyNNCuda is the compute path;
|
|
663
|
+
# lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in deps because transformer.rb
|
|
664
|
+
# requires tinynn -> defines CPU TinyNN for the checkpoint write seam
|
|
665
|
+
# (ToyDriftGrad.params downloads via CPU TinyNN). toy_gguf_fuse is NOT a dep
|
|
666
|
+
# (lora uses ToyDriftGrad.params, not the lens-fold path). Links the CUDA
|
|
667
|
+
# ggml backend via -Wl,-u,tnn_cuda_force_link. NOT in MIRRORABLE (hand-written).
|
|
668
|
+
libexec/toy-train-lora-cuda: lib/toy/run/train_lora_cuda.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
|
|
669
|
+
lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora_cuda.rb \
|
|
670
|
+
lib/toy/llm/adamw.rb \
|
|
671
|
+
lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb \
|
|
672
|
+
lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
|
|
673
|
+
lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
|
|
674
|
+
lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
|
|
675
|
+
lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS) | libexec
|
|
676
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
677
|
+
toy-train-lora-cuda: libexec/toy-train-lora-cuda
|
|
678
|
+
|
|
679
|
+
# P4/GPU — from-scratch METAL TRAINING runner (macOS ONLY). Metal twin of
|
|
680
|
+
# libexec/toy-train-cuda, from-scratch ONLY. SINGLE-TYPE binary (landmine #16):
|
|
681
|
+
# TinyNNMetal is the compute path; lib/toy/ffi/tinynn.rb + lib/toy/models/transformer.rb stay in
|
|
682
|
+
# deps because transformer.rb requires tinynn -> defines CPU TinyNN for the
|
|
683
|
+
# checkpoint write/fuse/drift seam (dropping them breaks the writer). The macOS
|
|
684
|
+
# guard MUST come first so Linux/gx10 never touches the Apple frameworks; the
|
|
685
|
+
# metal --cc recipe links Foundation/Metal/MetalKit with _tnn_metal_force_link
|
|
686
|
+
# (leading underscore, macOS symbol convention). libtinynn_ggml.a (CPU archive)
|
|
687
|
+
# stays in deps for the write seam + base ggml. NOT in MIRRORABLE (hand-written).
|
|
688
|
+
# gx10 RUNTIME-UNVERIFIED — pin baseline + gate on the Mac.
|
|
689
|
+
libexec/toy-train-metal: lib/toy/run/train_metal.rb lib/toy/dev/toy_describe_flow.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy.rb lib/toy/models/toy_smollm2.rb \
|
|
690
|
+
lib/toy/llm/engine/llama_seq_engine_metal.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch_metal.rb \
|
|
691
|
+
lib/toy/llm/adamw.rb lib/toy/llm/labels.rb \
|
|
692
|
+
lib/toy/train/toy_gguf_writer.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/models/transformer.rb \
|
|
693
|
+
lib/toy/llm/primitives/rms_norm_metal.rb lib/toy/llm/primitives/rope_metal.rb \
|
|
694
|
+
lib/toy/llm/primitives/swiglu_metal.rb lib/toy/llm/primitives/gqa_metal.rb \
|
|
695
|
+
lib/toy/llm/blocks/transformer_block_metal.rb lib/toy/llm/archs/llama_arch_metal.rb \
|
|
696
|
+
lib/toy/ffi/tinynn_metal.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a $(SPINEL_DEPS) | libexec
|
|
697
|
+
ifneq ($(UNAME_S),Darwin)
|
|
698
|
+
@echo "toy-train-metal: macOS-only"; exit 1
|
|
699
|
+
endif
|
|
700
|
+
$(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
|
|
701
|
+
toy-train-metal: libexec/toy-train-metal
|
|
702
|
+
|
|
703
|
+
# P4 — `toy serve` PERSISTENT compute runner (OpenAI-compatible HTTP).
|
|
704
|
+
# Unlike infer/train/eval (compute-once), this runner blocks in Tep.run!.
|
|
705
|
+
# Spinel source lib/toy/run/serve.rb; the binary path EQUALS the make
|
|
706
|
+
# target so ToyRoot.ensure_built("libexec/toy-serve") both builds and
|
|
707
|
+
# locates it. The endpoint logic moved out of tep_demo/openai_api_llama.rb
|
|
708
|
+
# into lib/toy/serve/openai/* (Server/State + handlers + the embeddings
|
|
709
|
+
# handler; JSON via SpinelKit::Json, toy#44). vendor/spinel/tep/lib/tep.rb is the TEP BUILD-DEP
|
|
710
|
+
# edge — Tep is consumed purely as transport (built by `make vendor-tep`
|
|
711
|
+
# on a fresh tree; needs ../tep + ../spinelgems siblings). Deps mirror the
|
|
712
|
+
# tep_demo recipe (Makefile:486) + the KV stack. CPU-only; NOT in
|
|
713
|
+
# MIRRORABLE (see prep/gen_cuda_mirror.rb).
|
|
714
|
+
libexec/toy-serve: lib/toy/run/serve.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb \
|
|
715
|
+
lib/toy/serve/openai/server.rb \
|
|
716
|
+
lib/toy/serve/openai/handlers.rb lib/toy/serve/openai/embeddings_handler.rb \
|
|
717
|
+
vendor/spinel/tep/lib/tep.rb \
|
|
718
|
+
lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb \
|
|
719
|
+
tinynn/libtinynn_ggml.a | libexec
|
|
720
|
+
$(SPINEL) $< -o $@
|
|
721
|
+
toy-serve: libexec/toy-serve
|
|
722
|
+
|
|
723
|
+
# toy#gguf-checkpoint-reload (#153) — smoke binary that loads a
|
|
724
|
+
# from-scratch toy GGUF and runs a tiny generation. No tokenizer.
|
|
725
|
+
prep/smokes/smoke_toy_ckpt_reload: prep/smokes/smoke_toy_ckpt_reload.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a
|
|
726
|
+
$(SPINEL) $< -o $@
|
|
727
|
+
|
|
728
|
+
# toy#embed-api (#145) — smoke for ToyLM#embed_lookup.
|
|
729
|
+
prep/smokes/smoke_embed_api: prep/smokes/smoke_embed_api.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a
|
|
730
|
+
$(SPINEL) $< -o $@
|
|
731
|
+
|
|
732
|
+
# P1 framework refactor — runtime Card derivation smoke. Loads a
|
|
733
|
+
# llama-family GGUF, realizes the seq-mode cache, derives a
|
|
734
|
+
# structural Toy::Card via ToyDescribeFlow.card, prints + gates.
|
|
735
|
+
prep/smokes/smoke_card_derive: prep/smokes/smoke_card_derive.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/dev/toy_card.rb tinynn/libtinynn_ggml.a
|
|
736
|
+
$(SPINEL) $< -o $@
|
|
737
|
+
|
|
738
|
+
# toy#decode-logprobs (#151) — smoke for ToyLM#decode_step_with_logprobs.
|
|
739
|
+
prep/smokes/smoke_decode_logprobs: prep/smokes/smoke_decode_logprobs.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a
|
|
740
|
+
$(SPINEL) $< -o $@
|
|
741
|
+
|
|
742
|
+
# GH#18 — LMC interpolate-and-eval runner.
|
|
743
|
+
examples/example_lmc: examples/legacy/08_lmc.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
744
|
+
$(SPINEL) $< -o $@
|
|
745
|
+
example_lmc: examples/example_lmc
|
|
746
|
+
|
|
747
|
+
# E2.3 (towards GH#14) — projection-lens smoke.
|
|
748
|
+
prep/smokes/smoke_projection_lens: prep/smokes/smoke_projection_lens.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
749
|
+
$(SPINEL) $< -o $@
|
|
750
|
+
|
|
751
|
+
# toy#42 full-API require gate. Compiling this proves lib/toy/compute.rb's whole
|
|
752
|
+
# surface (all three engines + recipes + loaders) co-compiles in one program;
|
|
753
|
+
# running it realizes a LlamaSeqEngine to prove the surface is live. The prereq
|
|
754
|
+
# is lib/toy/compute.rb (plus training_batch.rb and recipe_options.rb) — compute.rb pulls the rest transitively, and
|
|
755
|
+
# $(SPINEL) follows the require graph.
|
|
756
|
+
prep/smokes/smoke_compute_surface: prep/smokes/smoke_compute_surface.rb lib/toy/compute.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
757
|
+
$(SPINEL) $< -o $@
|
|
758
|
+
|
|
759
|
+
# toy#64 item 8 — the CUDA compute entry (lib/toy/compute_cuda.rb), the
|
|
760
|
+
# consumer-ish device-at-compile-time gate. Same shape as the CPU
|
|
761
|
+
# compute-surface gate but requires compute_cuda + links the CUDA
|
|
762
|
+
# archives with the force-link flag. The generated CUDA mirrors in the
|
|
763
|
+
# dep list are kept fresh by the $(MIRROR_CUDA) pattern rules.
|
|
764
|
+
prep/smokes/smoke_compute_surface_cuda: prep/smokes/smoke_compute_surface_cuda.rb lib/toy/compute_cuda.rb \
|
|
765
|
+
lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb \
|
|
766
|
+
lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/gpt2_seq_engine_cuda.rb \
|
|
767
|
+
lib/toy/llm/engine/llama_kv_engine_cuda.rb \
|
|
768
|
+
lib/toy/llm/recipes/from_scratch_cuda.rb lib/toy/llm/recipes/warm_start_cuda.rb \
|
|
769
|
+
lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
|
|
770
|
+
lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
|
|
771
|
+
lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
|
|
772
|
+
lib/toy/ffi/tinynn_cuda.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
|
|
773
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
774
|
+
|
|
775
|
+
# P2.6 — GQA-divergent (w_o) gate. Realizes a config with head_dim=24 so
|
|
776
|
+
# n_heads*head_dim (96) != d_model (64), proving the divergent w_o shape
|
|
777
|
+
# [d_model, n_heads*head_dim] allocates and runs forward+backward.
|
|
778
|
+
prep/smokes/smoke_gate_gqa_divergent: prep/smokes/smoke_gate_gqa_divergent.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
779
|
+
$(SPINEL) $< -o $@
|
|
780
|
+
|
|
781
|
+
# P2.6 — llama3 RoPE post-rope TENSOR parity gate. Builds a standalone
|
|
782
|
+
# post-rope subgraph from the SAME public primitive (RoPE.apply_2d) the
|
|
783
|
+
# model's K/Q paths call, with a NON-NULL, NON-TRIVIAL llama3 freq_factors
|
|
784
|
+
# ptr (computed via Toy::RopeScaling.compute_llama3_freq_factors). Logit-
|
|
785
|
+
# level is rope-angle-INSENSITIVE, so the gate taps the post-rope tensor:
|
|
786
|
+
# asserts (a) freq_factors non-uniform / kind==:llama3, (b) post-rope output
|
|
787
|
+
# byte-identical run-to-run, plus a contrast guard vs :none (NULL factors).
|
|
788
|
+
# No model file, no lib/ change, no mirror regen. Run from repo root.
|
|
789
|
+
prep/smokes/smoke_gate_llama3_tensor: prep/smokes/smoke_gate_llama3_tensor.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
790
|
+
$(SPINEL) $< -o $@
|
|
791
|
+
|
|
792
|
+
# P2.6 — B>1 (micro-batch) gate. Realizes with t_batch=2 so @seq_b=2,
|
|
793
|
+
# forcing the block-causal mask alloc + upload (gated on @seq_b>1) and the
|
|
794
|
+
# soft_max_ext attention path (gqa.rb:50). Proves the batched graph
|
|
795
|
+
# allocates the [T*B,T*B] mask and runs forward+backward; records a
|
|
796
|
+
# reproducible loss baseline. MUST run from repo root (data/ts_seqs.txt).
|
|
797
|
+
prep/smokes/smoke_gate_b_gt_1: prep/smokes/smoke_gate_b_gt_1.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
798
|
+
$(SPINEL) $< -o $@
|
|
799
|
+
|
|
800
|
+
# P2.6 — L4 FromScratch recipe gate. Drives the same random-init config
|
|
801
|
+
# as smoke_projection_lens THROUGH Toy::LLM::Recipes::FromScratch; its
|
|
802
|
+
# loss curve must byte-equal the projection-lens reference.
|
|
803
|
+
prep/smokes/smoke_recipe_from_scratch: prep/smokes/smoke_recipe_from_scratch.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
804
|
+
$(SPINEL) $< -o $@
|
|
805
|
+
|
|
806
|
+
# BLESSED from-scratch path — the short tutorial. Same gate-fixture
|
|
807
|
+
# config as smoke_recipe_from_scratch, but the clean tutorial read using
|
|
808
|
+
# the value objects (Toy::SmolLM2Config.mha + Toy::Labels + Toy::AdamW).
|
|
809
|
+
examples/example_train_from_scratch_blessed: examples/legacy/train_from_scratch.rb lib/toy.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/from_scratch.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
810
|
+
$(SPINEL) $< -o $@
|
|
811
|
+
example_train_from_scratch_blessed: examples/example_train_from_scratch_blessed
|
|
812
|
+
|
|
813
|
+
# ── Curated examples (toy#60) — the narrated teaching set. One file,
|
|
814
|
+
# one make target, one binary each; see examples/README.md for the tour.
|
|
815
|
+
# 01 — from-scratch on the bundled tiny corpus via the one-require
|
|
816
|
+
# compute surface + the named value objects. THE showcase; the example
|
|
817
|
+
# in docs/framework.md must stay truthful to this file.
|
|
818
|
+
examples/example_01_train_tiny: examples/01_train_tiny.rb lib/toy/compute.rb lib/toy/io/toy_corpus_loader.rb lib/toy/io/run_bundle.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
819
|
+
$(SPINEL) $< -o $@
|
|
820
|
+
example_01: examples/example_01_train_tiny
|
|
821
|
+
.PHONY: example_01
|
|
822
|
+
|
|
823
|
+
# 02 — warm-start fine-tune: donor token_embd from a real GGUF through
|
|
824
|
+
# Toy::LLM::Recipes::WarmStart (realize_scratch! → realize_warm! → build!).
|
|
825
|
+
examples/example_02_finetune_warm_start: examples/02_finetune_warm_start.rb lib/toy/compute.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
826
|
+
$(SPINEL) $< -o $@
|
|
827
|
+
example_02: examples/example_02_finetune_warm_start
|
|
828
|
+
.PHONY: example_02
|
|
829
|
+
|
|
830
|
+
# 03 — LoRA adapters over a frozen mmap'd base GGUF, via the one-require
|
|
831
|
+
# compute surface (lora re-added to it by toy#52).
|
|
832
|
+
examples/example_03_lora: examples/03_lora.rb lib/toy/compute.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/training_batch.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
833
|
+
$(SPINEL) $< -o $@
|
|
834
|
+
example_03: examples/example_03_lora
|
|
835
|
+
.PHONY: example_03
|
|
836
|
+
|
|
837
|
+
# 04 — load a GGUF, KV-cache decode, print text (the llama_kv_engine
|
|
838
|
+
# path the `toy infer` runner drives).
|
|
839
|
+
examples/example_04_generate: examples/04_generate.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
840
|
+
$(SPINEL) $< -o $@
|
|
841
|
+
example_04: examples/example_04_generate
|
|
842
|
+
.PHONY: example_04
|
|
843
|
+
|
|
844
|
+
# 05 — per-token logprobs at a decode position (the `toy eval` compute).
|
|
845
|
+
examples/example_05_eval_logprobs: examples/05_eval_logprobs.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/io/tokenizer.rb lib/toy/dev/toy_logprobs.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
846
|
+
$(SPINEL) $< -o $@
|
|
847
|
+
example_05: examples/example_05_eval_logprobs
|
|
848
|
+
.PHONY: example_05
|
|
849
|
+
|
|
850
|
+
# 06 — CRuby, NOT compiled: Toy::RunLog comparison table over runs/.
|
|
851
|
+
example_06:
|
|
852
|
+
ruby examples/06_runlog_compare.rb
|
|
853
|
+
.PHONY: example_06
|
|
854
|
+
|
|
855
|
+
# 07 — ViT-Tiny on the committed data/vit_smoke corpus via Recipes::VitTiny.
|
|
856
|
+
examples/example_07_vit_tiny: examples/07_vit_tiny.rb lib/toy/compute.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/llm/recipes/vit_tiny.rb lib/toy/models/toy_vit.rb lib/toy/io/toy_image_loader.rb lib/toy/io/run_bundle.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/classify_batch.rb lib/toy/llm/recipe_options.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
857
|
+
$(SPINEL) $< -o $@
|
|
858
|
+
example_07: examples/example_07_vit_tiny
|
|
859
|
+
.PHONY: example_07
|
|
860
|
+
|
|
861
|
+
examples-curated: example_01 example_02 example_03 example_04 example_05 example_07
|
|
862
|
+
.PHONY: examples-curated
|
|
863
|
+
|
|
864
|
+
# L4 LoRA recipe gate. Drives the same LoRA fine-tune config as the
|
|
865
|
+
# frozen reference 03_finetune_lora THROUGH Toy::LLM::Recipes::LoRA; its
|
|
866
|
+
# loss curve must byte-equal the reference at the fixed config.
|
|
867
|
+
prep/smokes/smoke_recipe_lora: prep/smokes/smoke_recipe_lora.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/llm/adamw.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/lora.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
868
|
+
$(SPINEL) $< -o $@
|
|
869
|
+
|
|
870
|
+
# L4 WarmStart recipe gate. Drives the same warm-start config as the
|
|
871
|
+
# frozen reference 09_warm_start_train (INIT=scratch) THROUGH
|
|
872
|
+
# Toy::LLM::Recipes::WarmStart; its loss curve must byte-equal 09's at
|
|
873
|
+
# the fixed config (SEED=0 STEPS=5). The fixture drives the cosine LR
|
|
874
|
+
# schedule + streaming corpus loader (deps below); the recipe stays thin.
|
|
875
|
+
prep/smokes/smoke_recipe_warm_start: prep/smokes/smoke_recipe_warm_start.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/llm/adamw.rb lib/toy/llm/labels.rb lib/toy/llm/recipe_options.rb lib/toy/llm/recipes/warm_start.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
876
|
+
$(SPINEL) $< -o $@
|
|
877
|
+
|
|
878
|
+
# P2.6 gate — GGUF F32 mmap round-trip parity. Head-fuses a random_init
|
|
879
|
+
# model into the FUSED llama.cpp naming, writes a GGUF, reloads via
|
|
880
|
+
# realize_for_mmap, and asserts the reloaded forward is BIT-IDENTICAL to
|
|
881
|
+
# the in-memory forward. This is the behavioral gate for realize_for_mmap
|
|
882
|
+
# (previously only realize_for_random_init was gated). CPU-only: the GGUF
|
|
883
|
+
# WRITE half reads host data ptrs (tnn_gguf_w_add_tensor), which the CUDA
|
|
884
|
+
# writer doesn't implement — do NOT auto-mirror this to CUDA.
|
|
885
|
+
prep/smokes/smoke_gguf_roundtrip: prep/smokes/smoke_gguf_roundtrip.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_gguf_fuse.rb lib/toy/train/toy_gguf_writer.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
886
|
+
$(SPINEL) $< -o $@
|
|
887
|
+
|
|
888
|
+
prep/smokes/smoke_full_finetune: prep/smokes/smoke_full_finetune.rb lib/toy/llm/adamw.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
889
|
+
$(SPINEL) $< -o $@
|
|
890
|
+
|
|
891
|
+
# P2.6 gate — qkv_bias mmap branch. Loads the real Qwen2.5-0.5B native GGUF
|
|
892
|
+
# (which DOES carry blk.N.attn_{q,k,v}.bias) and realizes via
|
|
893
|
+
# realize_for_mmap with qkv_bias=TRUE, untied=FALSE (output.weight absent =>
|
|
894
|
+
# tied), forcing the bias mmap branch (llama_seq_engine.rb:635-661) and
|
|
895
|
+
# its transformer_block tnn_add consumer — neither hit by smoke_gguf_roundtrip
|
|
896
|
+
# (qkv_bias=FALSE). Records a deterministic finite-logit baseline. CPU-only;
|
|
897
|
+
# DATA DEPENDENCY: data/qwen25-0.5b-native.gguf (not self-contained). MUST run
|
|
898
|
+
# from repo root. Do NOT auto-mirror to CUDA.
|
|
899
|
+
prep/smokes/smoke_gate_qkv_bias: prep/smokes/smoke_gate_qkv_bias.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
900
|
+
$(SPINEL) $< -o $@
|
|
901
|
+
|
|
902
|
+
# P2.6 gate — Q8-stays-Q8 realize_for_q8_copy branch. Loads the existing
|
|
903
|
+
# Q8 GGUF, asserts blk.0 attn_q weight stays Q8_0 in memory (NOT dequant
|
|
904
|
+
# to F32), deterministic forward x2 byte-identical baseline. Pure-Ruby
|
|
905
|
+
# fixture (no toy_drift_grad dep; seq_blocks_ffi directly).
|
|
906
|
+
prep/smokes/smoke_gate_q8_preserve: prep/smokes/smoke_gate_q8_preserve.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
907
|
+
$(SPINEL) $< -o $@
|
|
908
|
+
|
|
909
|
+
# P2.6 CUDA gate — GPU mirror of the projection-lens smoke. Exercises
|
|
910
|
+
# realize_for_random_init + seq forward on the CUDA backend so the
|
|
911
|
+
# realize-path refactor can be parity-gated on GPU (CUDA self-consistency
|
|
912
|
+
# before/after; CUDA floats don't bit-equal CPU). Mirror auto-generated
|
|
913
|
+
# by prep/gen_cuda_mirror.rb. Same force-link recipe as the 06 CUDA entry.
|
|
914
|
+
prep/smokes/smoke_projection_lens_cuda: prep/smokes/smoke_projection_lens_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/train/toy_drift_grad.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
|
|
915
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
916
|
+
|
|
917
|
+
# E2.4 (towards GH#14) — streaming corpus loader + cosine LR smoke.
|
|
918
|
+
prep/smokes/smoke_corpus_loader: prep/smokes/smoke_corpus_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb tinynn/libtinynn_ggml.a
|
|
919
|
+
$(SPINEL) $< -o $@
|
|
920
|
+
|
|
921
|
+
# E2.5 (towards GH#14) — warm-start training driver.
|
|
922
|
+
examples/example_warm_start_train: examples/legacy/09_warm_start_train.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/io/toy_corpus_loader.rb lib/toy/train/toy_lr_schedule.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
923
|
+
$(SPINEL) $< -o $@
|
|
924
|
+
example_warm_start_train: examples/example_warm_start_train
|
|
925
|
+
|
|
926
|
+
# Auto-generated coverage matrix — ggml ops vs our FFI surface.
|
|
927
|
+
# Sources are vendor/ggml/include/ggml.h, tinynn/tinynn_ggml.c, and the
|
|
928
|
+
# two FFI binding files. See docs/coverage.md for the matrix.
|
|
929
|
+
coverage: docs/coverage.md
|
|
930
|
+
docs/coverage.md: prep/gen_coverage.rb vendor/ggml/include/ggml.h \
|
|
931
|
+
tinynn/tinynn_ggml.c lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb \
|
|
932
|
+
lib/toy/ffi/tinynn_metal.rb
|
|
933
|
+
ruby prep/gen_coverage.rb
|
|
934
|
+
coverage-check:
|
|
935
|
+
ruby prep/gen_coverage.rb --check
|
|
936
|
+
.PHONY: coverage coverage-check
|
|
937
|
+
|
|
938
|
+
examples/example_train: examples/legacy/02_train_custom_gpt.rb lib/toy/models/transformer.rb lib/toy/train/training.rb lib/toy/train/toy_trainer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
939
|
+
$(SPINEL) $< -o $@
|
|
940
|
+
example_train: examples/example_train
|
|
941
|
+
|
|
942
|
+
examples/example_finetune: examples/legacy/03_finetune_lora.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
943
|
+
$(SPINEL) $< -o $@
|
|
944
|
+
example_finetune: examples/example_finetune
|
|
945
|
+
|
|
946
|
+
# CUDA mirror — same source, swap TinyNN → TinyNNCuda by including
|
|
947
|
+
# both libs. The example source uses TinyNN; the CUDA build link-step
|
|
948
|
+
# carries CUDA symbols too (no source change). For real GPU speedup
|
|
949
|
+
# users typically write a `_cuda` variant; this mirror is for the
|
|
950
|
+
# build-recipe story.
|
|
951
|
+
examples/example_finetune_cuda: examples/legacy/03_finetune_lora_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
952
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
953
|
+
example_finetune_cuda: examples/example_finetune_cuda
|
|
954
|
+
|
|
955
|
+
# Metal mirror of example_inference (macOS only). Uses TinyNNMetal.
|
|
956
|
+
# Same -Wl,-u trick as CUDA so the Metal backend init survives
|
|
957
|
+
# weak-symbol resolution. macOS expects a leading underscore on
|
|
958
|
+
# external symbols, hence `-Wl,-u,_tnn_metal_force_link`.
|
|
959
|
+
# Frameworks (Foundation/Metal/MetalKit) are linked via -framework.
|
|
960
|
+
examples/example_inference_metal: examples/legacy/01_inference_metal.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm_metal.rb lib/toy/llm/engine/llama_kv_engine_metal.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_metal.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_metal.a
|
|
961
|
+
ifneq ($(UNAME_S),Darwin)
|
|
962
|
+
@echo "example_inference_metal: macOS-only"; exit 1
|
|
963
|
+
endif
|
|
964
|
+
$(SPINEL) --cc='cc -Wl,-u,_tnn_metal_force_link -framework Foundation -framework Metal -framework MetalKit' $< -o $@
|
|
965
|
+
example_inference_metal: examples/example_inference_metal
|
|
966
|
+
|
|
967
|
+
# DEVICE-aware entry point. Toy's per-backend Spinel binaries can't
|
|
968
|
+
# share a Ruby file (poly-dispatch landmines on LlamaSeqForwardFFICache
|
|
969
|
+
# vs *Cuda), so the entry point is a shell-script dispatcher.
|
|
970
|
+
#
|
|
971
|
+
# Today only DEVICE=cpu is supported for from-scratch training:
|
|
972
|
+
# LlamaSeqForwardFFICacheCuda implements realize_for_mmap (LoRA /
|
|
973
|
+
# fine-tune from a base GGUF) but NOT realize_for_random_init.
|
|
974
|
+
# Adding CUDA random-init is a real feature — tracked under
|
|
975
|
+
# toy#train-device-select-cuda follow-up. The dispatcher errors
|
|
976
|
+
# cleanly on DEVICE=cuda so Tao's `run_start.backend.kind=="cuda"`
|
|
977
|
+
# acceptance fails honestly rather than silently emitting cpu data.
|
|
978
|
+
examples/example_train_from_scratch_cpu: examples/legacy/06_train_from_scratch.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/dev/toy_tap.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
979
|
+
$(SPINEL) $< -o $@
|
|
980
|
+
examples/example_train_from_scratch_cuda: examples/legacy/06_train_from_scratch_cuda.rb vendor/spinel/spinel_kit/lib/spinel_kit/json_builder.rb lib/toy/io/toy_events.rb vendor/spinel/spinel_kit/lib/spinel_kit/git.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb lib/toy/dev/toy_describe_flow.rb lib/toy/train/toy_drift_grad.rb lib/toy/train/toy_gguf_writer.rb lib/toy/dev/toy_tap.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
|
|
981
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
982
|
+
examples/example_train_from_scratch: examples/example_train_from_scratch_cpu
|
|
983
|
+
@printf '#!/bin/sh\n# Auto-generated by Makefile. DEVICE selects the backend binary.\n# Edit examples/legacy/06_train_from_scratch.rb (cpu) for behaviour; CUDA mirror is auto-generated by prep/gen_cuda_mirror.rb.\ncase "$${DEVICE:-cpu}" in\n cpu|"") exec "$$(dirname "$$0")/example_train_from_scratch_cpu" "$$@" ;;\n cuda) exec "$$(dirname "$$0")/example_train_from_scratch_cuda" "$$@" ;;\n metal) echo "DEVICE=metal not yet supported for training (inference only)" >&2; exit 2 ;;\n *) echo "DEVICE=$${DEVICE} not recognised (want cpu|cuda)" >&2; exit 2 ;;\nesac\n' > $@
|
|
984
|
+
@chmod +x $@
|
|
985
|
+
example_train_from_scratch: examples/example_train_from_scratch
|
|
986
|
+
example_train_from_scratch_cuda: examples/example_train_from_scratch_cuda
|
|
987
|
+
|
|
988
|
+
# GPT-2 from-scratch via the GPT2SeqEngine library API (the curated GPT-2 demo;
|
|
989
|
+
# CLI surface is `toy train from-scratch --arch gpt2`). Memorizes a synthetic
|
|
990
|
+
# sequence so CE visibly collapses; exercises the vendored LayerNorm/GELU kernels.
|
|
991
|
+
examples/gpt2_train: examples/legacy/gpt2_train.rb lib/toy.rb \
|
|
992
|
+
lib/toy/llm/engine/gpt2_seq_engine.rb lib/toy/models/transformer.rb \
|
|
993
|
+
lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS) | libexec
|
|
994
|
+
$(SPINEL) $< -o $@
|
|
995
|
+
gpt2_train: examples/gpt2_train
|
|
996
|
+
.PHONY: gpt2_train
|
|
997
|
+
|
|
998
|
+
examples: toy-infer example_train example_train_from_scratch gpt2_train
|
|
999
|
+
|
|
1000
|
+
# Phase 0.6 — CUDA-mirror generator. The CPU file is the source of
|
|
1001
|
+
# truth; the CUDA file is auto-generated by prep/gen_cuda_mirror.rb.
|
|
1002
|
+
# `make verify-mirrors` regenerates every mirror and exits non-zero if
|
|
1003
|
+
# the generator's --verify pass then disagrees with that fresh output.
|
|
1004
|
+
gen-mirrors:
|
|
1005
|
+
@ruby prep/gen_cuda_mirror.rb
|
|
1006
|
+
|
|
1007
|
+
# Mirrors are generated build artifacts (gitignored), so there is no committed
|
|
1008
|
+
# copy to drift against. verify-mirrors now regenerates every mirror (incl. the
|
|
1009
|
+
# Metal twins, which no Linux build consumes) and then re-runs the generator in
|
|
1010
|
+
# --verify mode: this asserts the generator is healthy and IDEMPOTENT (generate
|
|
1011
|
+
# == verify), the only invariant left once nothing is committed.
|
|
1012
|
+
verify-mirrors:
|
|
1013
|
+
@ruby prep/gen_cuda_mirror.rb
|
|
1014
|
+
@ruby prep/gen_cuda_mirror.rb --verify
|
|
1015
|
+
|
|
1016
|
+
# Mirrors generated at build time (never committed; gitignored). Every runner rule
|
|
1017
|
+
# lists the mirror .rb as a prerequisite, so Make regenerates it on demand from
|
|
1018
|
+
# the CPU source of truth + the generator. `--backend` writes one backend, so
|
|
1019
|
+
# each target rebuilds exactly itself. These mirror MIRRORABLE in
|
|
1020
|
+
# prep/gen_cuda_mirror.rb — keep the two lists in sync. STATIC pattern rules
|
|
1021
|
+
# (targets restricted to this explicit list) so hand-written mirrors like
|
|
1022
|
+
# lib/toy/ffi/tinynn_cuda.rb / lib/toy/models/transformer_lm_cuda.rb are NOT captured.
|
|
1023
|
+
MIRROR_CUDA := \
|
|
1024
|
+
lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb \
|
|
1025
|
+
lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb \
|
|
1026
|
+
lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb \
|
|
1027
|
+
lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/gpt2_seq_engine_cuda.rb \
|
|
1028
|
+
lib/toy/llm/recipes/from_scratch_cuda.rb lib/toy/llm/recipes/lora_cuda.rb \
|
|
1029
|
+
lib/toy/llm/recipes/warm_start_cuda.rb \
|
|
1030
|
+
lib/toy/llm/engine/llama_kv_engine_cuda.rb \
|
|
1031
|
+
lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb \
|
|
1032
|
+
examples/legacy/06_train_from_scratch_cuda.rb prep/smokes/smoke_projection_lens_cuda.rb
|
|
1033
|
+
MIRROR_METAL := $(MIRROR_CUDA:_cuda.rb=_metal.rb)
|
|
1034
|
+
|
|
1035
|
+
$(MIRROR_CUDA): %_cuda.rb: %.rb prep/gen_cuda_mirror.rb
|
|
1036
|
+
@ruby prep/gen_cuda_mirror.rb --backend cuda $<
|
|
1037
|
+
$(MIRROR_METAL): %_metal.rb: %.rb prep/gen_cuda_mirror.rb
|
|
1038
|
+
@ruby prep/gen_cuda_mirror.rb --backend metal $<
|
|
1039
|
+
|
|
1040
|
+
# Parity-checks vs native TransformerLM.forward.
|
|
1041
|
+
|
|
1042
|
+
# Tep+Spinel HTTP server demos. See tep_demo/README.md. Builds bypass
|
|
1043
|
+
# tep's translator (we use spinel directly on the spinelgems-vendored
|
|
1044
|
+
# tep tree at vendor/spinel/tep/lib/, produced by `make vendor-tep`).
|
|
1045
|
+
tep_demo/hello: tep_demo/hello_api.rb vendor/spinel/tep/lib/tep.rb
|
|
1046
|
+
$(SPINEL) tep_demo/hello_api.rb -o tep_demo/hello
|
|
1047
|
+
|
|
1048
|
+
# Inference API: /generate?n=N runs greedy generation via FullForwardFFICache.
|
|
1049
|
+
tep_demo/api: tep_demo/legacy/inference_api.rb vendor/spinel/tep/lib/tep.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1050
|
+
$(SPINEL) tep_demo/legacy/inference_api.rb -o tep_demo/api
|
|
1051
|
+
|
|
1052
|
+
# --- ggml vendor ------------------------------------------------------------
|
|
1053
|
+
# Vendor patches that must land before any ggml build target. See
|
|
1054
|
+
# vendor-patches/README.md for the per-patch rationale.
|
|
1055
|
+
GGML_PATCHES := \
|
|
1056
|
+
vendor-patches/0001-cuda-buffer_from_ptr.patch \
|
|
1057
|
+
vendor-patches/0002-cuda-buffer_from_ptr-reuse-iface.patch \
|
|
1058
|
+
vendor-patches/0003-cuda-buffer_from_ptr-copy-mode.patch \
|
|
1059
|
+
vendor-patches/0004-cuda-cpy-strided.patch \
|
|
1060
|
+
vendor-patches/0005-concat-backward.patch \
|
|
1061
|
+
vendor-patches/0006-getrows-back-large-vocab.patch \
|
|
1062
|
+
vendor-patches/0007-gpt2-backward-kernels.patch \
|
|
1063
|
+
vendor-patches/0008-mul-mat-backward-mixed-precision.patch \
|
|
1064
|
+
vendor-patches/0009-sched-unsupported-node-diagnostic.patch
|
|
1065
|
+
|
|
1066
|
+
# Sentinel file marking that all $(GGML_PATCHES) have been applied to
|
|
1067
|
+
# the vendored tree. Build targets depend on it through CMakeLists.txt
|
|
1068
|
+
# (which depends on this sentinel) so a fresh clone applies the patches
|
|
1069
|
+
# exactly once, and re-runs of `make setup-ggml` are no-ops as long as
|
|
1070
|
+
# the patch set is unchanged.
|
|
1071
|
+
$(GGML_DIR)/.patched: $(GGML_DIR)/CMakeLists.txt $(GGML_PATCHES)
|
|
1072
|
+
@echo " reset vendor/ggml to upstream HEAD (build/ untouched)"
|
|
1073
|
+
@cd $(GGML_DIR) && git reset --hard HEAD >/dev/null
|
|
1074
|
+
@cd $(GGML_DIR) && for p in $(GGML_PATCHES); do \
|
|
1075
|
+
echo " apply $$p"; \
|
|
1076
|
+
git apply "$(CURDIR)/$$p" || { echo " FAILED"; exit 1; }; \
|
|
1077
|
+
done
|
|
1078
|
+
touch $@
|
|
1079
|
+
|
|
1080
|
+
# Shallow-fetch the pinned ggml revision ($(GGML_REV)) into $(GGML_DIR).
# Retry-safe: `git init` tolerates an existing repo, and the remote is
# added-or-updated, so a run that died mid-fetch (CMakeLists.txt still
# missing) can simply be re-run instead of failing on "remote origin
# already exists".
$(GGML_DIR)/CMakeLists.txt:
|
|
1081
|
+
mkdir -p vendor
|
|
1082
|
+
git init -q $(GGML_DIR)
|
|
1083
|
+
cd $(GGML_DIR) && { git remote add origin $(GGML_REPO) 2>/dev/null || git remote set-url origin $(GGML_REPO); } \
|
|
1084
|
+
&& git fetch -q --depth 1 origin $(GGML_REV) \
|
|
1085
|
+
&& git checkout -q FETCH_HEAD
|
|
1086
|
+
|
|
1087
|
+
# GGML_OPENMP=OFF: avoid the libgomp link dependency. On macOS clang
|
|
1088
|
+
# ships libomp (LLVM), not libgomp (GNU); ggml's own thread pool covers
|
|
1089
|
+
# CPU parallelism either way. Same setting used on Linux for build
|
|
1090
|
+
# parity (and so lib/toy/ffi/tinynn.rb doesn't need ffi_lib "gomp").
|
|
1091
|
+
#
|
|
1092
|
+
# Build output is routed through prep/progress, which:
|
|
1093
|
+
# - tees full cmake/build output to vendor/ggml/<dir>.log
|
|
1094
|
+
# - draws a one-line [NN%] progress bar on a TTY (plain "[NN%] msg"
|
|
1095
|
+
# lines on CI / non-tty stdout, no overdraw)
|
|
1096
|
+
# - on non-zero exit, dumps the last 40 lines of the log + exits
|
|
1097
|
+
# with the child's status. NEVER swallows errors.
|
|
1098
|
+
# Disable with QUIET=0 (passes through stdout unchanged).
|
|
1099
|
+
# (PROGRESS / QUIET / QUIETLY are defined near the top of this file
|
|
1100
|
+
# alongside SPINEL_BIN — see the DevEx polish knobs block.)
|
|
1101
|
+
|
|
1102
|
+
# Helper: run a `cd $(GGML_DIR) && cmake -B <DIR> <FLAGS>` configure
|
|
1103
|
+
# step. Routes output to a logfile when QUIET=1; on failure dumps the
|
|
1104
|
+
# log tail and propagates the exit code. QUIET=0 passes through.
|
|
1105
|
+
# Args: $(1) = build dir name (build / build-metal / build-cuda)
|
|
1106
|
+
# $(2) = cmake invocation (everything after the cd)
|
|
1107
|
+
# On failure in QUIET=1 mode the recipe exits with cmake's own status
# (captured in rc right after the subshell), not a flat 1.
define ggml_configure
|
|
1108
|
+
@if [ "$(QUIET)" = "1" ]; then \
|
|
1109
|
+
log="$(CURDIR)/$(GGML_DIR)/$(1).config.log"; \
|
|
1110
|
+
( cd $(GGML_DIR) && $(2) ) >"$$log" 2>&1 || { rc=$$?; \
|
|
1111
|
+
echo " ✗ cmake configure ($(1)) failed; tail of $$log:"; \
|
|
1112
|
+
tail -30 "$$log"; exit $$rc; }; \
|
|
1113
|
+
else \
|
|
1114
|
+
cd $(GGML_DIR) && $(2) ; \
|
|
1115
|
+
fi
|
|
1116
|
+
endef
|
|
1117
|
+
|
|
1118
|
+
# Helper: run a `cmake --build <DIR> -j<N>` step. Routes through
|
|
1119
|
+
# prep/progress when QUIET=1 (single-line [NN%] bar, log tee). QUIET=0
|
|
1120
|
+
# passes through.
|
|
1121
|
+
# Args: $(1) = build dir name; $(2) = label tag (cpu/metal/cuda);
|
|
1122
|
+
# $(3) = cmake --build command
|
|
1123
|
+
define ggml_build
|
|
1124
|
+
@if [ "$(QUIET)" = "1" ]; then \
|
|
1125
|
+
LOG="$(CURDIR)/$(GGML_DIR)/$(1).build.log" LABEL="ggml-$(2)" \
|
|
1126
|
+
$(PROGRESS) -- sh -c "cd $(GGML_DIR) && $(3)"; \
|
|
1127
|
+
else \
|
|
1128
|
+
cd $(GGML_DIR) && $(3) ; \
|
|
1129
|
+
fi
|
|
1130
|
+
endef
|
|
1131
|
+
|
|
1132
|
+
# setup-ggml-* targets are user-facing phonies; the real work happens
|
|
1133
|
+
# in the libggml.a sentinel rules below so re-running setup is a no-op
|
|
1134
|
+
# once the static archive is built. Lets `make setup` / `toy install`
|
|
1135
|
+
# chain through without redoing the ~5 s incremental cmake check on
|
|
1136
|
+
# every invocation.
|
|
1137
|
+
.PHONY: setup-ggml setup-ggml-cuda setup-ggml-metal
|
|
1138
|
+
|
|
1139
|
+
setup-ggml: $(GGML_DIR)/build/src/libggml.a
|
|
1140
|
+
setup-ggml-cuda: $(GGML_DIR)/build-cuda/src/libggml.a
|
|
1141
|
+
setup-ggml-metal: $(GGML_DIR)/build-metal/src/libggml.a
|
|
1142
|
+
|
|
1143
|
+
$(GGML_DIR)/build/src/libggml.a: $(GGML_DIR)/.patched
|
|
1144
|
+
@echo " → configure ggml (cpu)"
|
|
1145
|
+
$(call ggml_configure,build,$(CMAKE_ENV) cmake -B build \
|
|
1146
|
+
-DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
|
|
1147
|
+
-DGGML_CUDA=OFF -DGGML_METAL=OFF -DGGML_VULKAN=OFF \
|
|
1148
|
+
-DGGML_OPENCL=OFF -DGGML_BLAS=OFF -DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
|
|
1149
|
+
-DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
|
|
1150
|
+
-DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
|
|
1151
|
+
@echo " → build ggml (cpu, $(NJOBS) jobs)"
|
|
1152
|
+
$(call ggml_build,build,cpu,$(CMAKE_ENV) cmake --build build -j$(NJOBS))
|
|
1153
|
+
|
|
1154
|
+
$(GGML_DIR)/build-cuda/src/libggml.a: $(GGML_DIR)/.patched
|
|
1155
|
+
@echo " → configure ggml (cuda, sm_$(GGML_CUDA_ARCH))"
|
|
1156
|
+
$(call ggml_configure,build-cuda,PATH=$(CUDA_DIR)/bin:$$PATH $(CMAKE_ENV) cmake -B build-cuda \
|
|
1157
|
+
-DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
|
|
1158
|
+
-DGGML_CUDA=ON -DGGML_METAL=OFF -DGGML_VULKAN=OFF \
|
|
1159
|
+
-DGGML_OPENCL=OFF -DGGML_BLAS=OFF -DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
|
|
1160
|
+
-DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
|
|
1161
|
+
-DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
|
1162
|
+
-DCMAKE_CUDA_ARCHITECTURES=$(GGML_CUDA_ARCH) -DGGML_NATIVE=OFF)
|
|
1163
|
+
@echo " → build ggml (cuda, $(NJOBS) jobs)"
|
|
1164
|
+
$(call ggml_build,build-cuda,cuda,PATH=$(CUDA_DIR)/bin:$$PATH $(CMAKE_ENV) cmake --build build-cuda -j$(NJOBS))
|
|
1165
|
+
|
|
1166
|
+
# Metal build (macOS only). GGML_METAL_EMBED_LIBRARY=ON bakes the
|
|
1167
|
+
# .metal shader source into the static archive as raw bytes; the
|
|
1168
|
+
# Metal driver JIT-compiles it on first device load. This lets the
|
|
1169
|
+
# whole pipeline work with the Command Line Tools (xcrun metal /
|
|
1170
|
+
# metallib are full-Xcode-only). On a Mac with full Xcode you can
|
|
1171
|
+
# flip GGML_METAL_EMBED_LIBRARY=OFF for AOT-compiled kernels.
|
|
1172
|
+
$(GGML_DIR)/build-metal/src/libggml.a: $(GGML_DIR)/.patched
|
|
1173
|
+
ifneq ($(UNAME_S),Darwin)
|
|
1174
|
+
@echo "setup-ggml-metal: Metal is macOS-only (uname -s = $(UNAME_S))"; exit 1
|
|
1175
|
+
endif
|
|
1176
|
+
@echo " → configure ggml (metal)"
|
|
1177
|
+
$(call ggml_configure,build-metal,$(CMAKE_ENV) cmake -B build-metal \
|
|
1178
|
+
-DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON \
|
|
1179
|
+
-DGGML_CUDA=OFF -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON \
|
|
1180
|
+
-DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_BLAS=OFF \
|
|
1181
|
+
-DGGML_OPENMP=OFF -DGGML_ACCELERATE=OFF \
|
|
1182
|
+
-DGGML_BUILD_EXAMPLES=OFF -DGGML_BUILD_TESTS=OFF \
|
|
1183
|
+
-DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON)
|
|
1184
|
+
@echo " → build ggml (metal, $(NJOBS) jobs)"
|
|
1185
|
+
$(call ggml_build,build-metal,metal,$(CMAKE_ENV) cmake --build build-metal -j$(NJOBS))
|
|
1186
|
+
|
|
1187
|
+
# --- tinynn shim (CPU build) ------------------------------------------------
|
|
1188
|
+
GGML_INC := -I$(GGML_DIR)/include -I$(GGML_DIR)/src
|
|
1189
|
+
|
|
1190
|
+
tinynn/tinynn_ggml.o: tinynn/tinynn_ggml.c tinynn/tinynn_ggml.h tinynn/tinynn_trace.h
|
|
1191
|
+
$(CC) $(CFLAGS) $(GGML_INC) -c $< -o $@
|
|
1192
|
+
|
|
1193
|
+
tinynn/tinynn_gguf.o: tinynn/tinynn_gguf.c tinynn/tinynn_gguf.h
|
|
1194
|
+
$(CC) $(CFLAGS) $(GGML_INC) -c $< -o $@
|
|
1195
|
+
|
|
1196
|
+
tinynn/tinynn_trace.o: tinynn/tinynn_trace.c tinynn/tinynn_trace.h
|
|
1197
|
+
$(CC) $(CFLAGS) -c $< -o $@
|
|
1198
|
+
|
|
1199
|
+
tinynn/tinynn_events.o: tinynn/tinynn_events.c tinynn/tinynn_events.h
|
|
1200
|
+
$(CC) $(CFLAGS) -c $< -o $@
|
|
1201
|
+
|
|
1202
|
+
tinynn/libtinynn_ggml.a: tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o
|
|
1203
|
+
ar $(ARFLAGS) $@ tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o
|
|
1204
|
+
|
|
1205
|
+
# --- toy#71 Stage B: the CRuby-oracle shared library ------------------------
|
|
1206
|
+
# tinynn objects + the static CPU ggml archives linked into ONE self-
|
|
1207
|
+
# contained shared object that plain MRI dlopens via Fiddle (lib/toy/mri.rb
|
|
1208
|
+
# native arm). PIC is already on everywhere (CFLAGS -fPIC for tinynn,
|
|
1209
|
+
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON for ggml). -Wl,-Bsymbolic binds
|
|
1210
|
+
# ggml's intra-library references locally — without it the aarch64 link
|
|
1211
|
+
# rejects adrp relocations against ggml's C++ vtables ("may bind
|
|
1212
|
+
# externally"); no interposition is wanted anyway. --whole-archive keeps
|
|
1213
|
+
# every backend-registry object alive. Link order mirrors the Spinel
|
|
1214
|
+
# ffi_lib list in lib/toy/ffi/tinynn.rb (stdc++/pthread, -lm TRAILING; no
|
|
1215
|
+
# gomp — the CPU ggml build is -DGGML_OPENMP=OFF). CPU ONLY this stage:
|
|
1216
|
+
# the CUDA/Metal shims stay static-archive-only (follow-up — a
|
|
1217
|
+
# libtinynn_ggml_cuda_shared.so would whole-archive build-cuda/ + the CUDA
|
|
1218
|
+
# stub libs; Metal additionally needs a Mac to verify -dynamiclib +
|
|
1219
|
+
# -force_load). Artifact is gitignored (rebuild: make libtinynn_shared).
|
|
1220
|
+
.PHONY: libtinynn_shared
|
|
1221
|
+
libtinynn_shared: tinynn/libtinynn_ggml_shared.so
|
|
1222
|
+
|
|
1223
|
+
tinynn/libtinynn_ggml_shared.so: tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o $(GGML_DIR)/build/src/libggml.a
|
|
1224
|
+
ifeq ($(UNAME_S),Darwin)
|
|
1225
|
+
# macOS variant (toy#71 Stage B follow-up, Mac-verified 2026-06-12):
|
|
1226
|
+
# -dynamiclib for -shared; -force_load per archive for GNU ld's
|
|
1227
|
+
# --whole-archive (pulls every ggml object so the Fiddle backend
|
|
1228
|
+
# resolves all tnn_* symbols); -lc++ for libc++ (not libstdc++); no
|
|
1229
|
+
# -Bsymbolic (macOS two-level namespace already binds internally).
|
|
1230
|
+
# Output keeps the .so name the gate/Fiddle loader expects.
|
|
1231
|
+
$(CC) -dynamiclib -o $@ \
|
|
1232
|
+
tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o \
|
|
1233
|
+
-Wl,-force_load,$(GGML_DIR)/build/src/libggml.a \
|
|
1234
|
+
-Wl,-force_load,$(GGML_DIR)/build/src/libggml-cpu.a \
|
|
1235
|
+
-Wl,-force_load,$(GGML_DIR)/build/src/libggml-base.a \
|
|
1236
|
+
-lc++ -lpthread -lm
|
|
1237
|
+
else
|
|
1238
|
+
$(CC) -shared -Wl,-Bsymbolic -o $@ \
|
|
1239
|
+
tinynn/tinynn_ggml.o tinynn/tinynn_gguf.o tinynn/tinynn_trace.o tinynn/tinynn_events.o \
|
|
1240
|
+
-L$(GGML_DIR)/build/src \
|
|
1241
|
+
-Wl,--whole-archive -lggml -lggml-cpu -lggml-base -Wl,--no-whole-archive \
|
|
1242
|
+
-lstdc++ -lpthread -lm
|
|
1243
|
+
endif
|
|
1244
|
+
|
|
1245
|
+
# --- smoke test -------------------------------------------------------------
|
|
1246
|
+
# Builds tinynn/smoke.rb against the CPU shim. Requires `setup-ggml` to have
|
|
1247
|
+
# been run once first.
|
|
1248
|
+
# --- gem release prep (toy#45) ----------------------------------------------
|
|
1249
|
+
# The gem ships PRISTINE pinned ggml (patches apply at the consumer's vendor
|
|
1250
|
+
# step), so reset the working tree's ggml before `gem build`. Re-run setup-ggml
|
|
1251
|
+
# afterwards to restore the dev build. Also materialize the generated CUDA
|
|
1252
|
+
# mirrors (gitignored; toy.gemspec ships lib/toy/llm/*_cuda.rb explicitly) —
|
|
1253
|
+
# without them the gem's compute_cuda.rb requires point at missing files and
|
|
1254
|
+
# Spinel silently compiles them to nothing (toy#70 finding).
|
|
1255
|
+
# NB: reset to GGML_REV explicitly — NEVER FETCH_HEAD ("whatever was
|
|
1256
|
+
# fetched last"): a cold-fetch test moved FETCH_HEAD to ggml master and
|
|
1257
|
+
# this target silently staged UNVERIFIED ggml sources into the gem
|
|
1258
|
+
# (caught at the v0.8.0 wire). The assert keeps it loud.
|
|
1259
|
+
gem-prep: $(GGML_DIR)/CMakeLists.txt gen-mirrors
|
|
1260
|
+
cd $(GGML_DIR) && (git rev-parse --verify -q $(GGML_REV)^{commit} >/dev/null || git fetch -q --depth 1 origin $(GGML_REV)) && git reset --hard $(GGML_REV) >/dev/null
|
|
1261
|
+
rm -f $(GGML_DIR)/.patched
|
|
1262
|
+
@test "$$(cd $(GGML_DIR) && git rev-parse HEAD)" = "$(GGML_REV)" || { echo "FATAL: vendor/ggml HEAD != GGML_REV ($(GGML_REV)) after gem-prep"; exit 1; }
|
|
1263
|
+
@echo "ggml pristine at GGML_REV $$(cd $(GGML_DIR) && git rev-parse --short HEAD); now: gem build toy.gemspec"
|
|
1264
|
+
.PHONY: gem-prep
|
|
1265
|
+
|
|
1266
|
+
smoke: tinynn/smoke
|
|
1267
|
+
./tinynn/smoke
|
|
1268
|
+
|
|
1269
|
+
tinynn/smoke: tinynn/smoke.rb tinynn/libtinynn_ggml.a
|
|
1270
|
+
$(SPINEL) tinynn/smoke.rb -o tinynn/smoke
|
|
1271
|
+
|
|
1272
|
+
# A/B parity tests: native vs FFI (CPU) for one op each.
|
|
1273
|
+
ab-smoke: tinynn/ab_smoke
|
|
1274
|
+
./tinynn/ab_smoke
|
|
1275
|
+
|
|
1276
|
+
ab-smoke-add: tinynn/ab_smoke_add
|
|
1277
|
+
./tinynn/ab_smoke_add
|
|
1278
|
+
|
|
1279
|
+
ab-smoke-gelu: tinynn/ab_smoke_gelu
|
|
1280
|
+
./tinynn/ab_smoke_gelu
|
|
1281
|
+
|
|
1282
|
+
# Llama-family ops (silu, mul, eventually rope) — added with the
|
|
1283
|
+
# Toy::SmolLM2 FFI mirror work.
|
|
1284
|
+
ab-smoke-silu: tinynn/ab_smoke_silu
|
|
1285
|
+
./tinynn/ab_smoke_silu
|
|
1286
|
+
|
|
1287
|
+
tinynn/ab_smoke_silu: tinynn/ab_smoke_silu.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1288
|
+
$(SPINEL) $< -o $@
|
|
1289
|
+
|
|
1290
|
+
ab-smoke-mul: tinynn/ab_smoke_mul
|
|
1291
|
+
./tinynn/ab_smoke_mul
|
|
1292
|
+
|
|
1293
|
+
tinynn/ab_smoke_mul: tinynn/ab_smoke_mul.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1294
|
+
$(SPINEL) $< -o $@
|
|
1295
|
+
|
|
1296
|
+
ab-smoke-rms-norm: tinynn/ab_smoke_rms_norm
|
|
1297
|
+
./tinynn/ab_smoke_rms_norm
|
|
1298
|
+
|
|
1299
|
+
ab-smoke-softmax: tinynn/ab_smoke_softmax
|
|
1300
|
+
./tinynn/ab_smoke_softmax
|
|
1301
|
+
|
|
1302
|
+
ab-smoke-transpose: tinynn/ab_smoke_transpose
|
|
1303
|
+
./tinynn/ab_smoke_transpose
|
|
1304
|
+
|
|
1305
|
+
ab-smoke-scale: tinynn/ab_smoke_scale
|
|
1306
|
+
./tinynn/ab_smoke_scale
|
|
1307
|
+
|
|
1308
|
+
# Chained-op pipeline: gelu(h·w1)·w2 in one ggml graph.
|
|
1309
|
+
ab-smoke-pipeline: tinynn/ab_smoke_pipeline
|
|
1310
|
+
./tinynn/ab_smoke_pipeline
|
|
1311
|
+
|
|
1312
|
+
# Run every CPU smoke. (CUDA variants would need `make setup-ggml-cuda` first.)
|
|
1313
|
+
# `ab-smoke-transpose` is omitted: ggml_cont(ggml_transpose(...)) trips
|
|
1314
|
+
# the scheduler's buffer allocation; we fold transposes into consuming
|
|
1315
|
+
# ops instead (see TinyNN.matmul's b-transposed upload).
|
|
1316
|
+
test: smoke ab-smoke ab-smoke-add ab-smoke-gelu ab-smoke-rms-norm \
|
|
1317
|
+
ab-smoke-softmax ab-smoke-scale ab-smoke-pipeline \
|
|
1318
|
+
ab-smoke-matmul-variants ab-smoke-back ab-smoke-embed ab-smoke-sgd \
|
|
1319
|
+
ab-smoke-gelu-back ab-smoke-cegrad ab-smoke-adam
|
|
1320
|
+
|
|
1321
|
+
tinynn/ab_smoke: tinynn/ab_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1322
|
+
$(SPINEL) tinynn/ab_smoke.rb -o tinynn/ab_smoke
|
|
1323
|
+
|
|
1324
|
+
tinynn/ab_smoke_add: tinynn/ab_smoke_add.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1325
|
+
$(SPINEL) tinynn/ab_smoke_add.rb -o tinynn/ab_smoke_add
|
|
1326
|
+
|
|
1327
|
+
# E1.1 / GH#13 — Conv2D smoke + JSON dump for PyTorch parity.
|
|
1328
|
+
tinynn/ab_smoke_conv2d: tinynn/ab_smoke_conv2d.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1329
|
+
$(SPINEL) tinynn/ab_smoke_conv2d.rb -o tinynn/ab_smoke_conv2d
|
|
1330
|
+
|
|
1331
|
+
# E1.2 / GH#13 — patch_embed composite smoke + parity dump.
|
|
1332
|
+
tinynn/ab_smoke_patch_embed: tinynn/ab_smoke_patch_embed.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb lib/toy/models/toy_vit.rb tinynn/libtinynn_ggml.a
|
|
1333
|
+
$(SPINEL) tinynn/ab_smoke_patch_embed.rb -o tinynn/ab_smoke_patch_embed
|
|
1334
|
+
|
|
1335
|
+
# E1.3 / GH#13 — ViT-Tiny forward + training smoke.
|
|
1336
|
+
prep/smokes/smoke_vit_tiny: prep/smokes/smoke_vit_tiny.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
1337
|
+
$(SPINEL) $< -o $@
|
|
1338
|
+
|
|
1339
|
+
# E1.5 / GH#13 — image-loader smoke.
|
|
1340
|
+
prep/smokes/smoke_image_loader: prep/smokes/smoke_image_loader.rb lib/toy/io/toy_image_loader.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1341
|
+
$(SPINEL) $< -o $@
|
|
1342
|
+
|
|
1343
|
+
# E1.6 / GH#13 — ViT-Tiny training driver.
|
|
1344
|
+
examples/example_train_vit_tiny: examples/legacy/07_train_vit_tiny.rb lib/toy/llm/engine/vit_tiny_engine.rb lib/toy/models/toy_vit.rb lib/toy/models/toy_smollm2.rb lib/toy/io/toy_image_loader.rb lib/toy/train/toy_lr_schedule.rb lib/toy/train/toy_drift_grad.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a $(SPINEL_DEPS)
|
|
1345
|
+
$(SPINEL) $< -o $@
|
|
1346
|
+
example_train_vit_tiny: examples/example_train_vit_tiny
|
|
1347
|
+
|
|
1348
|
+
tinynn/ab_smoke_gelu: tinynn/ab_smoke_gelu.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1349
|
+
$(SPINEL) tinynn/ab_smoke_gelu.rb -o tinynn/ab_smoke_gelu
|
|
1350
|
+
|
|
1351
|
+
tinynn/ab_smoke_rms_norm: tinynn/ab_smoke_rms_norm.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1352
|
+
$(SPINEL) tinynn/ab_smoke_rms_norm.rb -o tinynn/ab_smoke_rms_norm
|
|
1353
|
+
|
|
1354
|
+
tinynn/ab_smoke_softmax: tinynn/ab_smoke_softmax.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1355
|
+
$(SPINEL) tinynn/ab_smoke_softmax.rb -o tinynn/ab_smoke_softmax
|
|
1356
|
+
|
|
1357
|
+
tinynn/ab_smoke_flash_attn: tinynn/ab_smoke_flash_attn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1358
|
+
$(SPINEL) tinynn/ab_smoke_flash_attn.rb -o tinynn/ab_smoke_flash_attn
|
|
1359
|
+
|
|
1360
|
+
tinynn/ab_smoke_q8_kv: tinynn/ab_smoke_q8_kv.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1361
|
+
$(SPINEL) tinynn/ab_smoke_q8_kv.rb -o tinynn/ab_smoke_q8_kv
|
|
1362
|
+
|
|
1363
|
+
tinynn/ab_smoke_moe_ffn: tinynn/ab_smoke_moe_ffn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1364
|
+
$(SPINEL) tinynn/ab_smoke_moe_ffn.rb -o tinynn/ab_smoke_moe_ffn
|
|
1365
|
+
|
|
1366
|
+
tinynn/ab_smoke_transpose: tinynn/ab_smoke_transpose.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1367
|
+
$(SPINEL) tinynn/ab_smoke_transpose.rb -o tinynn/ab_smoke_transpose
|
|
1368
|
+
|
|
1369
|
+
tinynn/ab_smoke_scale: tinynn/ab_smoke_scale.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1370
|
+
$(SPINEL) tinynn/ab_smoke_scale.rb -o tinynn/ab_smoke_scale
|
|
1371
|
+
|
|
1372
|
+
tinynn/ab_smoke_pipeline: tinynn/ab_smoke_pipeline.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1373
|
+
$(SPINEL) tinynn/ab_smoke_pipeline.rb -o tinynn/ab_smoke_pipeline
|
|
1374
|
+
|
|
1375
|
+
# Chained FFNFFICache parity: pre, hidden, out vs hand-rolled native.
|
|
1376
|
+
ab-smoke-ffncache: tinynn/ab_smoke_ffncache
|
|
1377
|
+
./tinynn/ab_smoke_ffncache
|
|
1378
|
+
|
|
1379
|
+
tinynn/ab_smoke_ffncache: tinynn/ab_smoke_ffncache.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1380
|
+
$(SPINEL) tinynn/ab_smoke_ffncache.rb -o tinynn/ab_smoke_ffncache
|
|
1381
|
+
|
|
1382
|
+
# ggml-native AdamW step (opt_step_adamw) parity vs project's plain-Adam.
|
|
1383
|
+
ab-smoke-adamw-op: tinynn/ab_smoke_adamw_op
|
|
1384
|
+
./tinynn/ab_smoke_adamw_op
|
|
1385
|
+
|
|
1386
|
+
tinynn/ab_smoke_adamw_op: tinynn/ab_smoke_adamw_op.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1387
|
+
$(SPINEL) tinynn/ab_smoke_adamw_op.rb -o tinynn/ab_smoke_adamw_op
|
|
1388
|
+
|
|
1389
|
+
# Persistent-tensor architecture check: data uploaded to a ctx_w tensor
|
|
1390
|
+
# survives a compute cycle.
|
|
1391
|
+
ab-smoke-persistent: tinynn/ab_smoke_persistent
|
|
1392
|
+
./tinynn/ab_smoke_persistent
|
|
1393
|
+
|
|
1394
|
+
tinynn/ab_smoke_persistent: tinynn/ab_smoke_persistent.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1395
|
+
$(SPINEL) tinynn/ab_smoke_persistent.rb -o tinynn/ab_smoke_persistent
|
|
1396
|
+
|
|
1397
|
+
# Dual-cgraph + persistent-weights design check: forward reads t_w;
|
|
1398
|
+
# adam mutates t_w in place; forward sees the new value.
|
|
1399
|
+
ab-smoke-dual-graph: tinynn/ab_smoke_dual_graph
|
|
1400
|
+
./tinynn/ab_smoke_dual_graph
|
|
1401
|
+
|
|
1402
|
+
tinynn/ab_smoke_dual_graph: tinynn/ab_smoke_dual_graph.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1403
|
+
$(SPINEL) tinynn/ab_smoke_dual_graph.rb -o tinynn/ab_smoke_dual_graph
|
|
1404
|
+
|
|
1405
|
+
# M2 foundation: view_2d + cpy to write a single row into a persistent
|
|
1406
|
+
# (max_T, d_head) KV buffer at a runtime-baked position.
|
|
1407
|
+
ab-smoke-kv-write: tinynn/ab_smoke_kv_write
|
|
1408
|
+
./tinynn/ab_smoke_kv_write
|
|
1409
|
+
|
|
1410
|
+
tinynn/ab_smoke_kv_write: tinynn/ab_smoke_kv_write.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1411
|
+
$(SPINEL) tinynn/ab_smoke_kv_write.rb -o tinynn/ab_smoke_kv_write
|
|
1412
|
+
|
|
1413
|
+
# M2 prototype: single-step decode through a KV cache. Pre-fills K/V
|
|
1414
|
+
# for positions 0..POS-1, writes k_new/v_new at POS, computes scores
|
|
1415
|
+
# + soft_max_ext + head_out. Parity vs hand-rolled native.
|
|
1416
|
+
ab-smoke-kv-attn: tinynn/ab_smoke_kv_attn
|
|
1417
|
+
./tinynn/ab_smoke_kv_attn
|
|
1418
|
+
|
|
1419
|
+
tinynn/ab_smoke_kv_attn: tinynn/ab_smoke_kv_attn.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1420
|
+
$(SPINEL) tinynn/ab_smoke_kv_attn.rb -o tinynn/ab_smoke_kv_attn
|
|
1421
|
+
|
|
1422
|
+
# M1.2: full single-block forward through the persistent graph.
|
|
1423
|
+
# Parity vs native TransformerLM.forward() at n_layers=1, n_heads=2.
|
|
1424
|
+
ab-smoke-full-forward-block: tinynn/ab_smoke_full_forward_block
|
|
1425
|
+
./tinynn/ab_smoke_full_forward_block
|
|
1426
|
+
|
|
1427
|
+
tinynn/ab_smoke_full_forward_block: tinynn/ab_smoke_full_forward_block.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1428
|
+
$(SPINEL) tinynn/ab_smoke_full_forward_block.rb -o tinynn/ab_smoke_full_forward_block
|
|
1429
|
+
|
|
1430
|
+
# Wallclock bench: native TransformerLM.forward vs FullForwardFFICache.
|
|
1431
|
+
full-forward-bench: tinynn/full_forward_bench
|
|
1432
|
+
./tinynn/full_forward_bench
|
|
1433
|
+
|
|
1434
|
+
tinynn/full_forward_bench: tinynn/full_forward_bench.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1435
|
+
$(SPINEL) tinynn/full_forward_bench.rb -o tinynn/full_forward_bench
|
|
1436
|
+
|
|
1437
|
+
full-forward-bench-cuda: tinynn/full_forward_bench_cuda
|
|
1438
|
+
./tinynn/full_forward_bench_cuda
|
|
1439
|
+
|
|
1440
|
+
tinynn/full_forward_bench_cuda: tinynn/full_forward_bench_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
|
|
1441
|
+
$(SPINEL) tinynn/full_forward_bench_cuda.rb -o tinynn/full_forward_bench_cuda
|
|
1442
|
+
|
|
1443
|
+
ab-smoke-dual-graph-cuda: tinynn/ab_smoke_dual_graph_cuda
|
|
1444
|
+
./tinynn/ab_smoke_dual_graph_cuda
|
|
1445
|
+
|
|
1446
|
+
tinynn/ab_smoke_dual_graph_cuda: tinynn/ab_smoke_dual_graph_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
|
|
1447
|
+
$(SPINEL) tinynn/ab_smoke_dual_graph_cuda.rb -o tinynn/ab_smoke_dual_graph_cuda
|
|
1448
|
+
|
|
1449
|
+
ab-smoke-adamw-op-cuda: tinynn/ab_smoke_adamw_op_cuda
|
|
1450
|
+
./tinynn/ab_smoke_adamw_op_cuda
|
|
1451
|
+
|
|
1452
|
+
tinynn/ab_smoke_adamw_op_cuda: tinynn/ab_smoke_adamw_op_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
|
|
1453
|
+
$(SPINEL) tinynn/ab_smoke_adamw_op_cuda.rb -o tinynn/ab_smoke_adamw_op_cuda
|
|
1454
|
+
|
|
1455
|
+
# A/B harness for the "fuse-or-not" question: N_HEADS small matmuls vs
|
|
1456
|
+
# 1 batched matmul at LoRA-Q shape. Override D_MODEL / N_HEADS / R / T
|
|
1457
|
+
# via env to sweep launch-overhead vs compute-bound regimes. See
|
|
1458
|
+
# docs/heavy-train-attribution-2026-05-24.md.
|
|
1459
|
+
ab-smoke-lora-fused-cuda: tinynn/ab_smoke_lora_fused_cuda
|
|
1460
|
+
./tinynn/ab_smoke_lora_fused_cuda
|
|
1461
|
+
|
|
1462
|
+
tinynn/ab_smoke_lora_fused_cuda: tinynn/ab_smoke_lora_fused_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml_cuda.a
|
|
1463
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' tinynn/ab_smoke_lora_fused_cuda.rb -o tinynn/ab_smoke_lora_fused_cuda
|
|
1464
|
+
|
|
1465
|
+
# Transformer-shape sized parity + wallclock comparison.
|
|
1466
|
+
ab-smoke-big: tinynn/ab_smoke_big
|
|
1467
|
+
./tinynn/ab_smoke_big
|
|
1468
|
+
|
|
1469
|
+
tinynn/ab_smoke_big: tinynn/ab_smoke_big.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1470
|
+
$(SPINEL) tinynn/ab_smoke_big.rb -o tinynn/ab_smoke_big
|
|
1471
|
+
|
|
1472
|
+
ab-smoke-matmul-variants: tinynn/ab_smoke_matmul_variants
|
|
1473
|
+
./tinynn/ab_smoke_matmul_variants
|
|
1474
|
+
|
|
1475
|
+
tinynn/ab_smoke_matmul_variants: tinynn/ab_smoke_matmul_variants.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1476
|
+
$(SPINEL) tinynn/ab_smoke_matmul_variants.rb -o tinynn/ab_smoke_matmul_variants
|
|
1477
|
+
|
|
1478
|
+
ab-smoke-back: tinynn/ab_smoke_back
|
|
1479
|
+
./tinynn/ab_smoke_back
|
|
1480
|
+
|
|
1481
|
+
tinynn/ab_smoke_back: tinynn/ab_smoke_back.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1482
|
+
$(SPINEL) tinynn/ab_smoke_back.rb -o tinynn/ab_smoke_back
|
|
1483
|
+
|
|
1484
|
+
ab-smoke-gelu-back: tinynn/ab_smoke_gelu_back
|
|
1485
|
+
./tinynn/ab_smoke_gelu_back
|
|
1486
|
+
|
|
1487
|
+
tinynn/ab_smoke_gelu_back: tinynn/ab_smoke_gelu_back.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1488
|
+
$(SPINEL) tinynn/ab_smoke_gelu_back.rb -o tinynn/ab_smoke_gelu_back
|
|
1489
|
+
|
|
1490
|
+
ab-smoke-cegrad: tinynn/ab_smoke_cegrad
|
|
1491
|
+
./tinynn/ab_smoke_cegrad
|
|
1492
|
+
|
|
1493
|
+
tinynn/ab_smoke_cegrad: tinynn/ab_smoke_cegrad.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1494
|
+
$(SPINEL) tinynn/ab_smoke_cegrad.rb -o tinynn/ab_smoke_cegrad
|
|
1495
|
+
|
|
1496
|
+
ab-smoke-adam: tinynn/ab_smoke_adam
|
|
1497
|
+
./tinynn/ab_smoke_adam
|
|
1498
|
+
|
|
1499
|
+
tinynn/ab_smoke_adam: tinynn/ab_smoke_adam.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1500
|
+
$(SPINEL) tinynn/ab_smoke_adam.rb -o tinynn/ab_smoke_adam
|
|
1501
|
+
|
|
1502
|
+
gguf-smoke: tinynn/gguf_smoke
|
|
1503
|
+
./tinynn/gguf_smoke
|
|
1504
|
+
|
|
1505
|
+
tinynn/gguf_smoke: tinynn/gguf_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1506
|
+
$(SPINEL) tinynn/gguf_smoke.rb -o tinynn/gguf_smoke
|
|
1507
|
+
|
|
1508
|
+
# Walks every tensor in data/distilgpt2-f32.gguf via tnn_gguf_*. Used to
|
|
1509
|
+
# confirm large HF-converted GGUFs roundtrip through the project FFI.
|
|
1510
|
+
gguf-inspect: tinynn/gguf_inspect
|
|
1511
|
+
./tinynn/gguf_inspect
|
|
1512
|
+
|
|
1513
|
+
tinynn/gguf_inspect: tinynn/gguf_inspect.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1514
|
+
$(SPINEL) tinynn/gguf_inspect.rb -o tinynn/gguf_inspect
|
|
1515
|
+
|
|
1516
|
+
# GPT2LM build smoke: confirm lib/toy/models/gpt2.rb Spinel-compiles and the
|
|
1517
|
+
# forward shapes line up. Toy dims, random weights — values mean nothing.
|
|
1518
|
+
gpt2-build-smoke: tinynn/gpt2_build_smoke
|
|
1519
|
+
./tinynn/gpt2_build_smoke
|
|
1520
|
+
|
|
1521
|
+
tinynn/gpt2_build_smoke: tinynn/gpt2_build_smoke.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb
|
|
1522
|
+
$(SPINEL) tinynn/gpt2_build_smoke.rb -o tinynn/gpt2_build_smoke
|
|
1523
|
+
|
|
1524
|
+
# Load distilgpt2-f32.gguf into a GPT2LM and print sentinel weights
|
|
1525
|
+
# per category. Verifies name mapping + per-head split before forward.
|
|
1526
|
+
gpt2-load-smoke: tinynn/gpt2_load_smoke
|
|
1527
|
+
./tinynn/gpt2_load_smoke
|
|
1528
|
+
|
|
1529
|
+
tinynn/gpt2_load_smoke: tinynn/gpt2_load_smoke.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1530
|
+
$(SPINEL) tinynn/gpt2_load_smoke.rb -o tinynn/gpt2_load_smoke
|
|
1531
|
+
|
|
1532
|
+
# Reads prompt token IDs from data/prompt_ids.txt, loads weights from data/distilgpt2-f32.gguf,
|
|
1533
|
+
# greedy-generates N_NEW tokens via native Mat forward, writes the
|
|
1534
|
+
# full ID sequence back. Decode with prep/tokens.py decode.
|
|
1535
|
+
|
|
1536
|
+
# Native Mat GPT-2 inference (DistilGPT2 / GPT-2 family).
|
|
1537
|
+
#
|
|
1538
|
+
gpt2: demos/gpt2
|
|
1539
|
+
demos/gpt2: demos/gpt2.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_gpt2.rb lib/toy/io/loaders/toy_gpt2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1540
|
+
$(SPINEL) $< -o $@
|
|
1541
|
+
|
|
1542
|
+
# SmolLM2-135M (llama-family) inference via Toy::SmolLM2.
|
|
1543
|
+
# Tokenization is host-side: ./prep/smollm2_tokens.py encode "..."
|
|
1544
|
+
smollm2: demos/smollm2
|
|
1545
|
+
demos/smollm2: demos/smollm2.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1546
|
+
$(SPINEL) $< -o $@
|
|
1547
|
+
|
|
1548
|
+
# SmolLM2-135M FFI KV-cache (CPU).
|
|
1549
|
+
smollm2_kv: demos/smollm2_kv
|
|
1550
|
+
demos/smollm2_kv: demos/smollm2_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1551
|
+
$(SPINEL) $< -o $@
|
|
1552
|
+
|
|
1553
|
+
# Qwen2.5 Mat-mediated KV-cache (CPU). The slow, correct reference path.
|
|
1554
|
+
# Run with `GGUF=data/qwen25-1.5b-f32.gguf ./demos/qwen25_kv` etc.
|
|
1555
|
+
qwen25_kv: demos/qwen25_kv
|
|
1556
|
+
demos/qwen25_kv: demos/qwen25_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1557
|
+
$(SPINEL) $< -o $@
|
|
1558
|
+
|
|
1559
|
+
# Qwen2.5 Phase-2 mmap inference (CPU). Canonical performance path.
|
|
1560
|
+
qwen25_native_mmap: demos/qwen25_native_mmap
|
|
1561
|
+
demos/qwen25_native_mmap: demos/qwen25_native_mmap.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1562
|
+
$(SPINEL) $< -o $@
|
|
1563
|
+
|
|
1564
|
+
# Phase 0.7 acceptance gates: 0.5B (f32 + Q8) + 1.5B + 3B greedy-decode
|
|
1565
|
+
# parity against locked-in golden token-ID sequences. Run before tagging
|
|
1566
|
+
# a release; see docs/design/phase-07-acceptance.md.
|
|
1567
|
+
qwen25_acceptance: demos/qwen25_acceptance
|
|
1568
|
+
demos/qwen25_acceptance: demos/qwen25_acceptance.rb lib/toy/models/arch.rb lib/toy/models/transformer_lm.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1569
|
+
$(SPINEL) $< -o $@
|
|
1570
|
+
|
|
1571
|
+
# Inference bench (CPU). Long warmup + long prefill + per-token stats.
|
|
1572
|
+
# Pick model via GGUF env; see docs/design/bench-cuda-2026-05-21.md.
|
|
1573
|
+
qwen25_bench_cpu: demos/qwen25_bench_cpu
|
|
1574
|
+
demos/qwen25_bench_cpu: demos/qwen25_bench_cpu.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1575
|
+
$(SPINEL) $< -o $@
|
|
1576
|
+
|
|
1577
|
+
# Inference bench (CUDA). Same shape as the CPU bench for side-by-side.
|
|
1578
|
+
qwen25_bench_cuda: demos/qwen25_bench_cuda
|
|
1579
|
+
demos/qwen25_bench_cuda: demos/qwen25_bench_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
|
|
1580
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1581
|
+
|
|
1582
|
+
# F1.2 step 2: LoRA-Q forward-parity gate. Loads SmolLM2-135M twice
|
|
1583
|
+
# (baseline + LoRA r=16 B=0), asserts bit-identical generated IDs.
|
|
1584
|
+
smollm2_lora_forward: demos/smollm2_lora_forward
|
|
1585
|
+
demos/smollm2_lora_forward: demos/smollm2_lora_forward.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1586
|
+
$(SPINEL) $< -o $@
|
|
1587
|
+
|
|
1588
|
+
# F1.2 step 3: backward through the full SmolLM2 decode graph,
|
|
1589
|
+
# layer-0 LoRA-Q updated via SGD. Requires the vendored CONCAT
|
|
1590
|
+
# backward in vendor/ggml/src/ggml.c.
|
|
1591
|
+
smollm2_lora_train_step: demos/smollm2_lora_train_step
|
|
1592
|
+
demos/smollm2_lora_train_step: demos/smollm2_lora_train_step.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1593
|
+
$(SPINEL) $< -o $@
|
|
1594
|
+
|
|
1595
|
+
# F1.2 step 4: all-layers LoRA-Q SGD on real CE loss against a rare
|
|
1596
|
+
# target token. 540 opt_step nodes (30 layers × 9 heads × 2 params).
|
|
1597
|
+
# Acceptance: monotonic decrease over 20 steps.
|
|
1598
|
+
smollm2_lora_train_ce: demos/smollm2_lora_train_ce
|
|
1599
|
+
demos/smollm2_lora_train_ce: demos/smollm2_lora_train_ce.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1600
|
+
$(SPINEL) $< -o $@
|
|
1601
|
+
|
|
1602
|
+
# F2 step 1: CUDA mirror of the LoRA forward parity gate.
|
|
1603
|
+
smollm2_lora_forward_cuda: demos/smollm2_lora_forward_cuda
|
|
1604
|
+
demos/smollm2_lora_forward_cuda: demos/smollm2_lora_forward_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1605
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1606
|
+
|
|
1607
|
+
# F2 step 2: CUDA mirror of the multi-layer SGD CE training smoke.
|
|
1608
|
+
smollm2_lora_train_ce_cuda: demos/smollm2_lora_train_ce_cuda
|
|
1609
|
+
demos/smollm2_lora_train_ce_cuda: demos/smollm2_lora_train_ce_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1610
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1611
|
+
|
|
1612
|
+
# Task #70 diagnostic — same CE smoke but with every graph_b node
|
|
1613
|
+
# pinned. Confirms sched intermediate-grad aliasing is the CPU
|
|
1614
|
+
# divergence's root cause. See docs/design/task70-root-cause-2026-05-21.md.
|
|
1615
|
+
smollm2_lora_train_ce_pinned: demos/smollm2_lora_train_ce_pinned
|
|
1616
|
+
demos/smollm2_lora_train_ce_pinned: demos/smollm2_lora_train_ce_pinned.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1617
|
+
$(SPINEL) $< -o $@
|
|
1618
|
+
|
|
1619
|
+
# F1.2 step 5: AdamW training with per-step m/v preservation via
|
|
1620
|
+
# tnn_graph_reset_grads_only. Converges 7.5 → 0.09 in 20 AdamW steps
|
|
1621
|
+
# at LR=1e-3 — proper SFT-shaped learning curve.
|
|
1622
|
+
smollm2_lora_train_adamw_cuda: demos/smollm2_lora_train_adamw_cuda
|
|
1623
|
+
demos/smollm2_lora_train_adamw_cuda: demos/smollm2_lora_train_adamw_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1624
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1625
|
+
|
|
1626
|
+
# F1.2 step 6a: multi-target AdamW SFT-shaped training. Cycles through
|
|
1627
|
+
# 5 target tokens × 10 epochs at the same prefix; expects loss to
|
|
1628
|
+
# drop on average + per-target. 10.8 → 3.6 in 10 epochs. Foundation
|
|
1629
|
+
# for step 6b (multi-position) and step 7 (real alpaca dataset).
|
|
1630
|
+
smollm2_lora_sft_multi_cuda: demos/smollm2_lora_sft_multi_cuda
|
|
1631
|
+
demos/smollm2_lora_sft_multi_cuda: demos/smollm2_lora_sft_multi_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1632
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1633
|
+
|
|
1634
|
+
# F1.2 step 6b — multi-position SFT (cycle pos4 / pos5). Validates
|
|
1635
|
+
# that persistent Adam m/v (allocated by enable_lora_q_adamw! +
|
|
1636
|
+
# realize_for_mmap) survive tnn_reset_for_rebuild between cycles.
|
|
1637
|
+
smollm2_lora_sft_multipos_cuda: demos/smollm2_lora_sft_multipos_cuda
|
|
1638
|
+
demos/smollm2_lora_sft_multipos_cuda: demos/smollm2_lora_sft_multipos_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1639
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1640
|
+
|
|
1641
|
+
# M3 step 1 — sequence-mode forward parity at T=1.
|
|
1642
|
+
# LlamaSeqForwardFFICache.forward([id], [0]) must match
|
|
1643
|
+
# SmolLM2KVFFICache + decode_step(id, 0). See
|
|
1644
|
+
# docs/design/m3-seq-forward-2026-05-21.md.
|
|
1645
|
+
smollm2_seq_parity: demos/smollm2_seq_parity
|
|
1646
|
+
demos/smollm2_seq_parity: demos/smollm2_seq_parity.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1647
|
+
$(SPINEL) $< -o $@
|
|
1648
|
+
|
|
1649
|
+
# M3 step 2 — T=4 trajectory parity (CPU). Per-position seq logits must
|
|
1650
|
+
# match the decode_step trajectory; proves causal-mask + multi-pos RoPE.
|
|
1651
|
+
smollm2_seq_parity_t4: demos/smollm2_seq_parity_t4
|
|
1652
|
+
demos/smollm2_seq_parity_t4: demos/smollm2_seq_parity_t4.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1653
|
+
$(SPINEL) $< -o $@
|
|
1654
|
+
|
|
1655
|
+
# M3 step 2 — CUDA mirror. T=1 and T=4 vs CPU decode_step trajectory.
|
|
1656
|
+
smollm2_seq_parity_cuda: demos/smollm2_seq_parity_cuda
|
|
1657
|
+
demos/smollm2_seq_parity_cuda: demos/smollm2_seq_parity_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1658
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1659
|
+
|
|
1660
|
+
smollm2_seq_parity_t4_cuda: demos/smollm2_seq_parity_t4_cuda
|
|
1661
|
+
demos/smollm2_seq_parity_t4_cuda: demos/smollm2_seq_parity_t4_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1662
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1663
|
+
|
|
1664
|
+
# M3 step 3 — seq-mode LoRA training smoke (CPU). One forward + backward
|
|
1665
|
+
# + opt_step over T positions; loss should decrease over N steps.
|
|
1666
|
+
smollm2_seq_train: demos/smollm2_seq_train
|
|
1667
|
+
demos/smollm2_seq_train: demos/smollm2_seq_train.rb lib/toy/llm/engine/llama_seq_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1668
|
+
$(SPINEL) $< -o $@
|
|
1669
|
+
|
|
1670
|
+
smollm2_seq_train_cuda: demos/smollm2_seq_train_cuda
|
|
1671
|
+
demos/smollm2_seq_train_cuda: demos/smollm2_seq_train_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1672
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1673
|
+
|
|
1674
|
+
# F3 — full fine-tune on CUDA. Every per-block weight tensor is
|
|
1675
|
+
# writable F32 + AdamW state; opt_step on each. See
|
|
1676
|
+
# docs/roadmap/f3-full-finetune-2026-05-21.md.
|
|
1677
|
+
smollm2_seq_full_finetune_cuda: demos/smollm2_seq_full_finetune_cuda
|
|
1678
|
+
demos/smollm2_seq_full_finetune_cuda: demos/smollm2_seq_full_finetune_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1679
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1680
|
+
|
|
1681
|
+
# F4 (QLoRA) on CUDA via realize_for_q8_copy. Q8 base in standard
|
|
1682
|
+
# CUDA buffer + F32 LoRA adapter; bypasses the BYO-pointer padding bug.
|
|
1683
|
+
smollm2_seq_qlora_cuda: demos/smollm2_seq_qlora_cuda
|
|
1684
|
+
demos/smollm2_seq_qlora_cuda: demos/smollm2_seq_qlora_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1685
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1686
|
+
|
|
1687
|
+
# Training step-time bench. MODE=lora|ft; STEPS=N; GGUF=path.
|
|
1688
|
+
# toy#77: the seq-engine mirror requires the primitives/blocks/archs
|
|
1689
|
+
# mirrors; without them in the dep list a FRESH checkout generates only
|
|
1690
|
+
# llama_seq_engine_cuda.rb, its require_relatives dangle (Spinel ignores
|
|
1691
|
+
# them with a warning), every engine type degrades to int and .new
|
|
1692
|
+
# returns nil — the demo then segfaults in the first attr setter.
|
|
1693
|
+
seq_train_bench_cuda: demos/seq_train_bench_cuda
|
|
1694
|
+
demos/seq_train_bench_cuda: demos/seq_train_bench_cuda.rb lib/toy/llm/engine/llama_seq_engine_cuda.rb lib/toy/llm/primitives/rms_norm_cuda.rb lib/toy/llm/primitives/rope_cuda.rb lib/toy/llm/primitives/swiglu_cuda.rb lib/toy/llm/primitives/gqa_cuda.rb lib/toy/llm/blocks/transformer_block_cuda.rb lib/toy/llm/archs/llama_arch_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a $(SPINEL_DEPS)
|
|
1695
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1696
|
+
|
|
1697
|
+
# Per-phase training-step bench (CPU + CUDA). Times graph_reset /
|
|
1698
|
+
# uploads / compute_backward / download separately. Doc:
|
|
1699
|
+
# docs/design/bench-train-2026-05-21.md.
|
|
1700
|
+
smollm2_lora_train_bench: demos/smollm2_lora_train_bench
|
|
1701
|
+
demos/smollm2_lora_train_bench: demos/smollm2_lora_train_bench.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1702
|
+
$(SPINEL) $< -o $@
|
|
1703
|
+
|
|
1704
|
+
smollm2_lora_train_bench_cuda: demos/smollm2_lora_train_bench_cuda
|
|
1705
|
+
demos/smollm2_lora_train_bench_cuda: demos/smollm2_lora_train_bench_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1706
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1707
|
+
|
|
1708
|
+
# Task #70 grad-magnitude probes (per-layer maxabs(grad_A), maxabs(grad_B)).
|
|
1709
|
+
smollm2_lora_grad_probe: demos/smollm2_lora_grad_probe
|
|
1710
|
+
demos/smollm2_lora_grad_probe: demos/smollm2_lora_grad_probe.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1711
|
+
$(SPINEL) $< -o $@
|
|
1712
|
+
|
|
1713
|
+
smollm2_lora_grad_probe_cuda: demos/smollm2_lora_grad_probe_cuda
|
|
1714
|
+
demos/smollm2_lora_grad_probe_cuda: demos/smollm2_lora_grad_probe_cuda.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1715
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1716
|
+
|
|
1717
|
+
# Qwen2.5 Phase-2 mmap inference (CUDA). Requires `make setup-ggml-cuda`.
|
|
1718
|
+
qwen25_native_mmap_cuda: demos/qwen25_native_mmap_cuda
|
|
1719
|
+
demos/qwen25_native_mmap_cuda: demos/qwen25_native_mmap_cuda.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1720
|
+
$(SPINEL) --cc='cc -Wl,-u,tnn_cuda_force_link' $< -o $@
|
|
1721
|
+
|
|
1722
|
+
# SmolLM2-135M FFI KV-cache (CUDA).
|
|
1723
|
+
smollm2_kv_cuda: demos/smollm2_kv_cuda
|
|
1724
|
+
demos/smollm2_kv_cuda: demos/smollm2_kv_cuda.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1725
|
+
$(SPINEL) $< -o $@
|
|
1726
|
+
|
|
1727
|
+
# TinyLlama-1.1B demo. Uses the same Toy::SmolLM2 / FFI KV CUDA stack
|
|
1728
|
+
# (llama-family architecture); just configured for the larger shape.
|
|
1729
|
+
tinyllama_kv_cuda: demos/tinyllama_kv_cuda
|
|
1730
|
+
demos/tinyllama_kv_cuda: demos/tinyllama_kv_cuda.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine_cuda.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1731
|
+
$(SPINEL) $< -o $@
|
|
1732
|
+
|
|
1733
|
+
tinyllama: demos/tinyllama
|
|
1734
|
+
demos/tinyllama: demos/tinyllama.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1735
|
+
$(SPINEL) $< -o $@
|
|
1736
|
+
|
|
1737
|
+
tinyllama_kv: demos/tinyllama_kv
|
|
1738
|
+
demos/tinyllama_kv: demos/tinyllama_kv.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/llm/engine/llama_kv_engine.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1739
|
+
$(SPINEL) $< -o $@
|
|
1740
|
+
|
|
1741
|
+
# Print the Phuong–Hutter algorithm cards for both models. No
|
|
1742
|
+
# inference — just emit the structured pseudocode. Source-of-truth
|
|
1743
|
+
# for the round-trip work (task #33).
|
|
1744
|
+
algorithm_cards: demos/algorithm_cards
|
|
1745
|
+
demos/algorithm_cards: demos/algorithm_cards.rb lib/toy/dev/toy_card.rb lib/toy.rb lib/toy/models/toy_gpt2.rb lib/toy/models/toy_smollm2.rb lib/toy/io/loaders/toy_gpt2_loader.rb lib/toy/io/loaders/toy_smollm2_loader.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1746
|
+
$(SPINEL) $< -o $@
|
|
1747
|
+
|
|
1748
|
+
# TinyStories from-scratch training via Toy::Trainer.
|
|
1749
|
+
#
|
|
1750
|
+
train: demos/train
|
|
1751
|
+
demos/train: demos/train.rb lib/toy/train/toy_trainer.rb lib/toy/models/transformer.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1752
|
+
$(SPINEL) $< -o $@
|
|
1753
|
+
|
|
1754
|
+
# Parity probe: one forward at distilgpt2 shape, dump last-row logits
|
|
1755
|
+
# to data/ours_logits.txt. Pair with prep/parity.py for the HF reference.
|
|
1756
|
+
gpt2-parity: tinynn/gpt2_parity
|
|
1757
|
+
./tinynn/gpt2_parity
|
|
1758
|
+
|
|
1759
|
+
tinynn/gpt2_parity: tinynn/gpt2_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1760
|
+
$(SPINEL) tinynn/gpt2_parity.rb -o tinynn/gpt2_parity
|
|
1761
|
+
|
|
1762
|
+
# FFI parity probe: persistent ggml graph with LayerNorm + biases.
|
|
1763
|
+
# Dumps last-row logits to data/ours_ffi_logits.txt.
|
|
1764
|
+
gpt2-ffi-parity: tinynn/gpt2_ffi_parity
|
|
1765
|
+
./tinynn/gpt2_ffi_parity
|
|
1766
|
+
|
|
1767
|
+
tinynn/gpt2_ffi_parity: tinynn/gpt2_ffi_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_fwd_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1768
|
+
$(SPINEL) tinynn/gpt2_ffi_parity.rb -o tinynn/gpt2_ffi_parity
|
|
1769
|
+
|
|
1770
|
+
# Apples-to-apples bench: native Mat vs FFI on the same forward.
|
|
1771
|
+
# Re-encode data/prompt_ids.txt first so prompt length matches T_SEQ=5.
|
|
1772
|
+
gpt2-bench: tinynn/gpt2_bench
|
|
1773
|
+
./tinynn/gpt2_bench
|
|
1774
|
+
|
|
1775
|
+
tinynn/gpt2_bench: tinynn/gpt2_bench.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_fwd_engine.rb lib/toy/llm/engine/gpt2_kv_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1776
|
+
$(SPINEL) tinynn/gpt2_bench.rb -o tinynn/gpt2_bench
|
|
1777
|
+
|
|
1778
|
+
# Ruby BPE smoke: load vocab/merges, encode + roundtrip-decode some
|
|
1779
|
+
# fixed prompts. Compare against prep/tokens.py output.
|
|
1780
|
+
bpe-smoke: tinynn/bpe_smoke
|
|
1781
|
+
./tinynn/bpe_smoke
|
|
1782
|
+
|
|
1783
|
+
tinynn/bpe_smoke: tinynn/bpe_smoke.rb lib/toy/models/transformer.rb lib/toy/io/bpe.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1784
|
+
$(SPINEL) tinynn/bpe_smoke.rb -o tinynn/bpe_smoke
|
|
1785
|
+
|
|
1786
|
+
# KV-cache parity probe: prefill the prompt one token at a time through
|
|
1787
|
+
# GPT2KVFFICache, dump last-position logits.
|
|
1788
|
+
gpt2-kv-parity: tinynn/gpt2_kv_parity
|
|
1789
|
+
./tinynn/gpt2_kv_parity
|
|
1790
|
+
|
|
1791
|
+
tinynn/gpt2_kv_parity: tinynn/gpt2_kv_parity.rb lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/llm/engine/gpt2_kv_engine.rb lib/toy/io/gguf_load.rb lib/toy/train/training.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1792
|
+
$(SPINEL) tinynn/gpt2_kv_parity.rb -o tinynn/gpt2_kv_parity
|
|
1793
|
+
|
|
1794
|
+
# --- CUDA mirrors of the GPT-2 demos / parity / bench --------------
|
|
1795
|
+
# All require `make setup-ggml-cuda` to have produced
|
|
1796
|
+
# vendor/ggml/build-cuda first. Built on the gx10 (NVIDIA GB10);
|
|
1797
|
+
# the Mac build doesn't have CUDA.
|
|
1798
|
+
|
|
1799
|
+
CUDA_GPT2_DEPS = lib/toy/models/transformer.rb lib/toy/models/gpt2.rb lib/toy/io/gguf_load.rb \
|
|
1800
|
+
lib/toy/train/training.rb lib/toy/ffi/tinynn.rb lib/toy/ffi/tinynn_cuda.rb \
|
|
1801
|
+
tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1802
|
+
|
|
1803
|
+
gpt2-ffi-parity-cuda: tinynn/gpt2_ffi_parity_cuda
|
|
1804
|
+
./tinynn/gpt2_ffi_parity_cuda
|
|
1805
|
+
|
|
1806
|
+
tinynn/gpt2_ffi_parity_cuda: tinynn/gpt2_ffi_parity_cuda.rb lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb $(CUDA_GPT2_DEPS)
|
|
1807
|
+
$(SPINEL) tinynn/gpt2_ffi_parity_cuda.rb -o tinynn/gpt2_ffi_parity_cuda
|
|
1808
|
+
|
|
1809
|
+
gpt2-kv-parity-cuda: tinynn/gpt2_kv_parity_cuda
|
|
1810
|
+
./tinynn/gpt2_kv_parity_cuda
|
|
1811
|
+
|
|
1812
|
+
tinynn/gpt2_kv_parity_cuda: tinynn/gpt2_kv_parity_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb $(CUDA_GPT2_DEPS)
|
|
1813
|
+
$(SPINEL) tinynn/gpt2_kv_parity_cuda.rb -o tinynn/gpt2_kv_parity_cuda
|
|
1814
|
+
|
|
1815
|
+
gpt2-bench-cuda: tinynn/gpt2_bench_cuda
|
|
1816
|
+
./tinynn/gpt2_bench_cuda
|
|
1817
|
+
|
|
1818
|
+
tinynn/gpt2_bench_cuda: tinynn/gpt2_bench_cuda.rb lib/toy/llm/engine/gpt2_fwd_engine_cuda.rb lib/toy/llm/engine/gpt2_kv_engine_cuda.rb $(CUDA_GPT2_DEPS)
|
|
1819
|
+
$(SPINEL) tinynn/gpt2_bench_cuda.rb -o tinynn/gpt2_bench_cuda
|
|
1820
|
+
|
|
1821
|
+
ab-smoke-embed: tinynn/ab_smoke_embed
|
|
1822
|
+
./tinynn/ab_smoke_embed
|
|
1823
|
+
|
|
1824
|
+
tinynn/ab_smoke_embed: tinynn/ab_smoke_embed.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1825
|
+
$(SPINEL) tinynn/ab_smoke_embed.rb -o tinynn/ab_smoke_embed
|
|
1826
|
+
|
|
1827
|
+
ab-smoke-sgd: tinynn/ab_smoke_sgd
|
|
1828
|
+
./tinynn/ab_smoke_sgd
|
|
1829
|
+
|
|
1830
|
+
tinynn/ab_smoke_sgd: tinynn/ab_smoke_sgd.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1831
|
+
$(SPINEL) tinynn/ab_smoke_sgd.rb -o tinynn/ab_smoke_sgd
|
|
1832
|
+
|
|
1833
|
+
# F1.2 step 1: multi-step LoRA convergence via the F1.1 in-graph
|
|
1834
|
+
# optimizer. Toy shape; SGD; 60 steps; asserts final loss < 10% of
|
|
1835
|
+
# initial (passes at ~10e-13 of initial).
|
|
1836
|
+
ab-smoke-lora-train: tinynn/ab_smoke_lora_train
|
|
1837
|
+
./tinynn/ab_smoke_lora_train
|
|
1838
|
+
|
|
1839
|
+
tinynn/ab_smoke_lora_train: tinynn/ab_smoke_lora_train.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1840
|
+
$(SPINEL) tinynn/ab_smoke_lora_train.rb -o tinynn/ab_smoke_lora_train
|
|
1841
|
+
|
|
1842
|
+
# Forward-only smoke: does TransformerLM#forward run at current Spinel
|
|
1843
|
+
# master? (The #473 SIGBUS is in backward; forward might be OK.)
|
|
1844
|
+
forward-smoke: tinynn/forward_smoke
|
|
1845
|
+
./tinynn/forward_smoke
|
|
1846
|
+
|
|
1847
|
+
tinynn/forward_smoke: tinynn/forward_smoke.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1848
|
+
$(SPINEL) tinynn/forward_smoke.rb -o tinynn/forward_smoke
|
|
1849
|
+
|
|
1850
|
+
persistent-bench: tinynn/persistent_bench
|
|
1851
|
+
./tinynn/persistent_bench
|
|
1852
|
+
|
|
1853
|
+
tinynn/persistent_bench: tinynn/persistent_bench.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1854
|
+
$(SPINEL) tinynn/persistent_bench.rb -o tinynn/persistent_bench
|
|
1855
|
+
|
|
1856
|
+
persistent-bench-cuda: tinynn/persistent_bench_cuda
|
|
1857
|
+
./tinynn/persistent_bench_cuda
|
|
1858
|
+
|
|
1859
|
+
# The CUDA backend init lives in libtinynn_ggml_cuda.a; depend on it so it is built first (assumes this program links it, as the other CUDA rules do).
tinynn/persistent_bench_cuda: tinynn/persistent_bench_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1860
|
+
$(SPINEL) tinynn/persistent_bench_cuda.rb -o tinynn/persistent_bench_cuda
|
|
1861
|
+
|
|
1862
|
+
persistent-bench-big: tinynn/persistent_bench_big
|
|
1863
|
+
./tinynn/persistent_bench_big
|
|
1864
|
+
|
|
1865
|
+
tinynn/persistent_bench_big: tinynn/persistent_bench_big.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn.rb tinynn/libtinynn_ggml.a
|
|
1866
|
+
$(SPINEL) tinynn/persistent_bench_big.rb -o tinynn/persistent_bench_big
|
|
1867
|
+
|
|
1868
|
+
# A/B parity test against CUDA backend on the local GPU (sm_121 / GB10).
|
|
1869
|
+
# Requires `make setup-ggml-cuda` to have produced vendor/ggml/build-cuda.
|
|
1870
|
+
ab-smoke-cuda: tinynn/ab_smoke_cuda
|
|
1871
|
+
./tinynn/ab_smoke_cuda
|
|
1872
|
+
|
|
1873
|
+
tinynn/tinynn_backend_cuda.o: tinynn/tinynn_backend_cuda.c
|
|
1874
|
+
$(CC) $(CFLAGS) $(GGML_INC) -I$(CUDA_DIR)/include -c $< -o $@
|
|
1875
|
+
|
|
1876
|
+
# Only the CUDA backend init goes into the CUDA archive. Common
|
|
1877
|
+
# wrappers stay in tinynn_ggml.o (CPU archive), referenced from CUDA
|
|
1878
|
+
# programs via a weak link. Avoids the multi-archive multi-definition
|
|
1879
|
+
# linker conflict that the older two-fat-archive layout had.
|
|
1880
|
+
tinynn/libtinynn_ggml_cuda.a: tinynn/tinynn_backend_cuda.o
|
|
1881
|
+
ar $(ARFLAGS) $@ $<
|
|
1882
|
+
|
|
1883
|
+
# Metal backend mirror — same archive-isolation pattern as CUDA. The
|
|
1884
|
+
# source is .m (Objective-C) since the Metal frameworks are ObjC; we
|
|
1885
|
+
# compile with -fobjc-arc off (the file holds no ObjC objects of its
|
|
1886
|
+
# own, just a C function calling into ggml-metal). Header search adds
|
|
1887
|
+
# the Metal build dir so ggml-metal.h is reachable.
|
|
1888
|
+
tinynn/tinynn_backend_metal.o: tinynn/tinynn_backend_metal.m
|
|
1889
|
+
ifneq ($(UNAME_S),Darwin)
|
|
1890
|
+
@echo "tinynn_backend_metal.o: macOS-only (Objective-C + Metal frameworks); uname -s = $(UNAME_S)"; exit 1
|
|
1891
|
+
endif
|
|
1892
|
+
$(CC) $(CFLAGS) -x objective-c $(GGML_INC) -c $< -o $@
|
|
1893
|
+
|
|
1894
|
+
tinynn/libtinynn_ggml_metal.a: tinynn/tinynn_backend_metal.o
|
|
1895
|
+
ar $(ARFLAGS) $@ $<
|
|
1896
|
+
|
|
1897
|
+
# The CUDA backend init lives in libtinynn_ggml_cuda.a; depend on it so it is built first (assumes this program links it, as the other CUDA rules do).
tinynn/ab_smoke_cuda: tinynn/ab_smoke_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1898
|
+
$(SPINEL) tinynn/ab_smoke_cuda.rb -o tinynn/ab_smoke_cuda
|
|
1899
|
+
|
|
1900
|
+
# Consolidated CUDA parity test: matmul + add + gelu + rms_norm + softmax + scale + ffn_pipeline.
|
|
1901
|
+
ab-smoke-all-cuda: tinynn/ab_smoke_all_cuda
|
|
1902
|
+
./tinynn/ab_smoke_all_cuda
|
|
1903
|
+
|
|
1904
|
+
# The CUDA backend init lives in libtinynn_ggml_cuda.a; depend on it so it is built first (assumes this program links it, as the other CUDA rules do).
tinynn/ab_smoke_all_cuda: tinynn/ab_smoke_all_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1905
|
+
$(SPINEL) tinynn/ab_smoke_all_cuda.rb -o tinynn/ab_smoke_all_cuda
|
|
1906
|
+
|
|
1907
|
+
# Transformer-shape parity + wallclock bench on CUDA (GB10).
|
|
1908
|
+
ab-smoke-big-cuda: tinynn/ab_smoke_big_cuda
|
|
1909
|
+
./tinynn/ab_smoke_big_cuda
|
|
1910
|
+
|
|
1911
|
+
# The CUDA backend init lives in libtinynn_ggml_cuda.a; depend on it so it is built first (assumes this program links it, as the other CUDA rules do).
tinynn/ab_smoke_big_cuda: tinynn/ab_smoke_big_cuda.rb lib/toy/models/transformer.rb lib/toy/ffi/tinynn_cuda.rb tinynn/libtinynn_ggml.a tinynn/libtinynn_ggml_cuda.a
|
|
1912
|
+
$(SPINEL) tinynn/ab_smoke_big_cuda.rb -o tinynn/ab_smoke_big_cuda
|
|
1913
|
+
|
|
1914
|
+
# --- maintenance ------------------------------------------------------------
|
|
1915
|
+
clean:
|
|
1916
|
+
rm -f demos/train_minimal demos/train_tinystories \
|
|
1917
|
+
demos/inference_demo demos/inference_demo_cuda \
|
|
1918
|
+
demos/distilgpt2_demo demos/distilgpt2_demo_ffi \
|
|
1919
|
+
demos/distilgpt2_demo_kv demos/distilgpt2_demo_text \
|
|
1920
|
+
demos/distilgpt2_demo_ffi_cuda demos/distilgpt2_demo_kv_cuda \
|
|
1921
|
+
tinynn/tinynn_ggml.o tinynn/libtinynn_ggml.a \
|
|
1922
|
+
tinynn/tinynn_backend_cuda.o tinynn/libtinynn_ggml_cuda.a \
|
|
1923
|
+
tinynn/tinynn_backend_metal.o tinynn/libtinynn_ggml_metal.a \
|
|
1924
|
+
examples/example_inference_metal \
|
|
1925
|
+
tinynn/smoke tinynn/ab_smoke tinynn/ab_smoke_cuda tinynn/ab_smoke_all_cuda \
|
|
1926
|
+
tinynn/ab_smoke_add tinynn/ab_smoke_gelu tinynn/ab_smoke_rms_norm \
|
|
1927
|
+
tinynn/ab_smoke_softmax tinynn/ab_smoke_transpose tinynn/ab_smoke_scale \
|
|
1928
|
+
tinynn/ab_smoke_pipeline tinynn/ab_smoke_big tinynn/ab_smoke_big_cuda \
|
|
1929
|
+
tinynn/ab_smoke_matmul_variants tinynn/ab_smoke_back tinynn/ab_smoke_embed \
|
|
1930
|
+
tinynn/ab_smoke_sgd tinynn/ab_smoke_gelu_back tinynn/ab_smoke_cegrad \
|
|
1931
|
+
tinynn/ab_smoke_adam tinynn/forward_smoke tinynn/persistent_bench \
|
|
1932
|
+
tinynn/persistent_bench_cuda tinynn/persistent_bench_big \
|
|
1933
|
+
examples/example_train_from_scratch \
|
|
1934
|
+
examples/example_train_from_scratch_cpu \
|
|
1935
|
+
examples/example_train_from_scratch_cuda \
|
|
1936
|
+
examples/example_finetune examples/example_finetune_cuda \
|
|
1937
|
+
libexec/toy-infer libexec/toy-train libexec/toy-train-cuda libexec/toy-train-lora-cuda libexec/toy-eval libexec/toy-eval-lmc libexec/toy-serve examples/example_train \
|
|
1938
|
+
libexec/toy-infer-metal libexec/toy-eval-metal libexec/toy-train-metal \
tinynn/gpt2_parity tinynn/gpt2_ffi_parity tinynn/gpt2_bench tinynn/bpe_smoke \
tinynn/gpt2_kv_parity tinynn/gpt2_ffi_parity_cuda tinynn/gpt2_kv_parity_cuda \
tinynn/gpt2_bench_cuda tinynn/ab_smoke_lora_train \
demos/smollm2_seq_train_cuda demos/smollm2_seq_full_finetune_cuda \
demos/smollm2_seq_qlora_cuda demos/seq_train_bench_cuda \
demos/smollm2_lora_train_bench demos/smollm2_lora_train_bench_cuda \
demos/smollm2_lora_grad_probe demos/smollm2_lora_grad_probe_cuda \
demos/qwen25_native_mmap_cuda demos/smollm2_kv_cuda \
demos/tinyllama demos/tinyllama_kv demos/tinyllama_kv_cuda \
demos/algorithm_cards demos/train
|
|
1939
|
+
|
|
1940
|
+
distclean: clean
|
|
1941
|
+
rm -rf $(GGML_DIR)/build $(GGML_DIR)/build-cuda $(GGML_DIR)/build-metal
|
|
1942
|
+
|
|
1943
|
+
# --- Algorithm-card drift gate -----------------------------------------------
|
|
1944
|
+
# Sanity-check that every Toy:: class with both `def forward` and
|
|
1945
|
+
# `def algorithm` keeps the two in lock-step. Catches the common
|
|
1946
|
+
# drift case where someone changes the forward without updating the
|
|
1947
|
+
# card (or vice versa). Pure-Ruby, runs in a fraction of a second.
|
|
1948
|
+
check-cards:
|
|
1949
|
+
ruby prep/card_drift_check.rb
|
|
1950
|
+
|
|
1951
|
+
# --- Perf regression gate -----------------------------------------------------
|
|
1952
|
+
# Runs each bench/*.rb (LoRA step, inference, tokenizer) and compares the
|
|
1953
|
+
# emitted BENCH lines against bench/baselines.csv. Exit 1 on any metric that
|
|
1954
|
+
# regresses past its per-metric tolerance. `bench-update` re-records the
|
|
1955
|
+
# current values as the new baseline.
|
|
1956
|
+
#
|
|
1957
|
+
# Run before pushing perf-sensitive changes; baselines.csv lives in the repo
|
|
1958
|
+
# so anyone can re-run on the same hardware and compare.
|
|
1959
|
+
bench: tinynn/libtinynn_ggml.a
|
|
1960
|
+
ruby bench/check.rb
|
|
1961
|
+
|
|
1962
|
+
bench-update: tinynn/libtinynn_ggml.a
|
|
1963
|
+
ruby bench/check.rb --update
|
|
1964
|
+
|
|
1965
|
+
bench-report: tinynn/libtinynn_ggml.a
|
|
1966
|
+
ruby bench/check.rb --report
|
|
1967
|
+
|
|
1968
|
+
# Routine comparison vs PyTorch — the "old-stable" yardstick — in the
|
|
1969
|
+
# single-machine single-GPU case. Runs ON gx10: toy CUDA benches run
|
|
1970
|
+
# native, the PyTorch reference (bench/ref_pytorch.py) runs in the
|
|
1971
|
+
# dev-pytorch container. Gates the toy/PyTorch *ratio* (not absolute
|
|
1972
|
+
# ms, which is machine-dependent) so a design change that quietly
|
|
1973
|
+
# widens the gap fails. Budget in bench/baselines_vs_pytorch.csv;
|
|
1974
|
+
# `--update` re-records it. Override the torch invocation with PT_CMD.
|
|
1975
|
+
bench-vs-pytorch: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1976
|
+
ruby bench/check_vs_pytorch.rb
|
|
1977
|
+
|
|
1978
|
+
bench-vs-pytorch-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1979
|
+
ruby bench/check_vs_pytorch.rb --update
|
|
1980
|
+
|
|
1981
|
+
bench-vs-pytorch-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1982
|
+
ruby bench/check_vs_pytorch.rb --report
|
|
1983
|
+
|
|
1984
|
+
# Heavy bench — ambitious workloads that exercise the libs (LoRA on
|
|
1985
|
+
# Qwen2.5-1.5B at seq=256, decode on Qwen2.5-7B-Q8 with KV_Q8+FLASH).
|
|
1986
|
+
# ~3-5 min wallclock; meant as a yardstick for choosing between
|
|
1987
|
+
# optimization strategies, not for every-commit gating.
|
|
1988
|
+
# bench-heavy — toy-only, fast iteration loop (no PyTorch)
|
|
1989
|
+
# bench-vs-pytorch-heavy — same workloads + PyTorch ratio gate
|
|
1990
|
+
bench-heavy: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1991
|
+
ruby bench/check_heavy.rb
|
|
1992
|
+
|
|
1993
|
+
bench-heavy-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1994
|
+
ruby bench/check_heavy.rb --update
|
|
1995
|
+
|
|
1996
|
+
bench-heavy-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
1997
|
+
ruby bench/check_heavy.rb --report
|
|
1998
|
+
|
|
1999
|
+
bench-vs-pytorch-heavy: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
2000
|
+
ruby bench/check_vs_pytorch.rb --heavy
|
|
2001
|
+
|
|
2002
|
+
bench-vs-pytorch-heavy-update: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
2003
|
+
ruby bench/check_vs_pytorch.rb --heavy --update
|
|
2004
|
+
|
|
2005
|
+
bench-vs-pytorch-heavy-report: demos/seq_train_bench_cuda demos/qwen25_bench_cuda
|
|
2006
|
+
ruby bench/check_vs_pytorch.rb --heavy --report
|
|
2007
|
+
|
|
2008
|
+
.PHONY: all clean distclean setup-ggml setup-ggml-cuda setup-ggml-metal smoke \
|
|
2009
|
+
example_inference_metal \
|
|
2010
|
+
toy-infer-metal toy-eval-metal toy-train-metal gate-metal \
|
|
2011
|
+
ab-smoke ab-smoke-add ab-smoke-gelu ab-smoke-rms-norm \
|
|
2012
|
+
ab-smoke-softmax ab-smoke-transpose ab-smoke-scale ab-smoke-silu \
|
|
2013
|
+
ab-smoke-mul ab-smoke-pipeline ab-smoke-big ab-smoke-cuda \
|
|
2014
|
+
ab-smoke-all-cuda ab-smoke-big-cuda test \
|
|
2015
|
+
gpt2 smollm2 smollm2_kv smollm2_kv_cuda \
|
|
2016
|
+
tinyllama tinyllama_kv tinyllama_kv_cuda \
|
|
2017
|
+
train algorithm_cards \
|
|
2018
|
+
examples gen-mirrors verify-mirrors \
|
|
2019
|
+
bench bench-update bench-report check-cards \
|
|
2020
|
+
bench-vs-pytorch bench-vs-pytorch-update bench-vs-pytorch-report \
|
|
2021
|
+
bench-heavy bench-heavy-update bench-heavy-report \
|
|
2022
|
+
bench-vs-pytorch-heavy bench-vs-pytorch-heavy-update bench-vs-pytorch-heavy-report \
gpt2-parity gpt2-ffi-parity gpt2-bench bpe-smoke gpt2-kv-parity \
gpt2-ffi-parity-cuda gpt2-kv-parity-cuda gpt2-bench-cuda \
ab-smoke-embed ab-smoke-sgd ab-smoke-lora-train forward-smoke \
persistent-bench persistent-bench-cuda persistent-bench-big \
smollm2_seq_train_cuda smollm2_seq_full_finetune_cuda smollm2_seq_qlora_cuda \
seq_train_bench_cuda smollm2_lora_train_bench smollm2_lora_train_bench_cuda \
smollm2_lora_grad_probe smollm2_lora_grad_probe_cuda qwen25_native_mmap_cuda
# The run wrappers and demo aliases listed above never create a file of their own name, so they are declared phony.
|