@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -299,10 +299,10 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
  return false;
  }
  } catch (std::length_error &) {
- fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
+ GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
  return false;
  } catch (std::bad_alloc &) {
- fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
  return false;
  }
  kv.emplace_back(key, value);
@@ -328,14 +328,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  ok = ok && gr.read(magic, 4);

  if (!ok) {
- fprintf(stderr, "%s: failed to read magic\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }

  for (uint32_t i = 0; i < magic.size(); i++) {
  if (magic[i] != GGUF_MAGIC[i]) {
- fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+ GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
  gguf_free(ctx);
  return nullptr;
  }
@@ -348,11 +348,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  if (ok && gr.read(ctx->version)) {
  if (ctx->version == 1) {
- fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
+ GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
  ok = false;
  }
  if (ctx->version > GGUF_VERSION) {
- fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
+ GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
  __func__, ctx->version, GGUF_VERSION);
  ok = false;
  }
@@ -363,7 +363,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  if (ok && gr.read(n_tensors)) {
  static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
  if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
- fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
+ GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
  __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
  ok = false;
  }
@@ -374,7 +374,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  if (ok && gr.read(n_kv)) {
  static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
  if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
- fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
+ GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
  __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
  ok = false;
  }
@@ -383,7 +383,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  }

  if (!ok) {
- fprintf(stderr, "%s: failed to read header\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read header\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -399,15 +399,15 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  try {
  ok = ok && gr.read(key);
  } catch (std::length_error &) {
- fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
+ GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
  ok = false;
  } catch (std::bad_alloc &) {
- fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
  ok = false;
  }
  for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
  if (key == ctx->kv[j].key) {
- fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
+ GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
  ok = false;
  }
  }
@@ -441,14 +441,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  case GGUF_TYPE_ARRAY:
  default:
  {
- fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
+ GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
  ok = false;
  } break;
  }
  }

  if (!ok) {
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -458,7 +458,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);

  if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
- fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
+ GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
  gguf_free(ctx);
  return nullptr;
  }
@@ -474,14 +474,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  try {
  ok = ok && gr.read(name);
  } catch (std::length_error &) {
- fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
+ GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
  ok = false;
  } catch (std::bad_alloc &) {
- fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
  ok = false;
  }
  if (name.length() >= GGML_MAX_NAME) {
- fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
+ GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
  ok = false;
  break;
  }
@@ -490,7 +490,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  // make sure there are no duplicate tensor names
  for (int64_t j = 0; ok && j < i; ++j) {
  if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
- fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
+ GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
  ok = false;
  break;
  }
@@ -505,7 +505,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  uint32_t n_dims = -1;
  ok = ok && gr.read(n_dims);
  if (n_dims > GGML_MAX_DIMS) {
- fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
+ GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
  __func__, info.t.name, n_dims, GGML_MAX_DIMS);
  ok = false;
  break;
@@ -518,7 +518,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  // check that all ne are non-negative
  if (info.t.ne[j] < 0) {
- fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
+ GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
  __func__, info.t.name, j, info.t.ne[j]);
  ok = false;
  break;
@@ -530,7 +530,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
  (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {

- fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
+ GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
  "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
  __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
  ok = false;
@@ -547,7 +547,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  // check that tensor type is within defined range
  if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
- fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
+ GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
  __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
  ok = false;
  break;
@@ -557,7 +557,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  // check that row size is divisible by block size
  if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
- fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
+ GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
  "not a multiple of block size (%" PRId64 ")\n",
  __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
  ok = false;
@@ -582,7 +582,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  }

  if (!ok) {
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -590,7 +590,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  // we require the data section to be aligned, so take into account any padding
  if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
- fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
+ GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -604,9 +604,9 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  for (size_t i = 0; i < ctx->info.size(); ++i) {
  const gguf_tensor_info & ti = ctx->info[i];
  if (ti.offset != ctx->size) {
- fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
+ GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
  __func__, ti.t.name, ti.offset, ctx->size);
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -634,7 +634,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

  *params.ctx = ggml_init(pdata);
  if (*params.ctx == nullptr) {
- fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
+ GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
  gguf_free(ctx);
  return nullptr;
  }
@@ -656,7 +656,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  ok = ok && gr.read(data->data, ctx->size);

  if (!ok) {
- fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
+ GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
  ggml_free(ctx_data);
  *params.ctx = nullptr;
  gguf_free(ctx);
@@ -689,7 +689,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  }

  if (!ok) {
- fprintf(stderr, "%s: failed to create tensors\n", __func__);
+ GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
  ggml_free(ctx_data);
  *params.ctx = nullptr;
  gguf_free(ctx);
@@ -706,7 +706,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  FILE * file = ggml_fopen(fname, "rb");

  if (!file) {
- fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
+ GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
  return nullptr;
  }

@@ -932,6 +932,7 @@ static void gguf_check_reserved_keys(const std::string & key, const T val) {
  if constexpr (std::is_same<T, uint32_t>::value) {
  GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
  } else {
+ GGML_UNUSED(val);
  GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
  }
  }
@@ -1304,7 +1305,7 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
  FILE * file = ggml_fopen(fname, "wb");

  if (!file) {
- fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
+ GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
  return false;
  }

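The gguf.cpp hunks above are one mechanical change: every fprintf(stderr, ...) on the GGUF reader/writer error paths now goes through GGML_LOG_ERROR, so these failures flow through ggml's pluggable log callback instead of being hard-wired to stderr. A minimal sketch of capturing them, assuming only the existing ggml_log_set API from ggml.h; the my_log_sink name is illustrative:

// Route GGUF loader errors into application code instead of stderr.
#include "ggml.h"
#include <stdio.h>

static void my_log_sink(enum ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    if (level == GGML_LOG_LEVEL_ERROR) {
        fprintf(stderr, "[gguf/ggml] %s", text); // or append to an in-app log buffer
    }
}

int main(void) {
    ggml_log_set(my_log_sink, NULL); // register before calling gguf_init_from_file()
    // struct gguf_init_params params = { /* no_alloc = */ false, /* ctx = */ NULL };
    // gguf_init_from_file("model.gguf", params); // failures now reach my_log_sink
    return 0;
}

Registering the callback once up front means errors from gguf_init_from_file() and gguf_write_to_file() land in the same sink as the rest of ggml's logging.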
@@ -4,6 +4,7 @@
  #include "ggml.h"
  #include "ggml-cpu.h"
  #include "ggml-backend.h"
+ #include "ggml-opt.h"

  #include <stddef.h>
  #include <stdint.h>
@@ -107,6 +108,12 @@ extern "C" {
  LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
  LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
  LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
+ LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
+ LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
+ LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
+ LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
+ LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
+ LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
  };

  enum llama_rope_type {
@@ -277,10 +284,18 @@ extern "C" {
  };
  };

+ struct llama_model_tensor_buft_override {
+ const char * pattern;
+ ggml_backend_buffer_type_t buft;
+ };
+
  struct llama_model_params {
  // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
  ggml_backend_dev_t * devices;

+ // NULL-terminated list of buffer types to use for tensors that match a pattern
+ const struct llama_model_tensor_buft_override * tensor_buft_overrides;
+
  int32_t n_gpu_layers; // number of layers to store in VRAM
  enum llama_split_mode split_mode; // how to split the model across multiple GPUs

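The new llama_model_tensor_buft_override hook lets a caller pin tensors whose names match a pattern to a specific backend buffer type, e.g. keeping mixture-of-experts FFN weights in host memory while the rest of the model is offloaded. A minimal sketch under the assumption that the pattern is matched against tensor names (upstream exposes the same mechanism through a CLI override flag); the expert-FFN regex is illustrative:

// Pin expert FFN weights to host (CPU) memory; llama.h already pulls in
// ggml-backend.h, which declares ggml_backend_cpu_buffer_type().
#include "llama.h"

static struct llama_model_tensor_buft_override overrides[] = {
    { "\\.ffn_.*_exps\\.", NULL }, // buft filled in at runtime below
    { NULL, NULL },                // list is NULL-terminated, as documented above
};

int main(void) {
    overrides[0].buft = ggml_backend_cpu_buffer_type();

    struct llama_model_params mparams = llama_model_default_params();
    mparams.tensor_buft_overrides = overrides;
    mparams.n_gpu_layers = 99; // everything else may still be offloaded
    // struct llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    return 0;
}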
@@ -330,7 +345,7 @@ extern "C" {
  float yarn_beta_fast; // YaRN low correction dim
  float yarn_beta_slow; // YaRN high correction dim
  uint32_t yarn_orig_ctx; // YaRN original context size
- float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
+ float defrag_thold; // defragment the KV cache if holes/size > thold, <= 0 disabled (default)

  ggml_backend_sched_eval_callback cb_eval;
  void * cb_eval_user_data;
@@ -338,34 +353,34 @@ extern "C" {
  enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
  enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

- // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
- // TODO: move at the end of the struct
- bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
- bool embeddings; // if true, extract embeddings (together with logits)
- bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
- bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
- bool no_perf; // whether to measure performance timings
-
  // Abort callback
  // if it returns true, execution of llama_decode() will be aborted
  // currently works only with CPU execution
  ggml_abort_callback abort_callback;
  void * abort_callback_data;
+
+ // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
+ bool embeddings; // if true, extract embeddings (together with logits)
+ bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
+ bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
+ bool no_perf; // whether to measure performance timings
+ bool op_offload; // whether to offload host tensor operations to device
  };

  // model quantization parameters
  typedef struct llama_model_quantize_params {
- int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
- enum llama_ftype ftype; // quantize to this llama_ftype
- enum ggml_type output_tensor_type; // output tensor type
- enum ggml_type token_embedding_type; // token embeddings tensor type
- bool allow_requantize; // allow quantizing non-f32/f16 tensors
- bool quantize_output_tensor; // quantize output.weight
- bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
- bool pure; // quantize all tensors to the default type
- bool keep_split; // quantize to the same number of shards
- void * imatrix; // pointer to importance matrix data
- void * kv_overrides; // pointer to vector containing overrides
+ int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
+ enum llama_ftype ftype; // quantize to this llama_ftype
+ enum ggml_type output_tensor_type; // output tensor type
+ enum ggml_type token_embedding_type; // token embeddings tensor type
+ bool allow_requantize; // allow quantizing non-f32/f16 tensors
+ bool quantize_output_tensor; // quantize output.weight
+ bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
+ bool pure; // quantize all tensors to the default type
+ bool keep_split; // quantize to the same number of shards
+ void * imatrix; // pointer to importance matrix data
+ void * kv_overrides; // pointer to vector containing overrides
+ void * tensor_types; // pointer to vector containing tensor types
  } llama_model_quantize_params;

  typedef struct llama_logit_bias {
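llama_model_quantize_params gains a tensor_types pointer next to kv_overrides, giving quantization per-tensor type overrides. A minimal sketch of programmatic quantization that starts from the defaults and leaves the new field unset; the file names are illustrative:

// Quantize an f16 GGUF to Q4_K_M using the default parameters.
#include "llama.h"

int main(void) {
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M;
    qparams.nthread = 8; // <= 0 falls back to std::thread::hardware_concurrency()
    // qparams.tensor_types stays NULL: no per-tensor type overrides (new field)
    uint32_t rc = llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &qparams);
    return rc == 0 ? 0 : 1;
}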
@@ -431,6 +446,10 @@ extern "C" {
  size_t n_paths,
  struct llama_model_params params);

+ LLAMA_API void llama_model_save_to_file(
+ const struct llama_model * model,
+ const char * path_model);
+
  DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
  "use llama_model_free instead");

@@ -910,14 +929,19 @@ extern "C" {
  // Frees a batch of tokens allocated with llama_batch_init()
  LLAMA_API void llama_batch_free(struct llama_batch batch);

- // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
- // Stores the encoder output internally for later use by the decoder cross-attention layers.
+ // Process a batch of tokens.
+ // In contrast to llama_decode() - this call does not use KV cache.
+ // For encode-decoder contexts, processes the batch using the encoder.
+ // Can store the encoder output internally for later use by the decoder's cross-attention layers.
  // 0 - success
  // < 0 - error. the KV cache state is restored to the state before this call
  LLAMA_API int32_t llama_encode(
  struct llama_context * ctx,
  struct llama_batch batch);

+ // Process a batch of tokens.
+ // Requires KV cache.
+ // For encode-decoder contexts, processes the batch using the decoder.
  // Positive return values does not mean a fatal error, but rather a warning.
  // 0 - success
  // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
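The reworded comments make the split explicit: llama_encode() does not use the KV cache (the encoder pass for encoder-decoder models), while llama_decode() requires it. A minimal sketch of the resulting call order; batch contents and error handling are pared down, and the helper name is illustrative:

// Sketch: one encode/decode round trip for an encoder-decoder model.
#include "llama.h"

static int encode_then_decode(struct llama_context * ctx, const struct llama_model * model,
                              struct llama_batch prompt_batch) {
    // Encoder pass over the prompt - per the comment above, no KV cache involved.
    if (llama_encode(ctx, prompt_batch) < 0) {
        return -1; // error: KV cache state was restored to what it was before the call
    }

    // Decoder side starts from the model's decoder start token and uses the KV cache.
    llama_token dec_start = llama_model_decoder_start_token(model);
    struct llama_batch dec_batch = llama_batch_get_one(&dec_start, 1);

    int32_t ret = llama_decode(ctx, dec_batch);
    if (ret == 1) {
        // warning, not fatal: no KV slot found - shrink the batch or enlarge the context
    }
    return ret;
}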
@@ -1218,6 +1242,7 @@ extern "C" {
  "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");

  /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+ /// Setting k <= 0 makes this a noop
  LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);

  /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
@@ -1264,6 +1289,10 @@ extern "C" {
  float tau,
  float eta);

+ /// @details Intializes a GBNF grammar, see grammars/README.md for details.
+ /// @param vocab The vocabulary that this grammar will be used with.
+ /// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
+ /// @param grammar_root The name of the start symbol for the grammar.
  LLAMA_API struct llama_sampler * llama_sampler_init_grammar(
  const struct llama_vocab * vocab,
  const char * grammar_str,
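The new doc comments pin down llama_sampler_init_grammar's edge cases: an empty grammar_str yields an empty grammar, and a grammar_str that fails to parse yields NULL. A minimal sketch that builds a sampler chain around a toy GBNF rule; the grammar text and helper name are illustrative:

// Sketch: constrain generation to "yes" or "no" with a toy GBNF grammar.
#include "llama.h"
#include <stddef.h>

static struct llama_sampler * make_yes_no_sampler(const struct llama_vocab * vocab) {
    const char * grammar_str = "root ::= \"yes\" | \"no\"";
    struct llama_sampler * grammar = llama_sampler_init_grammar(vocab, grammar_str, "root");
    if (grammar == NULL) {
        return NULL; // grammar_str failed to parse, as documented above
    }
    struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, grammar);                     // grammar filter first
    llama_sampler_chain_add(chain, llama_sampler_init_greedy()); // then pick a token
    return chain;
}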
@@ -1409,6 +1438,37 @@ extern "C" {
  LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
  LLAMA_API void llama_perf_sampler_reset( struct llama_sampler * chain);

+ //
+ // training
+ //
+
+ // function that returns whether or not a given tensor contains trainable parameters
+ typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
+
+ // always returns true
+ LLAMA_API bool llama_opt_param_filter_all(const struct ggml_tensor * tensor, void * userdata);
+
+ struct llama_opt_params {
+ uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
+
+ llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
+ void * param_filter_ud; // userdata for determining which tensors contain trainable parameters
+
+ ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+ void * get_opt_pars_ud; // userdata for calculating optimizer parameters
+ };
+
+ LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
+
+ LLAMA_API void llama_opt_epoch(
+ struct llama_context * lctx,
+ ggml_opt_dataset_t dataset,
+ ggml_opt_result_t result_train,
+ ggml_opt_result_t result_eval,
+ int64_t idata_split,
+ ggml_opt_epoch_callback callback_train,
+ ggml_opt_epoch_callback callback_eval);
+
  #ifdef __cplusplus
  }
  #endif
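Together with llama_model_save_to_file() added earlier in this header, the training block above is enough for a small in-process finetuning loop (this diff also adds examples/training/finetune.cpp, which this sketch loosely follows). A minimal sketch assuming the caller has already prepared the context, model, a ggml_opt_dataset_t, and a train/eval split index; the ggml-opt helpers used here come from the ggml-opt.h include added at the top of llama.h:

// Sketch: one finetuning epoch, then save the updated weights.
#include "llama.h"

static void finetune_one_epoch(struct llama_context * lctx, struct llama_model * model,
                               ggml_opt_dataset_t dataset, int64_t idata_split) {
    struct llama_opt_params opt_params = {
        /* .n_ctx_train     = */ 0,                          // 0: use the llama_context size
        /* .param_filter    = */ llama_opt_param_filter_all, // mark every tensor trainable
        /* .param_filter_ud = */ NULL,
        /* .get_opt_pars    = */ ggml_opt_get_default_optimizer_params,
        /* .get_opt_pars_ud = */ NULL,
    };
    llama_opt_init(lctx, model, opt_params);

    ggml_opt_result_t result_train = ggml_opt_result_init();
    ggml_opt_result_t result_eval  = ggml_opt_result_init();

    // idata_split separates the training part of the dataset from the evaluation part.
    llama_opt_epoch(lctx, dataset, result_train, result_eval, idata_split,
                    ggml_opt_epoch_callback_progress_bar,
                    ggml_opt_epoch_callback_progress_bar);

    ggml_opt_result_free(result_train);
    ggml_opt_result_free(result_eval);

    llama_model_save_to_file(model, "model-finetuned.gguf"); // illustrative output path
}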
@@ -0,0 +1,112 @@
+ ied 4 ½ months
+ __ggml_vocab_test__
+ Führer
+ __ggml_vocab_test__
+
+ __ggml_vocab_test__
+
+ __ggml_vocab_test__
+
+ __ggml_vocab_test__
+
+ __ggml_vocab_test__
+
+ __ggml_vocab_test__
+
+
+ __ggml_vocab_test__
+
+
+
+ __ggml_vocab_test__
+
+
+
+
+ __ggml_vocab_test__
+
+
+ __ggml_vocab_test__
+ Hello world
+ __ggml_vocab_test__
+ Hello world
+ __ggml_vocab_test__
+ Hello World
+ __ggml_vocab_test__
+ Hello World
+ __ggml_vocab_test__
+ Hello World!
+ __ggml_vocab_test__
+ Hello, world!
+ __ggml_vocab_test__
+ Hello, world!
+ __ggml_vocab_test__
+ this is 🦙.cpp
+ __ggml_vocab_test__
+ w048 7tuijk dsdfhu
+ __ggml_vocab_test__
+ нещо на Български
+ __ggml_vocab_test__
+ កាន់តែពិសេសអាចខលចេញ
+ __ggml_vocab_test__
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
+ __ggml_vocab_test__
+ Hello
+ __ggml_vocab_test__
+ Hello
+ __ggml_vocab_test__
+ Hello
+ __ggml_vocab_test__
+ Hello
+ __ggml_vocab_test__
+ Hello
+ __ggml_vocab_test__
+ Hello
+ Hello
+ __ggml_vocab_test__
+ (
+ __ggml_vocab_test__
+
+ =
+ __ggml_vocab_test__
+ ' era
+ __ggml_vocab_test__
+ Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
+ __ggml_vocab_test__
+ !!!!!!
+ __ggml_vocab_test__
+ 3
+ __ggml_vocab_test__
+ 33
+ __ggml_vocab_test__
+ 333
+ __ggml_vocab_test__
+ 3333
+ __ggml_vocab_test__
+ 33333
+ __ggml_vocab_test__
+ 333333
+ __ggml_vocab_test__
+ 3333333
+ __ggml_vocab_test__
+ 33333333
+ __ggml_vocab_test__
+ 333333333
+ __ggml_vocab_test__
+ Cửa Việt
+ __ggml_vocab_test__
+ discards
+ __ggml_vocab_test__
+
+
+
+
+
+
+
+
+
+
+
+ 🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
+ __ggml_vocab_test__
@@ -0,0 +1,46 @@
1
+ 1190 220 32 220 18215 7112
2
+ 50 16800 258
3
+
4
+ 220
5
+ 256
6
+ 277
7
+ 197
8
+ 198
9
+ 368
10
+ 2946
11
+ 3271
12
+ 19873 3817
13
+ 39715 3817
14
+ 19873 7353
15
+ 39715 7353
16
+ 39715 7353 13
17
+ 19873 24 3817 13
18
+ 39715 24 3817 13
19
+ 544 373 9522 112 247 26 36315
20
+ 99 39923 220 35 9607 21498 21470 3679 9433
21
+ 1595 7653 633 79829 34051 1636
22
+ 8755 102595 115960 21125 148305 96819 102816 39048 14105 22528 160234
23
+ 114590 222 330 14879 21 51358 127 12817 93293 117 24204 330 68239 881 120327 170428 21 89101 330 7384 88230 511 947 1492 3742 7233 21
24
+ 19873
25
+ 39715
26
+ 220 39715
27
+ 256 39715
28
+ 277 39715
29
+ 277 39715 198 277 39715
30
+ 330
31
+ 198 319
32
+ 19 7359
33
+ 19873 24 386 87799 13 2403 583 650 51358 223 1663 155736 1522 42056 7544 13336 28785 29 4412 20645
34
+ 17931 4959
35
+ 31
36
+ 1922
37
+ 12325
38
+ 12325 31
39
+ 12325 1922
40
+ 12325 12325
41
+ 12325 12325 31
42
+ 12325 12325 1922
43
+ 12325 12325 12325
44
+ 47 19811 12077
45
+ 3260 3579
46
+ 198 7283 51499 191231 20192 3271 3322 9287 2143 17860 114590 222 330 14879 21 51358 127 12817 93293 117 24204 330 68239 881 120327 170428 21 89101 9522 112 247 172394 247 220 31 220 1922 220 12325 220 12325 31 220 12325 1922 220 12325 12325 220 12325 12325 31 220 12325 12325 1922 220 31 26 31 220 31 396 31 220 31 1043 31 117131 102595 115960 21125 148305 96819 102816 80883 223 1663 155736 1522 42056 7544 13336 28785 29 4412 20645 79745 150278 117079 633 79829 34051 1636 25611 41990 109428 1488 91054 24072 17931 4959 29795 9296 16517 1806 481 96 1386 36633 1609 24 481 1109 650 5074 43 481 57 702 5074 27088 2170 536 24 481 48 650 1933 1696 30262 43 1665 19 32818 262 27236 56