@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/ggml/src/gguf.cpp

@@ -299,10 +299,10 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
             return false;
         }
     } catch (std::length_error &) {
-
+        GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
         return false;
     } catch (std::bad_alloc &) {
-
+        GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
         return false;
     }
     kv.emplace_back(key, value);

@@ -328,14 +328,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     ok = ok && gr.read(magic, 4);

     if (!ok) {
-
+        GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

     for (uint32_t i = 0; i < magic.size(); i++) {
         if (magic[i] != GGUF_MAGIC[i]) {
-
+            GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
             gguf_free(ctx);
             return nullptr;
         }

@@ -348,11 +348,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

     if (ok && gr.read(ctx->version)) {
         if (ctx->version == 1) {
-
+            GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
         if (ctx->version > GGUF_VERSION) {
-
+            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, GGUF_VERSION);
             ok = false;
         }

@@ -363,7 +363,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     if (ok && gr.read(n_tensors)) {
         static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
         if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
-
+            GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
                 __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
             ok = false;
         }

@@ -374,7 +374,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     if (ok && gr.read(n_kv)) {
         static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
         if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
-
+            GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
                 __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
             ok = false;
         }

@@ -383,7 +383,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     }

     if (!ok) {
-
+        GGML_LOG_ERROR("%s: failed to read header\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

@@ -399,15 +399,15 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
         try {
             ok = ok && gr.read(key);
         } catch (std::length_error &) {
-
+            GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
             ok = false;
         } catch (std::bad_alloc &) {
-
+            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
             ok = false;
         }
         for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
             if (key == ctx->kv[j].key) {
-
+                GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
                 ok = false;
             }
         }

@@ -441,14 +441,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
             case GGUF_TYPE_ARRAY:
             default:
                 {
-
+                    GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
                     ok = false;
                 } break;
         }
     }

     if (!ok) {
-
+        GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

@@ -458,7 +458,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);

     if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
-
+        GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
         gguf_free(ctx);
         return nullptr;
     }

@@ -474,14 +474,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
         try {
             ok = ok && gr.read(name);
         } catch (std::length_error &) {
-
+            GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
             ok = false;
         } catch (std::bad_alloc &) {
-
+            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
             ok = false;
         }
         if (name.length() >= GGML_MAX_NAME) {
-
+            GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
             ok = false;
             break;
         }

@@ -490,7 +490,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
         // make sure there are no duplicate tensor names
         for (int64_t j = 0; ok && j < i; ++j) {
             if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
-
+                GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
                 ok = false;
                 break;
             }

@@ -505,7 +505,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
         uint32_t n_dims = -1;
         ok = ok && gr.read(n_dims);
         if (n_dims > GGML_MAX_DIMS) {
-
+            GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
                 __func__, info.t.name, n_dims, GGML_MAX_DIMS);
             ok = false;
             break;

@@ -518,7 +518,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

             // check that all ne are non-negative
             if (info.t.ne[j] < 0) {
-
+                GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
                     __func__, info.t.name, j, info.t.ne[j]);
                 ok = false;
                 break;

@@ -530,7 +530,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
             (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
             (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {

-
+            GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
                 "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
                 __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
             ok = false;

@@ -547,7 +547,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

         // check that tensor type is within defined range
         if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
-
+            GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
                 __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
             ok = false;
             break;

@@ -557,7 +557,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

         // check that row size is divisible by block size
         if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
-
+            GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
                 "not a multiple of block size (%" PRId64 ")\n",
                 __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
             ok = false;

@@ -582,7 +582,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     }

     if (!ok) {
-
+        GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

@@ -590,7 +590,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

     // we require the data section to be aligned, so take into account any padding
     if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
-
+        GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

@@ -604,9 +604,9 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     for (size_t i = 0; i < ctx->info.size(); ++i) {
         const gguf_tensor_info & ti = ctx->info[i];
         if (ti.offset != ctx->size) {
-
+            GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
                 __func__, ti.t.name, ti.offset, ctx->size);
-
+            GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
             gguf_free(ctx);
             return nullptr;
         }

@@ -634,7 +634,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par

     *params.ctx = ggml_init(pdata);
     if (*params.ctx == nullptr) {
-
+        GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
         gguf_free(ctx);
         return nullptr;
     }

@@ -656,7 +656,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
         ok = ok && gr.read(data->data, ctx->size);

         if (!ok) {
-
+            GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
             ggml_free(ctx_data);
             *params.ctx = nullptr;
             gguf_free(ctx);

@@ -689,7 +689,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     }

     if (!ok) {
-
+        GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
         ggml_free(ctx_data);
         *params.ctx = nullptr;
         gguf_free(ctx);

@@ -706,7 +706,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     FILE * file = ggml_fopen(fname, "rb");

     if (!file) {
-
+        GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
         return nullptr;
     }

@@ -932,6 +932,7 @@ static void gguf_check_reserved_keys(const std::string & key, const T val) {
         if constexpr (std::is_same<T, uint32_t>::value) {
             GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
         } else {
+            GGML_UNUSED(val);
             GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
         }
     }

@@ -1304,7 +1305,7 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
     FILE * file = ggml_fopen(fname, "wb");

     if (!file) {
-
+        GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
         return false;
     }

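For context, a minimal consumer-side sketch (not part of this diff) that exercises the loader whose error paths change above; the file name is a placeholder:

    #include <inttypes.h>
    #include <stdio.h>
    #include "ggml.h"
    #include "gguf.h"

    static void inspect(const char * fname) {
        struct ggml_context * meta = nullptr;
        struct gguf_init_params params = {
            /*.no_alloc =*/ true, // read metadata and tensor info only
            /*.ctx      =*/ &meta,
        };
        // every failure path touched above now reports through GGML_LOG_ERROR
        // and makes this call return nullptr
        struct gguf_context * gctx = gguf_init_from_file(fname, params);
        if (gctx == nullptr) {
            return;
        }
        printf("%s: %" PRIi64 " tensors\n", fname, gguf_get_n_tensors(gctx));
        gguf_free(gctx);
        ggml_free(meta);
    }
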
package/src/llama.cpp/include/llama.h

@@ -4,6 +4,7 @@
 #include "ggml.h"
 #include "ggml-cpu.h"
 #include "ggml-backend.h"
+#include "ggml-opt.h"

 #include <stddef.h>
 #include <stdint.h>

@@ -107,6 +108,12 @@
         LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
         LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
+        LLAMA_VOCAB_PRE_TYPE_SUPERBPE       = 30,
+        LLAMA_VOCAB_PRE_TYPE_TRILLION       = 31,
+        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE     = 32,
+        LLAMA_VOCAB_PRE_TYPE_LLAMA4         = 33,
+        LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
+        LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
     };

     enum llama_rope_type {

@@ -277,10 +284,18 @@
         };
     };

+    struct llama_model_tensor_buft_override {
+        const char * pattern;
+        ggml_backend_buffer_type_t buft;
+    };
+
     struct llama_model_params {
         // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
         ggml_backend_dev_t * devices;

+        // NULL-terminated list of buffer types to use for tensors that match a pattern
+        const struct llama_model_tensor_buft_override * tensor_buft_overrides;
+
         int32_t n_gpu_layers; // number of layers to store in VRAM
         enum llama_split_mode split_mode; // how to split the model across multiple GPUs

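The new tensor_buft_overrides field is consumed at model load time; here is a minimal sketch of how a caller might use it (not part of this diff; the regex pattern and any paths are illustrative):

    #include "llama.h"
    #include "ggml-backend.h"

    static llama_model * load_with_overrides(const char * path) {
        // illustrative pattern: keep tensors whose names match in host memory;
        // the list must be terminated by a {nullptr, nullptr} entry
        static const struct llama_model_tensor_buft_override overrides[] = {
            { "ffn_.*_exps", ggml_backend_cpu_buffer_type() },
            { nullptr,       nullptr                        },
        };
        struct llama_model_params mparams = llama_model_default_params();
        mparams.tensor_buft_overrides = overrides;
        return llama_model_load_from_file(path, mparams);
    }
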
@@ -330,7 +345,7 @@
         float    yarn_beta_fast; // YaRN low correction dim
         float    yarn_beta_slow; // YaRN high correction dim
         uint32_t yarn_orig_ctx;  // YaRN original context size
-        float    defrag_thold;   // defragment the KV cache if holes/size > thold,
+        float    defrag_thold;   // defragment the KV cache if holes/size > thold, <= 0 disabled (default)

         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

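A short sketch (not part of this diff) of the clarified defrag_thold semantics in a context setup; the 0.1 threshold is illustrative:

    #include "llama.h"

    static llama_context * make_ctx(llama_model * model) {
        struct llama_context_params cparams = llama_context_default_params();
        // defragment once holes exceed 10% of the KV cache; <= 0 disables
        cparams.defrag_thold = 0.1f;
        return llama_init_from_model(model, cparams);
    }
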
@@ -338,34 +353,34 @@
         enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
         enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

-        // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
-        // TODO: move at the end of the struct
-        bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
-        bool embeddings;  // if true, extract embeddings (together with logits)
-        bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]
-        bool no_perf;     // whether to measure performance timings
-
         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
         // currently works only with CPU execution
         ggml_abort_callback abort_callback;
         void *              abort_callback_data;
+
+        // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
+        bool embeddings;  // if true, extract embeddings (together with logits)
+        bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]
+        bool no_perf;     // whether to measure performance timings
+        bool op_offload;  // whether to offload host tensor operations to device
     };

     // model quantization parameters
     typedef struct llama_model_quantize_params {
-        int32_t nthread;
-        enum llama_ftype ftype;
-        enum ggml_type output_tensor_type;
-        enum ggml_type token_embedding_type;
-        bool allow_requantize;
-        bool quantize_output_tensor;
-        bool only_copy;
-        bool pure;
-        bool keep_split;
-        void * imatrix;
-        void * kv_overrides;
+        int32_t nthread;                     // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
+        enum llama_ftype ftype;              // quantize to this llama_ftype
+        enum ggml_type output_tensor_type;   // output tensor type
+        enum ggml_type token_embedding_type; // token embeddings tensor type
+        bool allow_requantize;               // allow quantizing non-f32/f16 tensors
+        bool quantize_output_tensor;         // quantize output.weight
+        bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
+        bool pure;                           // quantize all tensors to the default type
+        bool keep_split;                     // quantize to the same number of shards
+        void * imatrix;                      // pointer to importance matrix data
+        void * kv_overrides;                 // pointer to vector containing overrides
+        void * tensor_types;                 // pointer to vector containing tensor types
     } llama_model_quantize_params;

     typedef struct llama_logit_bias {

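With the quantization fields now documented inline, a typical call looks like this (a minimal sketch, not part of this diff; paths are placeholders):

    #include "llama.h"

    static bool quantize_to_q4_k_m(const char * fname_inp, const char * fname_out) {
        llama_model_quantize_params qparams = llama_model_quantize_default_params();
        qparams.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M;
        qparams.nthread = 0; // <= 0 falls back to std::thread::hardware_concurrency()
        // llama_model_quantize() returns 0 on success
        return llama_model_quantize(fname_inp, fname_out, &qparams) == 0;
    }
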
@@ -431,6 +446,10 @@
             size_t n_paths,
             struct llama_model_params params);

+    LLAMA_API void llama_model_save_to_file(
+            const struct llama_model * model,
+            const char * path_model);
+
     DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
             "use llama_model_free instead");

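llama_model_save_to_file() writes the in-memory model back out as GGUF (backed by the new src/llama-model-saver.cpp above); a minimal round-trip sketch with placeholder paths:

    #include "llama.h"

    static void roundtrip(void) {
        llama_model * model = llama_model_load_from_file("in.gguf", llama_model_default_params());
        if (model == nullptr) {
            return;
        }
        // ... e.g. update weights via the training API further below ...
        llama_model_save_to_file(model, "out.gguf");
        llama_model_free(model);
    }
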
@@ -910,14 +929,19 @@
     // Frees a batch of tokens allocated with llama_batch_init()
     LLAMA_API void llama_batch_free(struct llama_batch batch);

-    //
-    //
+    // Process a batch of tokens.
+    // In contrast to llama_decode() - this call does not use KV cache.
+    // For encode-decoder contexts, processes the batch using the encoder.
+    // Can store the encoder output internally for later use by the decoder's cross-attention layers.
     // 0 - success
     // < 0 - error. the KV cache state is restored to the state before this call
     LLAMA_API int32_t llama_encode(
             struct llama_context * ctx,
             struct llama_batch batch);

+    // Process a batch of tokens.
+    // Requires KV cache.
+    // For encode-decoder contexts, processes the batch using the decoder.
     // Positive return values does not mean a fatal error, but rather a warning.
     // 0 - success
     // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)

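The expanded comments distinguish the two entry points; a minimal decode sketch (not from the diff) for the common decoder-only case:

    #include <vector>
    #include "llama.h"

    static int32_t eval_prompt(llama_context * ctx, std::vector<llama_token> & tokens) {
        // decoder-only models go through llama_decode(); llama_encode() is for
        // the encoder half of encoder-decoder models and does not use the KV cache
        llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());
        const int32_t ret = llama_decode(ctx, batch);
        // ret == 0: success; ret == 1: no KV slot found (warning); ret < 0: error
        return ret;
    }
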
@@ -1218,6 +1242,7 @@
             "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");

     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+    /// Setting k <= 0 makes this a noop
     LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);

     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751

@@ -1264,6 +1289,10 @@
             float   tau,
             float   eta);

+    /// @details Intializes a GBNF grammar, see grammars/README.md for details.
+    /// @param vocab The vocabulary that this grammar will be used with.
+    /// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
+    /// @param grammar_root The name of the start symbol for the grammar.
     LLAMA_API struct llama_sampler * llama_sampler_init_grammar(
             const struct llama_vocab * vocab,
             const char * grammar_str,

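Both documented samplers compose in the usual chain; a minimal sketch (not from the diff; the GBNF rule is illustrative):

    #include "llama.h"

    static llama_token sample_digits(llama_context * ctx, const llama_vocab * vocab) {
        llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
        // constrain output to the grammar's "root" symbol (digits only here)
        llama_sampler_chain_add(chain, llama_sampler_init_grammar(vocab, "root ::= [0-9]+", "root"));
        llama_sampler_chain_add(chain, llama_sampler_init_top_k(40)); // k <= 0 would be a noop
        llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
        const llama_token tok = llama_sampler_sample(chain, ctx, -1);
        llama_sampler_free(chain);
        return tok;
    }
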
@@ -1409,6 +1438,37 @@
     LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
     LLAMA_API void llama_perf_sampler_reset(      struct llama_sampler * chain);

+    //
+    // training
+    //
+
+    // function that returns whether or not a given tensor contains trainable parameters
+    typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
+
+    // always returns true
+    LLAMA_API bool llama_opt_param_filter_all(const struct ggml_tensor * tensor, void * userdata);
+
+    struct llama_opt_params {
+        uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
+
+        llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
+        void * param_filter_ud;              // userdata for determining which tensors contain trainable parameters
+
+        ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+        void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
+    };
+
+    LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
+
+    LLAMA_API void llama_opt_epoch(
+            struct llama_context * lctx,
+            ggml_opt_dataset_t dataset,
+            ggml_opt_result_t result_train,
+            ggml_opt_result_t result_eval,
+            int64_t idata_split,
+            ggml_opt_epoch_callback callback_train,
+            ggml_opt_epoch_callback callback_eval);
+
 #ifdef __cplusplus
 }
 #endif

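The training block pairs with the reworked ggml-opt surface (examples/training/finetune.cpp in this diff is the full example); a minimal initialization sketch, with dataset construction elided:

    #include "llama.h"
    #include "ggml-opt.h"

    static void train_one_epoch(llama_context * lctx, llama_model * model,
                                ggml_opt_dataset_t dataset, int64_t idata_split) {
        struct llama_opt_params oparams = {
            /*.n_ctx_train     =*/ 0,                          // use the context's size
            /*.param_filter    =*/ llama_opt_param_filter_all, // train every tensor
            /*.param_filter_ud =*/ nullptr,
            /*.get_opt_pars    =*/ ggml_opt_get_default_optimizer_params,
            /*.get_opt_pars_ud =*/ nullptr,
        };
        llama_opt_init(lctx, model, oparams);

        ggml_opt_result_t result_train = ggml_opt_result_init();
        ggml_opt_result_t result_eval  = ggml_opt_result_init();
        // datapoints before idata_split train, the remainder evaluate
        llama_opt_epoch(lctx, dataset, result_train, result_eval, idata_split,
                        ggml_opt_epoch_callback_progress_bar,
                        ggml_opt_epoch_callback_progress_bar);
        ggml_opt_result_free(result_train);
        ggml_opt_result_free(result_eval);
    }
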
models/ggml-vocab-llama4.gguf.inp / ggml-vocab-pixtral.gguf.inp (new tokenizer test input; one copy shown, whitespace-only test cases appear as blank entries)

@@ -0,0 +1,112 @@
+ied 4 ½ months
+__ggml_vocab_test__
+Führer
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+
+
+
+__ggml_vocab_test__
+
+
+
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+Hello World!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+this is 🦙.cpp
+__ggml_vocab_test__
+w048 7tuijk dsdfhu
+__ggml_vocab_test__
+нещо на Български
+__ggml_vocab_test__
+កាន់តែពិសេសអាចខលចេញ
+__ggml_vocab_test__
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+Hello
+__ggml_vocab_test__
+(
+__ggml_vocab_test__
+
+=
+__ggml_vocab_test__
+' era
+__ggml_vocab_test__
+Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
+__ggml_vocab_test__
+!!!!!!
+__ggml_vocab_test__
+3
+__ggml_vocab_test__
+33
+__ggml_vocab_test__
+333
+__ggml_vocab_test__
+3333
+__ggml_vocab_test__
+33333
+__ggml_vocab_test__
+333333
+__ggml_vocab_test__
+3333333
+__ggml_vocab_test__
+33333333
+__ggml_vocab_test__
+333333333
+__ggml_vocab_test__
+Cửa Việt
+__ggml_vocab_test__
+discards
+__ggml_vocab_test__
+
+
+
+
+
+
+
+
+
+
+
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
+__ggml_vocab_test__

Matching .gguf.out (expected token ids, one line per test case):

@@ -0,0 +1,46 @@
+1190 220 32 220 18215 7112
+50 16800 258
+
+220
+256
+277
+197
+198
+368
+2946
+3271
+19873 3817
+39715 3817
+19873 7353
+39715 7353
+39715 7353 13
+19873 24 3817 13
+39715 24 3817 13
+544 373 9522 112 247 26 36315
+99 39923 220 35 9607 21498 21470 3679 9433
+1595 7653 633 79829 34051 1636
+8755 102595 115960 21125 148305 96819 102816 39048 14105 22528 160234
+114590 222 330 14879 21 51358 127 12817 93293 117 24204 330 68239 881 120327 170428 21 89101 330 7384 88230 511 947 1492 3742 7233 21
+19873
+39715
+220 39715
+256 39715
+277 39715
+277 39715 198 277 39715
+330
+198 319
+19 7359
+19873 24 386 87799 13 2403 583 650 51358 223 1663 155736 1522 42056 7544 13336 28785 29 4412 20645
+17931 4959
+31
+1922
+12325
+12325 31
+12325 1922
+12325 12325
+12325 12325 31
+12325 12325 1922
+12325 12325 12325
+47 19811 12077
+3260 3579
+198 7283 51499 191231 20192 3271 3322 9287 2143 17860 114590 222 330 14879 21 51358 127 12817 93293 117 24204 330 68239 881 120327 170428 21 89101 9522 112 247 172394 247 220 31 220 1922 220 12325 220 12325 31 220 12325 1922 220 12325 12325 220 12325 12325 31 220 12325 12325 1922 220 31 26 31 220 31 396 31 220 31 1043 31 117131 102595 115960 21125 148305 96819 102816 80883 223 1663 155736 1522 42056 7544 13336 28785 29 4412 20645 79745 150278 117079 633 79829 34051 1636 25611 41990 109428 1488 91054 24072 17931 4959 29795 9296 16517 1806 481 96 1386 36633 1609 24 481 1109 650 5074 43 481 57 702 5074 27088 2170 536 24 481 48 650 1933 1696 30262 43 1665 19 32818 262 27236 56