@fugood/llama.node 0.6.3 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +40 -30
- package/README.md +4 -1
- package/lib/binding.js +41 -29
- package/lib/binding.ts +26 -25
- package/package.json +40 -7
- package/scripts/build.js +47 -0
- package/scripts/llama.cpp.patch +109 -0
- package/src/anyascii.c +22223 -0
- package/src/anyascii.h +42 -0
- package/src/tts_utils.cpp +20 -7
- package/src/tts_utils.h +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
- package/src/llama.cpp/.github/workflows/build.yml +0 -1078
- package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
- package/src/llama.cpp/.github/workflows/docker.yml +0 -178
- package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
- package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
- package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
- package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
- package/src/llama.cpp/.github/workflows/release.yml +0 -739
- package/src/llama.cpp/.github/workflows/server.yml +0 -237
- package/src/llama.cpp/.github/workflows/winget.yml +0 -42
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
- package/src/llama.cpp/cmake/build-info.cmake +0 -64
- package/src/llama.cpp/cmake/common.cmake +0 -35
- package/src/llama.cpp/cmake/git-vars.cmake +0 -22
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
- package/src/llama.cpp/common/build-info.cpp.in +0 -4
- package/src/llama.cpp/docs/build.md +0 -561
- package/src/llama.cpp/examples/CMakeLists.txt +0 -43
- package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/batched/batched.cpp +0 -246
- package/src/llama.cpp/examples/chat-13B.bat +0 -57
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
- package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
- package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple/simple.cpp +0 -206
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/sycl/build.sh +0 -23
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
- package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
- package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
- package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/training/finetune.cpp +0 -96
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
- package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
- package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
- package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
- package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
- package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
- package/src/llama.cpp/ggml/src/ggml.c +0 -6550
- package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
- package/src/llama.cpp/models/.editorconfig +0 -1
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
- package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
- package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
- package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
- package/src/llama.cpp/prompts/alpaca.txt +0 -1
- package/src/llama.cpp/prompts/assistant.txt +0 -31
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
- package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
- package/src/llama.cpp/prompts/chat.txt +0 -28
- package/src/llama.cpp/prompts/dan-modified.txt +0 -1
- package/src/llama.cpp/prompts/dan.txt +0 -1
- package/src/llama.cpp/prompts/mnemonics.txt +0 -93
- package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
- package/src/llama.cpp/prompts/reason-act.txt +0 -18
- package/src/llama.cpp/requirements/requirements-all.txt +0 -15
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
- package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
- package/src/llama.cpp/requirements.txt +0 -13
- package/src/llama.cpp/scripts/build-info.sh +0 -30
- package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
- package/src/llama.cpp/scripts/xxd.cmake +0 -16
- package/src/llama.cpp/tests/CMakeLists.txt +0 -177
- package/src/llama.cpp/tests/get-model.cpp +0 -21
- package/src/llama.cpp/tests/get-model.h +0 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
- package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
- package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
- package/src/llama.cpp/tests/test-barrier.cpp +0 -94
- package/src/llama.cpp/tests/test-c.c +0 -7
- package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
- package/src/llama.cpp/tests/test-chat.cpp +0 -985
- package/src/llama.cpp/tests/test-double-float.cpp +0 -57
- package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
- package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
- package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
- package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
- package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
- package/src/llama.cpp/tests/test-log.cpp +0 -39
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
- package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
- package/src/llama.cpp/tests/test-opt.cpp +0 -904
- package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
- package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
- package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
- package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
- package/src/llama.cpp/tests/test-rope.cpp +0 -262
- package/src/llama.cpp/tests/test-sampling.cpp +0 -399
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
- package/src/llama.cpp/tools/CMakeLists.txt +0 -39
- package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
- package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
- package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
- package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
- package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
- package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
- package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
- package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
- package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
- package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
- package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
- package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/main/main.cpp +0 -977
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
- package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
- package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
- package/src/llama.cpp/tools/mtmd/clip.h +0 -101
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
- package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
- package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
- package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
- package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
- package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
- package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
- package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
- package/src/llama.cpp/tools/run/run.cpp +0 -1261
- package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
- package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
- package/src/llama.cpp/tools/server/httplib.h +0 -10506
- package/src/llama.cpp/tools/server/server.cpp +0 -4966
- package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
- package/src/llama.cpp/tools/server/utils.hpp +0 -1337
- package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
- package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
|
@@ -1,583 +0,0 @@
|
|
|
1
|
-
#include "ggml.h"
|
|
2
|
-
#include "gguf.h"
|
|
3
|
-
#include "llama.h"
|
|
4
|
-
#include "common.h"
|
|
5
|
-
|
|
6
|
-
#include <algorithm>
|
|
7
|
-
#include <cinttypes>
|
|
8
|
-
#include <climits>
|
|
9
|
-
#include <cstdio>
|
|
10
|
-
#include <cstdlib>
|
|
11
|
-
#include <stdexcept>
|
|
12
|
-
#include <cstring>
|
|
13
|
-
#include <fstream>
|
|
14
|
-
#include <string>
|
|
15
|
-
#include <vector>
|
|
16
|
-
|
|
17
|
-
#if defined(_WIN32)
|
|
18
|
-
#include <windows.h>
|
|
19
|
-
#ifndef PATH_MAX
|
|
20
|
-
#define PATH_MAX MAX_PATH
|
|
21
|
-
#endif
|
|
22
|
-
#include <io.h>
|
|
23
|
-
#endif
|
|
24
|
-
|
|
25
|
-
// Operation requested on the command line: split one GGUF file into many,
// or merge many split GGUF files back into one.
enum split_operation : uint8_t {
    OP_NONE,   // not specified yet (parser defaults this to OP_SPLIT)
    OP_SPLIT,  // --split
    OP_MERGE,  // --merge
};
|
|
30
|
-
|
|
31
|
-
// How split boundaries are decided: by tensor count or by byte size.
enum split_mode : uint8_t {
    MODE_NONE,    // not specified yet (parser defaults this to MODE_TENSOR)
    MODE_TENSOR,  // --split-max-tensors: cap the number of tensors per split
    MODE_SIZE,    // --split-max-size: cap the total tensor bytes per split
};
|
|
36
|
-
|
|
37
|
-
// Parsed command-line options for the gguf-split tool.
struct split_params {
    split_operation operation = OP_NONE;  // split or merge (split by default)
    split_mode mode = MODE_NONE;          // split by tensor count or by size
    size_t n_bytes_split = 0;             // --split-max-size value, in bytes (MODE_SIZE)
    int n_split_tensors = 128;            // --split-max-tensors value (MODE_TENSOR)
    std::string input;                    // GGUF_IN positional argument
    std::string output;                   // GGUF_OUT positional argument
    bool no_tensor_first_split = false;   // --no-tensor-first-split: first split holds metadata only
    bool dry_run = false;                 // --dry-run: print the split plan, write nothing
};
|
|
47
|
-
|
|
48
|
-
// Print the command-line usage/help text to stdout.
// NOTE(review): the option descriptions below appear to have had their column
// alignment (runs of spaces) collapsed by the diff rendering this was
// recovered from — confirm spacing against the upstream source.
static void split_print_usage(const char * executable) {
    const split_params default_params;  // only used to show default option values
    printf("\n");
    printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
    printf("\n");
    printf("Apply a GGUF operation on IN to OUT.");
    printf("\n");
    printf("options:\n");
    printf("  -h, --help show this help message and exit\n");
    printf("  --version show version and build info\n");
    printf("  --split split GGUF to multiple GGUF (enabled by default)\n");
    printf("  --merge merge multiple GGUF to a single GGUF\n");
    printf("  --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
    printf("  --split-max-size N(M|G) max size per split\n");
    printf("  --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
    printf("  --dry-run only print out a split plan and exit, without writing any new files\n");
    printf("\n");
}
|
|
66
|
-
|
|
67
|
-
// Convert a size string such as "128M" or "4G" to a number of bytes.
// Units are decimal (M = 10^6, G = 10^9).
// Throws std::invalid_argument if the string is empty, has an unsupported
// unit suffix, or does not start with a positive integer.
static size_t split_str_to_n_bytes(std::string str) {
    // guard first: str.back() on an empty string is undefined behavior
    if (str.empty()) {
        throw std::invalid_argument("error: size must be a positive value");
    }
    size_t n_bytes = 0;
    int n = 0;
    // check the unit suffix before parsing so "12K" reports the unit error
    const char unit = str.back();
    if (unit != 'M' && unit != 'G') {
        throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, unit));
    }
    // original code read `n` uninitialized when sscanf matched nothing;
    // check the conversion count explicitly
    if (sscanf(str.c_str(), "%d", &n) != 1 || n <= 0) {
        throw std::invalid_argument("error: size must be a positive value");
    }
    if (unit == 'M') {
        n_bytes = (size_t)n * 1000 * 1000;               // megabytes
    } else {
        n_bytes = (size_t)n * 1000 * 1000 * 1000;        // gigabytes
    }
    return n_bytes;
}
|
|
85
|
-
|
|
86
|
-
// Parse argv into `params`.
// Throws std::invalid_argument for unknown options, conflicting options
// (--split vs --merge, --split-max-tensors vs --split-max-size), a missing
// option value, or a wrong number of positional arguments.
// Exits the process directly for -h/--help and --version.
static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
    std::string arg;
    const std::string arg_prefix = "--";
    bool invalid_param = false;

    int arg_idx = 1;
    // consume leading "--" options; the first token not starting with "--"
    // ends the loop and begins the positional arguments
    for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
        arg = argv[arg_idx];
        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
            // accept underscores as aliases for dashes (e.g. --split_max_size)
            std::replace(arg.begin(), arg.end(), '_', '-');
        }

        bool arg_found = false;
        if (arg == "-h" || arg == "--help") {
            split_print_usage(argv[0]);
            exit(0);
        } else if (arg == "--version") {
            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
            exit(0);
        } else if (arg == "--dry-run") {
            arg_found = true;
            params.dry_run = true;
        } else if (arg == "--no-tensor-first-split") {
            arg_found = true;
            params.no_tensor_first_split = true;
        } else if (arg == "--merge") {
            arg_found = true;
            // reject if the user already asked for the opposite operation
            if (params.operation != OP_NONE && params.operation != OP_MERGE) {
                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
            }
            params.operation = OP_MERGE;
        } else if (arg == "--split") {
            arg_found = true;
            if (params.operation != OP_NONE && params.operation != OP_SPLIT) {
                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
            }
            params.operation = OP_SPLIT;
        } else if (arg == "--split-max-tensors") {
            if (++arg_idx >= argc) {
                // option given without a value; reported after the loop
                invalid_param = true;
                break;
            }
            arg_found = true;
            // tensor-count mode and size mode are mutually exclusive
            if (params.mode != MODE_NONE && params.mode != MODE_TENSOR) {
                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
            }
            params.mode = MODE_TENSOR;
            // NOTE(review): atoi silently yields 0 on non-numeric input — confirm
            // whether a stricter parse is wanted here
            params.n_split_tensors = atoi(argv[arg_idx]);
        } else if (arg == "--split-max-size") {
            if (++arg_idx >= argc) {
                invalid_param = true;
                break;
            }
            arg_found = true;
            if (params.mode != MODE_NONE && params.mode != MODE_SIZE) {
                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
            }
            params.mode = MODE_SIZE;
            params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
        }

        if (!arg_found) {
            throw std::invalid_argument("error: unknown argument: " + arg);
        }
    }

    // the operation is split if not specified
    if (params.operation == OP_NONE) {
        params.operation = OP_SPLIT;
    }
    // the split mode is by tensor if not specified
    if (params.mode == MODE_NONE) {
        params.mode = MODE_TENSOR;
    }

    if (invalid_param) {
        throw std::invalid_argument("error: invalid parameter for argument: " + arg);
    }

    // exactly two positional arguments must remain: GGUF_IN GGUF_OUT
    if (argc - arg_idx != 2) {
        throw std::invalid_argument("error: bad arguments");
    }

    params.input = argv[arg_idx++];
    params.output = argv[arg_idx++];
}
|
|
173
|
-
|
|
174
|
-
static bool split_params_parse(int argc, const char ** argv, split_params & params) {
|
|
175
|
-
bool result = true;
|
|
176
|
-
try {
|
|
177
|
-
split_params_parse_ex(argc, argv, params);
|
|
178
|
-
}
|
|
179
|
-
catch (const std::invalid_argument & ex) {
|
|
180
|
-
fprintf(stderr, "%s\n", ex.what());
|
|
181
|
-
split_print_usage(argv[0]);
|
|
182
|
-
exit(EXIT_FAILURE);
|
|
183
|
-
}
|
|
184
|
-
return result;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// Write n zero bytes to `file`.
// The original wrote one byte per file.write() call (n stream operations);
// writing from a fixed zero-filled buffer in chunks produces the same output
// with at most n/4096 + 1 calls.
static void zeros(std::ofstream & file, size_t n) {
    static const char buf[4096] = {0};  // zero-initialized scratch block
    while (n > 0) {
        const size_t chunk = n < sizeof(buf) ? n : sizeof(buf);
        file.write(buf, (std::streamsize) chunk);
        n -= chunk;
    }
}
|
|
193
|
-
|
|
194
|
-
struct split_strategy {
|
|
195
|
-
const split_params params;
|
|
196
|
-
std::ifstream & f_input;
|
|
197
|
-
struct gguf_context * ctx_gguf;
|
|
198
|
-
struct ggml_context * ctx_meta = NULL;
|
|
199
|
-
const int n_tensors;
|
|
200
|
-
|
|
201
|
-
// one ctx_out per one output file
|
|
202
|
-
std::vector<struct gguf_context *> ctx_outs;
|
|
203
|
-
|
|
204
|
-
// temporary buffer for reading in tensor data
|
|
205
|
-
std::vector<uint8_t> read_buf;
|
|
206
|
-
|
|
207
|
-
split_strategy(const split_params & params,
|
|
208
|
-
std::ifstream & f_input,
|
|
209
|
-
struct gguf_context * ctx_gguf,
|
|
210
|
-
struct ggml_context * ctx_meta) :
|
|
211
|
-
params(params),
|
|
212
|
-
f_input(f_input),
|
|
213
|
-
ctx_gguf(ctx_gguf),
|
|
214
|
-
ctx_meta(ctx_meta),
|
|
215
|
-
n_tensors(gguf_get_n_tensors(ctx_gguf)) {
|
|
216
|
-
|
|
217
|
-
// because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
|
|
218
|
-
int i_split = -1;
|
|
219
|
-
struct gguf_context * ctx_out = NULL;
|
|
220
|
-
auto new_ctx_out = [&](bool allow_no_tensors) {
|
|
221
|
-
i_split++;
|
|
222
|
-
if (ctx_out != NULL) {
|
|
223
|
-
if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
|
|
224
|
-
fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
|
|
225
|
-
exit(EXIT_FAILURE);
|
|
226
|
-
}
|
|
227
|
-
ctx_outs.push_back(ctx_out);
|
|
228
|
-
}
|
|
229
|
-
ctx_out = gguf_init_empty();
|
|
230
|
-
// Save all metadata in first split only
|
|
231
|
-
if (i_split == 0) {
|
|
232
|
-
gguf_set_kv(ctx_out, ctx_gguf);
|
|
233
|
-
}
|
|
234
|
-
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
|
|
235
|
-
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
|
|
236
|
-
gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
|
|
237
|
-
};
|
|
238
|
-
|
|
239
|
-
// initialize ctx_out for the first split
|
|
240
|
-
new_ctx_out(false);
|
|
241
|
-
|
|
242
|
-
// skip first split if no_tensor_first_split is set
|
|
243
|
-
if (params.no_tensor_first_split) {
|
|
244
|
-
new_ctx_out(true);
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
// process tensors one by one
|
|
248
|
-
size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
|
|
249
|
-
for (int i = 0; i < n_tensors; ++i) {
|
|
250
|
-
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
|
251
|
-
// calculate the "imaginary" size = the current size + next tensor size
|
|
252
|
-
size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
|
|
253
|
-
size_t next_tensors_size = curr_tensors_size + n_bytes;
|
|
254
|
-
if (should_split(i, next_tensors_size)) {
|
|
255
|
-
new_ctx_out(false);
|
|
256
|
-
curr_tensors_size = n_bytes;
|
|
257
|
-
} else {
|
|
258
|
-
curr_tensors_size = next_tensors_size;
|
|
259
|
-
}
|
|
260
|
-
gguf_add_tensor(ctx_out, t);
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
// push the last ctx_out
|
|
264
|
-
ctx_outs.push_back(ctx_out);
|
|
265
|
-
|
|
266
|
-
// set the correct n_split for all ctx_out
|
|
267
|
-
for (auto & ctx : ctx_outs) {
|
|
268
|
-
gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
~split_strategy() {
|
|
273
|
-
for (auto & ctx_out : ctx_outs) {
|
|
274
|
-
gguf_free(ctx_out);
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
bool should_split(int i_tensor, size_t next_size) {
|
|
279
|
-
if (params.mode == MODE_SIZE) {
|
|
280
|
-
// split by max size per file
|
|
281
|
-
return next_size > params.n_bytes_split;
|
|
282
|
-
} else if (params.mode == MODE_TENSOR) {
|
|
283
|
-
// split by number of tensors per file
|
|
284
|
-
return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
|
|
285
|
-
}
|
|
286
|
-
// should never happen
|
|
287
|
-
GGML_ABORT("invalid mode");
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
void print_info() {
|
|
291
|
-
printf("n_split: %zu\n", ctx_outs.size());
|
|
292
|
-
int i_split = 0;
|
|
293
|
-
for (auto & ctx_out : ctx_outs) {
|
|
294
|
-
// re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
|
|
295
|
-
size_t total_size = gguf_get_meta_size(ctx_out);
|
|
296
|
-
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
|
297
|
-
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
|
|
298
|
-
total_size += ggml_nbytes(t);
|
|
299
|
-
}
|
|
300
|
-
total_size = total_size / 1000 / 1000; // convert to megabytes
|
|
301
|
-
printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
|
|
302
|
-
i_split++;
|
|
303
|
-
}
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
void write() {
|
|
307
|
-
int i_split = 0;
|
|
308
|
-
int n_split = ctx_outs.size();
|
|
309
|
-
for (auto & ctx_out : ctx_outs) {
|
|
310
|
-
// construct file path
|
|
311
|
-
char split_path[PATH_MAX] = {0};
|
|
312
|
-
llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
|
|
313
|
-
|
|
314
|
-
// open the output file
|
|
315
|
-
printf("Writing file %s ... ", split_path);
|
|
316
|
-
fflush(stdout);
|
|
317
|
-
std::ofstream fout = std::ofstream(split_path, std::ios::binary);
|
|
318
|
-
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
|
319
|
-
|
|
320
|
-
// write metadata
|
|
321
|
-
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
|
322
|
-
gguf_get_meta_data(ctx_out, data.data());
|
|
323
|
-
fout.write((const char *)data.data(), data.size());
|
|
324
|
-
|
|
325
|
-
// write tensors
|
|
326
|
-
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
|
327
|
-
// read tensor meta and prepare buffer
|
|
328
|
-
const char * t_name = gguf_get_tensor_name(ctx_out, i);
|
|
329
|
-
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
|
330
|
-
auto n_bytes = ggml_nbytes(t);
|
|
331
|
-
read_buf.resize(n_bytes);
|
|
332
|
-
|
|
333
|
-
// calculate offset
|
|
334
|
-
auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
|
|
335
|
-
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
|
|
336
|
-
|
|
337
|
-
// copy tensor from input to output file
|
|
338
|
-
copy_file_to_file(f_input, fout, offset, n_bytes);
|
|
339
|
-
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
printf("done\n");
|
|
343
|
-
// close the file
|
|
344
|
-
fout.close();
|
|
345
|
-
i_split++;
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
|
|
350
|
-
// TODO: detect OS and use copy_file_range() here for better performance
|
|
351
|
-
if (read_buf.size() < len) {
|
|
352
|
-
read_buf.resize(len);
|
|
353
|
-
}
|
|
354
|
-
f_in.seekg(in_offset);
|
|
355
|
-
f_in.read((char *)read_buf.data(), len);
|
|
356
|
-
f_out.write((const char *)read_buf.data(), len);
|
|
357
|
-
}
|
|
358
|
-
};
|
|
359
|
-
|
|
360
|
-
static void gguf_split(const split_params & split_params) {
|
|
361
|
-
struct ggml_context * ctx_meta = NULL;
|
|
362
|
-
|
|
363
|
-
struct gguf_init_params params = {
|
|
364
|
-
/*.no_alloc = */ true,
|
|
365
|
-
/*.ctx = */ &ctx_meta,
|
|
366
|
-
};
|
|
367
|
-
|
|
368
|
-
std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
|
|
369
|
-
if (!f_input.is_open()) {
|
|
370
|
-
fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
|
|
371
|
-
exit(EXIT_FAILURE);
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
|
|
375
|
-
if (!ctx_gguf) {
|
|
376
|
-
fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
|
|
377
|
-
exit(EXIT_FAILURE);
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
// prepare the strategy
|
|
381
|
-
split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
|
|
382
|
-
int n_split = strategy.ctx_outs.size();
|
|
383
|
-
strategy.print_info();
|
|
384
|
-
|
|
385
|
-
if (!split_params.dry_run) {
|
|
386
|
-
// write all output splits
|
|
387
|
-
strategy.write();
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
// done, clean up
|
|
391
|
-
gguf_free(ctx_gguf);
|
|
392
|
-
f_input.close();
|
|
393
|
-
|
|
394
|
-
fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
|
|
395
|
-
__func__, n_split, strategy.n_tensors);
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
static void gguf_merge(const split_params & split_params) {
|
|
399
|
-
fprintf(stderr, "%s: %s -> %s\n",
|
|
400
|
-
__func__, split_params.input.c_str(),
|
|
401
|
-
split_params.output.c_str());
|
|
402
|
-
int n_split = 1;
|
|
403
|
-
int total_tensors = 0;
|
|
404
|
-
|
|
405
|
-
// avoid overwriting existing output file
|
|
406
|
-
if (std::ifstream(split_params.output.c_str())) {
|
|
407
|
-
fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());
|
|
408
|
-
exit(EXIT_FAILURE);
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
auto * ctx_out = gguf_init_empty();
|
|
413
|
-
|
|
414
|
-
std::vector<uint8_t> read_data;
|
|
415
|
-
std::vector<ggml_context *> ctx_metas;
|
|
416
|
-
std::vector<gguf_context *> ctx_ggufs;
|
|
417
|
-
|
|
418
|
-
char split_path[PATH_MAX] = {0};
|
|
419
|
-
strncpy(split_path, split_params.input.c_str(), sizeof(split_path) - 1);
|
|
420
|
-
char split_prefix[PATH_MAX] = {0};
|
|
421
|
-
|
|
422
|
-
// First pass to find KV and tensors metadata
|
|
423
|
-
for (int i_split = 0; i_split < n_split; i_split++) {
|
|
424
|
-
struct ggml_context * ctx_meta = NULL;
|
|
425
|
-
|
|
426
|
-
struct gguf_init_params params = {
|
|
427
|
-
/*.no_alloc = */ true,
|
|
428
|
-
/*.ctx = */ &ctx_meta,
|
|
429
|
-
};
|
|
430
|
-
|
|
431
|
-
if (i_split > 0) {
|
|
432
|
-
llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
|
|
433
|
-
}
|
|
434
|
-
fprintf(stderr, "%s: reading metadata %s ...", __func__, split_path);
|
|
435
|
-
|
|
436
|
-
auto * ctx_gguf = gguf_init_from_file(split_path, params);
|
|
437
|
-
if (!ctx_gguf) {
|
|
438
|
-
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
|
|
439
|
-
exit(EXIT_FAILURE);
|
|
440
|
-
}
|
|
441
|
-
ctx_ggufs.push_back(ctx_gguf);
|
|
442
|
-
ctx_metas.push_back(ctx_meta);
|
|
443
|
-
|
|
444
|
-
if (i_split == 0) {
|
|
445
|
-
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
|
|
446
|
-
if (key_n_split < 0) {
|
|
447
|
-
fprintf(stderr,
|
|
448
|
-
"\n%s: input file does not contain %s metadata\n",
|
|
449
|
-
__func__,
|
|
450
|
-
LLM_KV_SPLIT_COUNT);
|
|
451
|
-
gguf_free(ctx_gguf);
|
|
452
|
-
ggml_free(ctx_meta);
|
|
453
|
-
gguf_free(ctx_out);
|
|
454
|
-
exit(EXIT_FAILURE);
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
|
|
458
|
-
if (n_split < 1) {
|
|
459
|
-
fprintf(stderr,
|
|
460
|
-
"\n%s: input file does not contain a valid split count %d\n",
|
|
461
|
-
__func__,
|
|
462
|
-
n_split);
|
|
463
|
-
gguf_free(ctx_gguf);
|
|
464
|
-
ggml_free(ctx_meta);
|
|
465
|
-
gguf_free(ctx_out);
|
|
466
|
-
exit(EXIT_FAILURE);
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
// Verify the file naming and extract split_prefix
|
|
470
|
-
if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
|
|
471
|
-
fprintf(stderr, "\n%s: unexpected input file name: %s"
|
|
472
|
-
" i_split=%d"
|
|
473
|
-
" n_split=%d\n", __func__,
|
|
474
|
-
split_path, i_split, n_split);
|
|
475
|
-
gguf_free(ctx_gguf);
|
|
476
|
-
ggml_free(ctx_meta);
|
|
477
|
-
gguf_free(ctx_out);
|
|
478
|
-
exit(EXIT_FAILURE);
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
// Do not trigger merge if we try to merge again the output
|
|
482
|
-
gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);
|
|
483
|
-
|
|
484
|
-
// Set metadata from the first split
|
|
485
|
-
gguf_set_kv(ctx_out, ctx_gguf);
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
auto n_tensors = gguf_get_n_tensors(ctx_gguf);
|
|
489
|
-
for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
|
|
490
|
-
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
|
491
|
-
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
|
492
|
-
gguf_add_tensor(ctx_out, t);
|
|
493
|
-
}
|
|
494
|
-
total_tensors += n_tensors;
|
|
495
|
-
|
|
496
|
-
fprintf(stderr, "\033[3Ddone\n");
|
|
497
|
-
}
|
|
498
|
-
std::ofstream fout;
|
|
499
|
-
if (!split_params.dry_run) {
|
|
500
|
-
fout.open(split_params.output.c_str(), std::ios::binary);
|
|
501
|
-
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
|
502
|
-
// placeholder for the meta data
|
|
503
|
-
auto meta_size = gguf_get_meta_size(ctx_out);
|
|
504
|
-
::zeros(fout, meta_size);
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
// Write tensors data
|
|
508
|
-
for (int i_split = 0; i_split < n_split; i_split++) {
|
|
509
|
-
llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
|
|
510
|
-
std::ifstream f_input(split_path, std::ios::binary);
|
|
511
|
-
if (!f_input.is_open()) {
|
|
512
|
-
fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
|
|
513
|
-
for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
|
|
514
|
-
gguf_free(ctx_ggufs[i]);
|
|
515
|
-
ggml_free(ctx_metas[i]);
|
|
516
|
-
}
|
|
517
|
-
gguf_free(ctx_out);
|
|
518
|
-
if (!split_params.dry_run) {
|
|
519
|
-
fout.close();
|
|
520
|
-
}
|
|
521
|
-
exit(EXIT_FAILURE);
|
|
522
|
-
}
|
|
523
|
-
fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);
|
|
524
|
-
|
|
525
|
-
auto * ctx_gguf = ctx_ggufs[i_split];
|
|
526
|
-
auto * ctx_meta = ctx_metas[i_split];
|
|
527
|
-
|
|
528
|
-
auto n_tensors = gguf_get_n_tensors(ctx_gguf);
|
|
529
|
-
for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
|
|
530
|
-
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
|
531
|
-
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
|
532
|
-
|
|
533
|
-
auto n_bytes = ggml_nbytes(t);
|
|
534
|
-
|
|
535
|
-
if (read_data.size() < n_bytes) {
|
|
536
|
-
read_data.resize(n_bytes);
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
|
|
540
|
-
f_input.seekg(offset);
|
|
541
|
-
f_input.read((char *)read_data.data(), n_bytes);
|
|
542
|
-
if (!split_params.dry_run) {
|
|
543
|
-
// write tensor data + padding
|
|
544
|
-
fout.write((const char *)read_data.data(), n_bytes);
|
|
545
|
-
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
|
|
549
|
-
gguf_free(ctx_gguf);
|
|
550
|
-
ggml_free(ctx_meta);
|
|
551
|
-
f_input.close();
|
|
552
|
-
fprintf(stderr, "\033[3Ddone\n");
|
|
553
|
-
}
|
|
554
|
-
|
|
555
|
-
if (!split_params.dry_run) {
|
|
556
|
-
// go back to beginning of file and write the updated metadata
|
|
557
|
-
fout.seekp(0);
|
|
558
|
-
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
|
559
|
-
gguf_get_meta_data(ctx_out, data.data());
|
|
560
|
-
fout.write((const char *)data.data(), data.size());
|
|
561
|
-
fout.close();
|
|
562
|
-
}
|
|
563
|
-
gguf_free(ctx_out);
|
|
564
|
-
|
|
565
|
-
fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
|
|
566
|
-
__func__, split_params.output.c_str(), n_split, total_tensors);
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
int main(int argc, const char ** argv) {
|
|
570
|
-
split_params params;
|
|
571
|
-
split_params_parse(argc, argv, params);
|
|
572
|
-
|
|
573
|
-
switch (params.operation) {
|
|
574
|
-
case OP_SPLIT: gguf_split(params);
|
|
575
|
-
break;
|
|
576
|
-
case OP_MERGE: gguf_merge(params);
|
|
577
|
-
break;
|
|
578
|
-
default: split_print_usage(argv[0]);
|
|
579
|
-
exit(EXIT_FAILURE);
|
|
580
|
-
}
|
|
581
|
-
|
|
582
|
-
return 0;
|
|
583
|
-
}
|