@fugood/llama.node 0.6.3 → 1.0.0-beta.2
This diff reflects the changes between the two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
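For reference, a diff like this can typically be regenerated locally with npm's built-in `npm diff` command (npm 7 or later); the package specs below are taken from the header above, and the local output formatting may differ slightly from the registry's rendering:

    npm diff --diff=@fugood/llama.node@0.6.3 --diff=@fugood/llama.node@1.0.0-beta.2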
- package/CMakeLists.txt +40 -30
- package/README.md +4 -1
- package/lib/binding.js +41 -29
- package/lib/binding.ts +26 -25
- package/package.json +40 -7
- package/scripts/build.js +47 -0
- package/scripts/llama.cpp.patch +109 -0
- package/src/anyascii.c +22223 -0
- package/src/anyascii.h +42 -0
- package/src/tts_utils.cpp +20 -7
- package/src/tts_utils.h +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
- package/src/llama.cpp/.github/workflows/build.yml +0 -1078
- package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
- package/src/llama.cpp/.github/workflows/docker.yml +0 -178
- package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
- package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
- package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
- package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
- package/src/llama.cpp/.github/workflows/release.yml +0 -739
- package/src/llama.cpp/.github/workflows/server.yml +0 -237
- package/src/llama.cpp/.github/workflows/winget.yml +0 -42
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
- package/src/llama.cpp/cmake/build-info.cmake +0 -64
- package/src/llama.cpp/cmake/common.cmake +0 -35
- package/src/llama.cpp/cmake/git-vars.cmake +0 -22
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
- package/src/llama.cpp/common/build-info.cpp.in +0 -4
- package/src/llama.cpp/docs/build.md +0 -561
- package/src/llama.cpp/examples/CMakeLists.txt +0 -43
- package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/batched/batched.cpp +0 -246
- package/src/llama.cpp/examples/chat-13B.bat +0 -57
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
- package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
- package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple/simple.cpp +0 -206
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/sycl/build.sh +0 -23
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
- package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
- package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
- package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/training/finetune.cpp +0 -96
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
- package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
- package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
- package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
- package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
- package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
- package/src/llama.cpp/ggml/src/ggml.c +0 -6550
- package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
- package/src/llama.cpp/models/.editorconfig +0 -1
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
- package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
- package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
- package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
- package/src/llama.cpp/prompts/alpaca.txt +0 -1
- package/src/llama.cpp/prompts/assistant.txt +0 -31
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
- package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
- package/src/llama.cpp/prompts/chat.txt +0 -28
- package/src/llama.cpp/prompts/dan-modified.txt +0 -1
- package/src/llama.cpp/prompts/dan.txt +0 -1
- package/src/llama.cpp/prompts/mnemonics.txt +0 -93
- package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
- package/src/llama.cpp/prompts/reason-act.txt +0 -18
- package/src/llama.cpp/requirements/requirements-all.txt +0 -15
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
- package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
- package/src/llama.cpp/requirements.txt +0 -13
- package/src/llama.cpp/scripts/build-info.sh +0 -30
- package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
- package/src/llama.cpp/scripts/xxd.cmake +0 -16
- package/src/llama.cpp/tests/CMakeLists.txt +0 -177
- package/src/llama.cpp/tests/get-model.cpp +0 -21
- package/src/llama.cpp/tests/get-model.h +0 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
- package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
- package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
- package/src/llama.cpp/tests/test-barrier.cpp +0 -94
- package/src/llama.cpp/tests/test-c.c +0 -7
- package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
- package/src/llama.cpp/tests/test-chat.cpp +0 -985
- package/src/llama.cpp/tests/test-double-float.cpp +0 -57
- package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
- package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
- package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
- package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
- package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
- package/src/llama.cpp/tests/test-log.cpp +0 -39
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
- package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
- package/src/llama.cpp/tests/test-opt.cpp +0 -904
- package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
- package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
- package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
- package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
- package/src/llama.cpp/tests/test-rope.cpp +0 -262
- package/src/llama.cpp/tests/test-sampling.cpp +0 -399
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
- package/src/llama.cpp/tools/CMakeLists.txt +0 -39
- package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
- package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
- package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
- package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
- package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
- package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
- package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
- package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
- package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
- package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
- package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
- package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/main/main.cpp +0 -977
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
- package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
- package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
- package/src/llama.cpp/tools/mtmd/clip.h +0 -101
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
- package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
- package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
- package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
- package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
- package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
- package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
- package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
- package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
- package/src/llama.cpp/tools/run/run.cpp +0 -1261
- package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
- package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
- package/src/llama.cpp/tools/server/httplib.h +0 -10506
- package/src/llama.cpp/tools/server/server.cpp +0 -4966
- package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
- package/src/llama.cpp/tools/server/utils.hpp +0 -1337
- package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
- package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
- package/src/llama.cpp/tools/tts/tts.cpp +0 -1092

package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp
@@ -1,35 +0,0 @@
-// Warns users that this filename was deprecated, and provides a link for more information.
-
-#include <cstdio>
-#include <string>
-#include <unordered_map>
-
-// Main
-int main(int argc, char** argv) {
-    std::string filename = "main";
-    if (argc >= 1) {
-        filename = argv[0];
-    }
-
-    // Get only the program name from the full path
-    auto pos = filename.find_last_of("/\\");
-    if (pos != std::string::npos) {
-        filename = filename.substr(pos+1);
-    }
-
-    // Append "llama-" to the beginning of filename to get the replacemnt filename
-    auto replacement_filename = "llama-" + filename;
-
-    // The exception is if the filename is "main", then our replacement filename is "llama-cli"
-    if (filename == "main") {
-        replacement_filename = "llama-cli";
-    }
-
-    fprintf(stdout, "\n");
-    fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
-    fprintf(stdout, " Please use '%s' instead.\n", replacement_filename.c_str());
-    fprintf(stdout, " See https://github.com/ggerganov/llama.cpp/tree/master/examples/deprecation-warning/README.md for more information.\n");
-    fprintf(stdout, "\n");
-
-    return EXIT_FAILURE;
-}

package/src/llama.cpp/examples/embedding/embedding.cpp
@@ -1,323 +0,0 @@
-#include "arg.h"
-#include "common.h"
-#include "log.h"
-#include "llama.h"
-
-#include <ctime>
-#include <algorithm>
-
-#if defined(_MSC_VER)
-#pragma warning(disable: 4244 4267) // possible loss of data
-#endif
-
-static std::vector<std::string> split_lines(const std::string & s, const std::string & separator = "\n") {
-    std::vector<std::string> lines;
-    size_t start = 0;
-    size_t end = s.find(separator);
-
-    while (end != std::string::npos) {
-        lines.push_back(s.substr(start, end - start));
-        start = end + separator.length();
-        end = s.find(separator, start);
-    }
-
-    lines.push_back(s.substr(start)); // Add the last part
-
-    return lines;
-}
-
-static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
-    size_t n_tokens = tokens.size();
-    for (size_t i = 0; i < n_tokens; i++) {
-        common_batch_add(batch, tokens[i], i, { seq_id }, true);
-    }
-}
-
-static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
-
-    // clear previous kv_cache values (irrelevant for embeddings)
-    llama_kv_self_clear(ctx);
-
-    // run model
-    LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq);
-    if (llama_encode(ctx, batch) < 0) {
-        LOG_ERR("%s : failed to encode\n", __func__);
-    }
-
-    for (int i = 0; i < batch.n_tokens; i++) {
-        if (!batch.logits[i]) {
-            continue;
-        }
-
-        const float * embd = nullptr;
-        int embd_pos = 0;
-
-        if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
-            // try to get token embeddings
-            embd = llama_get_embeddings_ith(ctx, i);
-            embd_pos = i;
-            GGML_ASSERT(embd != NULL && "failed to get token embeddings");
-        } else {
-            // try to get sequence embeddings - supported only when pooling_type is not NONE
-            embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
-            embd_pos = batch.seq_id[i][0];
-            GGML_ASSERT(embd != NULL && "failed to get sequence embeddings");
-        }
-
-        float * out = output + embd_pos * n_embd;
-        common_embd_normalize(embd, out, n_embd, embd_norm);
-    }
-}
-
-int main(int argc, char ** argv) {
-    common_params params;
-
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EMBEDDING)) {
-        return 1;
-    }
-
-    common_init();
-
-    params.embedding = true;
-
-    // utilize the full context
-    if (params.n_batch < params.n_ctx) {
-        LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);
-        params.n_batch = params.n_ctx;
-    }
-
-    // For non-causal models, batch size must be equal to ubatch size
-    params.n_ubatch = params.n_batch;
-
-    llama_backend_init();
-    llama_numa_init(params.numa);
-
-    // load the model
-    common_init_result llama_init = common_init_from_params(params);
-
-    llama_model * model = llama_init.model.get();
-    llama_context * ctx = llama_init.context.get();
-
-    if (model == NULL) {
-        LOG_ERR("%s: unable to load model\n", __func__);
-        return 1;
-    }
-
-    const llama_vocab * vocab = llama_model_get_vocab(model);
-
-    const int n_ctx_train = llama_model_n_ctx_train(model);
-    const int n_ctx = llama_n_ctx(ctx);
-
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
-
-    if (llama_model_has_encoder(model) && llama_model_has_decoder(model)) {
-        LOG_ERR("%s: computing embeddings in encoder-decoder models is not supported\n", __func__);
-        return 1;
-    }
-
-    if (n_ctx > n_ctx_train) {
-        LOG_WRN("%s: warning: model was trained on only %d context tokens (%d specified)\n",
-                __func__, n_ctx_train, n_ctx);
-    }
-
-    // print system information
-    {
-        LOG_INF("\n");
-        LOG_INF("%s\n", common_params_get_system_info(params).c_str());
-    }
-
-    // split the prompt into lines
-    std::vector<std::string> prompts = split_lines(params.prompt, params.embd_sep);
-
-    // max batch size
-    const uint64_t n_batch = params.n_batch;
-
-    // tokenize the prompts and trim
-    std::vector<std::vector<int32_t>> inputs;
-    for (const auto & prompt : prompts) {
-        auto inp = common_tokenize(ctx, prompt, true, true);
-        if (inp.size() > n_batch) {
-            LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
-                    __func__, (long long int) inp.size(), (long long int) n_batch);
-            return 1;
-        }
-        inputs.push_back(inp);
-    }
-
-    // check if the last token is SEP
-    // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
-    for (auto & inp : inputs) {
-        if (inp.empty() || inp.back() != llama_vocab_sep(vocab)) {
-            LOG_WRN("%s: last token in the prompt is not SEP\n", __func__);
-            LOG_WRN("%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
-        }
-    }
-
-    // tokenization stats
-    if (params.verbose_prompt) {
-        for (int i = 0; i < (int) inputs.size(); i++) {
-            LOG_INF("%s: prompt %d: '%s'\n", __func__, i, prompts[i].c_str());
-            LOG_INF("%s: number of tokens in prompt = %zu\n", __func__, inputs[i].size());
-            for (int j = 0; j < (int) inputs[i].size(); j++) {
-                LOG("%6d -> '%s'\n", inputs[i][j], common_token_to_piece(ctx, inputs[i][j]).c_str());
-            }
-            LOG("\n\n");
-        }
-    }
-
-    // initialize batch
-    const int n_prompts = prompts.size();
-    struct llama_batch batch = llama_batch_init(n_batch, 0, 1);
-
-    // count number of embeddings
-    int n_embd_count = 0;
-    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
-        for (int k = 0; k < n_prompts; k++) {
-            n_embd_count += inputs[k].size();
-        }
-    } else {
-        n_embd_count = n_prompts;
-    }
-
-    // allocate output
-    const int n_embd = llama_model_n_embd(model);
-    std::vector<float> embeddings(n_embd_count * n_embd, 0);
-    float * emb = embeddings.data();
-
-    // break into batches
-    int e = 0; // number of embeddings already stored
-    int s = 0; // number of prompts in current batch
-    for (int k = 0; k < n_prompts; k++) {
-        // clamp to n_batch tokens
-        auto & inp = inputs[k];
-
-        const uint64_t n_toks = inp.size();
-
-        // encode if at capacity
-        if (batch.n_tokens + n_toks > n_batch) {
-            float * out = emb + e * n_embd;
-            batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
-            e += pooling_type == LLAMA_POOLING_TYPE_NONE ? batch.n_tokens : s;
-            s = 0;
-            common_batch_clear(batch);
-        }
-
-        // add to batch
-        batch_add_seq(batch, inp, s);
-        s += 1;
-    }
-
-    // final batch
-    float * out = emb + e * n_embd;
-    batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
-
-    if (params.embd_out.empty()) {
-        LOG("\n");
-
-        if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
-            for (int j = 0; j < n_embd_count; j++) {
-                LOG("embedding %d: ", j);
-                for (int i = 0; i < std::min(3, n_embd); i++) {
-                    if (params.embd_normalize == 0) {
-                        LOG("%6.0f ", emb[j * n_embd + i]);
-                    } else {
-                        LOG("%9.6f ", emb[j * n_embd + i]);
-                    }
-                }
-                LOG(" ... ");
-                for (int i = n_embd - 3; i < n_embd; i++) {
-                    if (params.embd_normalize == 0) {
-                        LOG("%6.0f ", emb[j * n_embd + i]);
-                    } else {
-                        LOG("%9.6f ", emb[j * n_embd + i]);
-                    }
-                }
-                LOG("\n");
-            }
-        } else if (pooling_type == LLAMA_POOLING_TYPE_RANK) {
-            for (int j = 0; j < n_embd_count; j++) {
-                // NOTE: if you change this log - update the tests in ci/run.sh
-                LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]);
-            }
-        } else {
-            // print the first part of the embeddings or for a single prompt, the full embedding
-            for (int j = 0; j < n_prompts; j++) {
-                LOG("embedding %d: ", j);
-                for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
-                    if (params.embd_normalize == 0) {
-                        LOG("%6.0f ", emb[j * n_embd + i]);
-                    } else {
-                        LOG("%9.6f ", emb[j * n_embd + i]);
-                    }
-                }
-                LOG("\n");
-            }
-
-            // print cosine similarity matrix
-            if (n_prompts > 1) {
-                LOG("\n");
-                LOG("cosine similarity matrix:\n\n");
-                for (int i = 0; i < n_prompts; i++) {
-                    LOG("%6.6s ", prompts[i].c_str());
-                }
-                LOG("\n");
-                for (int i = 0; i < n_prompts; i++) {
-                    for (int j = 0; j < n_prompts; j++) {
-                        float sim = common_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
-                        LOG("%6.2f ", sim);
-                    }
-                    LOG("%1.10s", prompts[i].c_str());
-                    LOG("\n");
-                }
-            }
-        }
-    }
-
-    if (params.embd_out == "json" || params.embd_out == "json+" || params.embd_out == "array") {
-        const bool notArray = params.embd_out != "array";
-
-        LOG(notArray ? "{\n \"object\": \"list\",\n \"data\": [\n" : "[");
-        for (int j = 0;;) { // at least one iteration (one prompt)
-            if (notArray) LOG(" {\n \"object\": \"embedding\",\n \"index\": %d,\n \"embedding\": ",j);
-            LOG("[");
-            for (int i = 0;;) { // at least one iteration (n_embd > 0)
-                LOG(params.embd_normalize == 0 ? "%1.0f" : "%1.7f", emb[j * n_embd + i]);
-                i++;
-                if (i < n_embd) LOG(","); else break;
-            }
-            LOG(notArray ? "]\n }" : "]");
-            j++;
-            if (j < n_embd_count) LOG(notArray ? ",\n" : ","); else break;
-        }
-        LOG(notArray ? "\n ]" : "]\n");
-
-        if (params.embd_out == "json+" && n_prompts > 1) {
-            LOG(",\n \"cosineSimilarity\": [\n");
-            for (int i = 0;;) { // at least two iteration (n_embd_count > 1)
-                LOG(" [");
-                for (int j = 0;;) { // at least two iteration (n_embd_count > 1)
-                    float sim = common_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
-                    LOG("%6.2f", sim);
-                    j++;
-                    if (j < n_embd_count) LOG(", "); else break;
-                }
-                LOG(" ]");
-                i++;
-                if (i < n_embd_count) LOG(",\n"); else break;
-            }
-            LOG("\n ]");
-        }
-
-        if (notArray) LOG("\n}\n");
-    }
-
-    LOG("\n");
-    llama_perf_context_print(ctx);
-
-    // clean up
-    llama_batch_free(batch);
-    llama_backend_free();
-
-    return 0;
-}

package/src/llama.cpp/examples/eval-callback/CMakeLists.txt
@@ -1,10 +0,0 @@
-set(TARGET llama-eval-callback)
-add_executable(${TARGET} eval-callback.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
-set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET}
-         COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
-set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)

package/src/llama.cpp/examples/eval-callback/eval-callback.cpp
@@ -1,194 +0,0 @@
-#include "arg.h"
-#include "common.h"
-#include "log.h"
-#include "llama.h"
-#include "ggml.h"
-
-#include <cstdio>
-#include <string>
-#include <vector>
-
-/**
- * This the arbitrary data which will be passed to each callback.
- * Later on we can for example add operation or tensor name filter from the CLI arg, or a file descriptor to dump the tensor.
- */
-struct callback_data {
-    std::vector<uint8_t> data;
-};
-
-static std::string ggml_ne_string(const ggml_tensor * t) {
-    std::string str;
-    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
-        str += std::to_string(t->ne[i]);
-        if (i + 1 < GGML_MAX_DIMS) {
-            str += ", ";
-        }
-    }
-    return str;
-}
-
-static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
-    GGML_ASSERT(n > 0);
-    float sum = 0;
-    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
-        LOG(" [\n");
-        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
-            if (i2 == n && ne[2] > 2*n) {
-                LOG(" ..., \n");
-                i2 = ne[2] - n;
-            }
-            LOG(" [\n");
-            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
-                if (i1 == n && ne[1] > 2*n) {
-                    LOG(" ..., \n");
-                    i1 = ne[1] - n;
-                }
-                LOG(" [");
-                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
-                    if (i0 == n && ne[0] > 2*n) {
-                        LOG("..., ");
-                        i0 = ne[0] - n;
-                    }
-                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
-                    float v;
-                    if (type == GGML_TYPE_F16) {
-                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
-                    } else if (type == GGML_TYPE_F32) {
-                        v = *(float *) &data[i];
-                    } else if (type == GGML_TYPE_I32) {
-                        v = (float) *(int32_t *) &data[i];
-                    } else if (type == GGML_TYPE_I16) {
-                        v = (float) *(int16_t *) &data[i];
-                    } else if (type == GGML_TYPE_I8) {
-                        v = (float) *(int8_t *) &data[i];
-                    } else {
-                        GGML_ABORT("fatal error");
-                    }
-                    LOG("%12.4f", v);
-                    sum += v;
-                    if (i0 < ne[0] - 1) LOG(", ");
-                }
-                LOG("],\n");
-            }
-            LOG(" ],\n");
-        }
-        LOG(" ]\n");
-        LOG(" sum = %f\n", sum);
-    }
-}
-
-/**
- * GGML operations callback during the graph execution.
- *
- * @param t current tensor
- * @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor
- *            if we return true, a follow-up call will be made with ask=false in which we can do the actual collection.
- *            see ggml_backend_sched_eval_callback
- * @param user_data user data to pass at each call back
- * @return true to receive data or continue the graph, false otherwise
- */
-static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
-    auto * cb_data = (callback_data *) user_data;
-
-    const struct ggml_tensor * src0 = t->src[0];
-    const struct ggml_tensor * src1 = t->src[1];
-
-    if (ask) {
-        return true; // Always retrieve data
-    }
-
-    char src1_str[128] = {0};
-    if (src1) {
-        snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, ggml_ne_string(src1).c_str());
-    }
-
-    LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__,
-        t->name, ggml_type_name(t->type), ggml_op_desc(t),
-        src0->name, ggml_ne_string(src0).c_str(),
-        src1 ? src1_str : "",
-        ggml_ne_string(t).c_str());
-
-
-    // copy the data from the GPU memory if needed
-    const bool is_host = ggml_backend_buffer_is_host(t->buffer);
-
-    if (!is_host) {
-        auto n_bytes = ggml_nbytes(t);
-        cb_data->data.resize(n_bytes);
-        ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
-    }
-
-    if (!ggml_is_quantized(t->type)) {
-        uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
-        ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
-    }
-
-    return true;
-}
-
-static bool run(llama_context * ctx, const common_params & params) {
-    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_model_get_vocab(model);
-
-    const bool add_bos = llama_vocab_get_add_bos(vocab);
-
-    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, add_bos);
-
-    if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size()))) {
-        LOG_ERR("%s : failed to eval\n", __func__);
-        return false;
-    }
-
-    return true;
-}
-
-int main(int argc, char ** argv) {
-    callback_data cb_data;
-
-    common_params params;
-
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
-        return 1;
-    }
-
-    common_init();
-
-    llama_backend_init();
-    llama_numa_init(params.numa);
-
-    // pass the callback to the backend scheduler
-    // it will be executed for each node during the graph computation
-    params.cb_eval = ggml_debug;
-    params.cb_eval_user_data = &cb_data;
-    params.warmup = false;
-
-    // init
-    common_init_result llama_init = common_init_from_params(params);
-
-    llama_model * model = llama_init.model.get();
-    llama_context * ctx = llama_init.context.get();
-
-    if (model == nullptr || ctx == nullptr) {
-        LOG_ERR("%s : failed to init\n", __func__);
-        return 1;
-    }
-
-    // print system information
-    {
-        LOG_INF("\n");
-        LOG_INF("%s\n", common_params_get_system_info(params).c_str());
-        LOG_INF("\n");
-    }
-
-    bool OK = run(ctx, params);
-    if (!OK) {
-        return 1;
-    }
-
-    LOG("\n");
-    llama_perf_context_print(ctx);
-
-    llama_backend_free();
-
-    return 0;
-}

package/src/llama.cpp/examples/gen-docs/gen-docs.cpp
@@ -1,83 +0,0 @@
-#include "arg.h"
-#include "common.h"
-
-#include <fstream>
-#include <string>
-
-// Export usage message (-h) to markdown format
-
-static void write_table_header(std::ofstream & file) {
-    file << "| Argument | Explanation |\n";
-    file << "| -------- | ----------- |\n";
-}
-
-static void write_table_entry(std::ofstream & file, const common_arg & opt) {
-    file << "| `";
-    // args
-    for (const auto & arg : opt.args) {
-        if (arg == opt.args.front()) {
-            file << arg;
-            if (opt.args.size() > 1) file << ", ";
-        } else {
-            file << arg << (arg != opt.args.back() ? ", " : "");
-        }
-    }
-    // value hint
-    if (opt.value_hint) {
-        std::string md_value_hint(opt.value_hint);
-        string_replace_all(md_value_hint, "|", "\\|");
-        file << " " << md_value_hint;
-    }
-    if (opt.value_hint_2) {
-        std::string md_value_hint_2(opt.value_hint_2);
-        string_replace_all(md_value_hint_2, "|", "\\|");
-        file << " " << md_value_hint_2;
-    }
-    // help text
-    std::string md_help(opt.help);
-    string_replace_all(md_help, "\n", "<br/>");
-    string_replace_all(md_help, "|", "\\|");
-    file << "` | " << md_help << " |\n";
-}
-
-static void write_table(std::ofstream & file, std::vector<common_arg *> & opts) {
-    write_table_header(file);
-    for (const auto & opt : opts) {
-        write_table_entry(file, *opt);
-    }
-}
-
-static void export_md(std::string fname, llama_example ex) {
-    std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
-
-    common_params params;
-    auto ctx_arg = common_params_parser_init(params, ex);
-
-    std::vector<common_arg *> common_options;
-    std::vector<common_arg *> sparam_options;
-    std::vector<common_arg *> specific_options;
-    for (auto & opt : ctx_arg.options) {
-        // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
-        if (opt.is_sparam) {
-            sparam_options.push_back(&opt);
-        } else if (opt.in_example(ctx_arg.ex)) {
-            specific_options.push_back(&opt);
-        } else {
-            common_options.push_back(&opt);
-        }
-    }
-
-    file << "**Common params**\n\n";
-    write_table(file, common_options);
-    file << "\n\n**Sampling params**\n\n";
-    write_table(file, sparam_options);
-    file << "\n\n**Example-specific params**\n\n";
-    write_table(file, specific_options);
-}
-
-int main(int, char **) {
-    export_md("autogen-main.md", LLAMA_EXAMPLE_MAIN);
-    export_md("autogen-server.md", LLAMA_EXAMPLE_SERVER);
-
-    return 0;
-}