@fugood/llama.node 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +8 -1
- package/package.json +1 -1
- package/patches/llama.patch +12 -12
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/LlamaContext.cpp +33 -1
- package/src/LlamaContext.h +1 -0
- package/src/LoadSessionWorker.cpp +1 -0
- package/src/llama.cpp/.github/workflows/bench.yml +310 -0
- package/src/llama.cpp/.github/workflows/build.yml +1315 -0
- package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
- package/src/llama.cpp/.github/workflows/docker.yml +116 -0
- package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
- package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
- package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
- package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
- package/src/llama.cpp/.github/workflows/server.yml +183 -0
- package/src/llama.cpp/CMakeLists.txt +91 -1245
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
- package/src/llama.cpp/cmake/build-info.cmake +58 -0
- package/src/llama.cpp/cmake/git-vars.cmake +22 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -3
- package/src/llama.cpp/common/build-info.cpp.in +4 -0
- package/src/llama.cpp/common/common.cpp +1116 -877
- package/src/llama.cpp/common/common.h +191 -77
- package/src/llama.cpp/common/grammar-parser.cpp +118 -31
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
- package/src/llama.cpp/common/log.h +1 -1
- package/src/llama.cpp/common/ngram-cache.h +10 -3
- package/src/llama.cpp/common/sampling.cpp +19 -10
- package/src/llama.cpp/docs/build.md +353 -0
- package/src/llama.cpp/examples/CMakeLists.txt +22 -22
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +52 -55
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/chat-13B.bat +57 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
- package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
- package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
- package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
- package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
- package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +38 -153
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
- package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
- package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
- package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
- package/src/llama.cpp/examples/llava/clip.cpp +23 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
- package/src/llama.cpp/examples/llava/requirements.txt +3 -2
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +98 -75
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
- package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
- package/src/llama.cpp/examples/server/server.cpp +274 -671
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +28 -29
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +21 -29
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +23 -0
- package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
- package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
- package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
- package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
- package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
- package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
- package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
- package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
- package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
- package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
- package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
- package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
- package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
- package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
- package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
- package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
- package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
- package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
- package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
- package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
- package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
- package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
- package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
- package/src/llama.cpp/models/.editorconfig +1 -0
- package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/requirements/requirements-all.txt +12 -0
- package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
- package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
- package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
- package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
- package/src/llama.cpp/requirements.txt +5 -4
- package/src/llama.cpp/scripts/build-info.sh +30 -0
- package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
- package/src/llama.cpp/src/CMakeLists.txt +33 -0
- package/src/llama.cpp/src/llama-grammar.cpp +539 -0
- package/src/llama.cpp/src/llama-grammar.h +39 -0
- package/src/llama.cpp/src/llama-impl.h +26 -0
- package/src/llama.cpp/src/llama-sampling.cpp +635 -0
- package/src/llama.cpp/src/llama-sampling.h +56 -0
- package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
- package/src/llama.cpp/src/llama-vocab.h +130 -0
- package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
- package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
- package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
- package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
- package/src/llama.cpp/tests/CMakeLists.txt +19 -20
- package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
- package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
- package/src/llama.cpp/tests/test-double-float.cpp +2 -2
- package/src/llama.cpp/tests/test-grad0.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
- package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
- package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
- package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
- package/src/llama.cpp/tests/test-rope.cpp +3 -4
- package/src/llama.cpp/tests/test-sampling.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
- package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
- package/src/llama.cpp/ggml-opencl.cpp +0 -2305
- package/src/llama.cpp/ggml-opencl.h +0 -36
- package/src/llama.cpp/ggml-sycl.cpp +0 -17340
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
- /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
- /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
- /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
- /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
- /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
|
@@ -254,18 +254,8 @@
|
|
|
254
254
|
|
|
255
255
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
|
256
256
|
|
|
257
|
-
#define GGML_ASSERT(x) \
|
|
258
|
-
do { \
|
|
259
|
-
if (!(x)) { \
|
|
260
|
-
fflush(stdout); \
|
|
261
|
-
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
|
|
262
|
-
ggml_print_backtrace(); \
|
|
263
|
-
abort(); \
|
|
264
|
-
} \
|
|
265
|
-
} while (0)
|
|
266
|
-
|
|
267
257
|
#ifndef NDEBUG
|
|
268
|
-
#define GGML_UNREACHABLE()
|
|
258
|
+
#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
|
|
269
259
|
#elif defined(__GNUC__)
|
|
270
260
|
#define GGML_UNREACHABLE() __builtin_unreachable()
|
|
271
261
|
#elif defined(_MSC_VER)
|
|
@@ -274,6 +264,17 @@
|
|
|
274
264
|
#define GGML_UNREACHABLE() ((void) 0)
|
|
275
265
|
#endif
|
|
276
266
|
|
|
267
|
+
#ifdef __cplusplus
|
|
268
|
+
#define GGML_NORETURN [[noreturn]]
|
|
269
|
+
#elif defined(_MSC_VER)
|
|
270
|
+
#define GGML_NORETURN __declspec(noreturn)
|
|
271
|
+
#else
|
|
272
|
+
#define GGML_NORETURN _Noreturn
|
|
273
|
+
#endif
|
|
274
|
+
|
|
275
|
+
#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
|
|
276
|
+
#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
|
|
277
|
+
|
|
277
278
|
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
|
278
279
|
// main purpose is to reduce code duplication and improve readability.
|
|
279
280
|
//
|
|
@@ -312,10 +313,19 @@
|
|
|
312
313
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
|
313
314
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
|
314
315
|
|
|
316
|
+
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
|
317
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
|
318
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
|
319
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
|
320
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
|
321
|
+
|
|
315
322
|
#ifdef __cplusplus
|
|
316
323
|
extern "C" {
|
|
317
324
|
#endif
|
|
318
325
|
|
|
326
|
+
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
|
327
|
+
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
|
328
|
+
|
|
319
329
|
enum ggml_status {
|
|
320
330
|
GGML_STATUS_ALLOC_FAILED = -2,
|
|
321
331
|
GGML_STATUS_FAILED = -1,
|
|
@@ -377,6 +387,9 @@ extern "C" {
|
|
|
377
387
|
GGML_TYPE_F64 = 28,
|
|
378
388
|
GGML_TYPE_IQ1_M = 29,
|
|
379
389
|
GGML_TYPE_BF16 = 30,
|
|
390
|
+
GGML_TYPE_Q4_0_4_4 = 31,
|
|
391
|
+
GGML_TYPE_Q4_0_4_8 = 32,
|
|
392
|
+
GGML_TYPE_Q4_0_8_8 = 33,
|
|
380
393
|
GGML_TYPE_COUNT,
|
|
381
394
|
};
|
|
382
395
|
|
|
@@ -418,6 +431,9 @@ extern "C" {
|
|
|
418
431
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
|
419
432
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
|
420
433
|
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
|
434
|
+
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
|
|
435
|
+
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
|
|
436
|
+
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
|
|
421
437
|
};
|
|
422
438
|
|
|
423
439
|
// available tensor operations:
|
|
@@ -585,11 +601,7 @@ extern "C" {
|
|
|
585
601
|
struct ggml_tensor * grad;
|
|
586
602
|
struct ggml_tensor * src[GGML_MAX_SRC];
|
|
587
603
|
|
|
588
|
-
//
|
|
589
|
-
int perf_runs;
|
|
590
|
-
int64_t perf_cycles;
|
|
591
|
-
int64_t perf_time_us;
|
|
592
|
-
|
|
604
|
+
// source tensor and offset for views
|
|
593
605
|
struct ggml_tensor * view_src;
|
|
594
606
|
size_t view_offs;
|
|
595
607
|
|
|
@@ -599,7 +611,7 @@ extern "C" {
|
|
|
599
611
|
|
|
600
612
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
|
601
613
|
|
|
602
|
-
char padding[
|
|
614
|
+
// char padding[4];
|
|
603
615
|
};
|
|
604
616
|
|
|
605
617
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
|
@@ -628,8 +640,11 @@ extern "C" {
|
|
|
628
640
|
GGML_CGRAPH_EVAL_ORDER_COUNT
|
|
629
641
|
};
|
|
630
642
|
|
|
643
|
+
typedef uint32_t ggml_bitset_t;
|
|
644
|
+
|
|
631
645
|
struct ggml_hash_set {
|
|
632
646
|
size_t size;
|
|
647
|
+
ggml_bitset_t * used;
|
|
633
648
|
struct ggml_tensor ** keys;
|
|
634
649
|
};
|
|
635
650
|
|
|
@@ -643,14 +658,9 @@ extern "C" {
|
|
|
643
658
|
struct ggml_tensor ** grads;
|
|
644
659
|
struct ggml_tensor ** leafs;
|
|
645
660
|
|
|
646
|
-
struct ggml_hash_set
|
|
661
|
+
struct ggml_hash_set visited_hash_set;
|
|
647
662
|
|
|
648
663
|
enum ggml_cgraph_eval_order order;
|
|
649
|
-
|
|
650
|
-
// performance
|
|
651
|
-
int perf_runs;
|
|
652
|
-
int64_t perf_cycles;
|
|
653
|
-
int64_t perf_time_us;
|
|
654
664
|
};
|
|
655
665
|
|
|
656
666
|
// scratch buffer
|
|
@@ -667,28 +677,6 @@ extern "C" {
|
|
|
667
677
|
bool no_alloc; // don't allocate memory for the tensor data
|
|
668
678
|
};
|
|
669
679
|
|
|
670
|
-
|
|
671
|
-
// compute types
|
|
672
|
-
|
|
673
|
-
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
|
674
|
-
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
|
675
|
-
enum ggml_task_type {
|
|
676
|
-
GGML_TASK_TYPE_INIT = 0,
|
|
677
|
-
GGML_TASK_TYPE_COMPUTE,
|
|
678
|
-
GGML_TASK_TYPE_FINALIZE,
|
|
679
|
-
};
|
|
680
|
-
|
|
681
|
-
struct ggml_compute_params {
|
|
682
|
-
enum ggml_task_type type;
|
|
683
|
-
|
|
684
|
-
// ith = thread index, nth = number of threads
|
|
685
|
-
int ith, nth;
|
|
686
|
-
|
|
687
|
-
// work buffer for all threads
|
|
688
|
-
size_t wsize;
|
|
689
|
-
void * wdata;
|
|
690
|
-
};
|
|
691
|
-
|
|
692
680
|
// numa strategies
|
|
693
681
|
enum ggml_numa_strategy {
|
|
694
682
|
GGML_NUMA_STRATEGY_DISABLED = 0,
|
|
@@ -717,8 +705,6 @@ extern "C" {
|
|
|
717
705
|
GGML_API int64_t ggml_cycles(void);
|
|
718
706
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
|
719
707
|
|
|
720
|
-
GGML_API void ggml_print_backtrace(void);
|
|
721
|
-
|
|
722
708
|
// accepts a UTF-8 path, even on Windows
|
|
723
709
|
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
|
|
724
710
|
|
|
@@ -733,9 +719,9 @@ extern "C" {
|
|
|
733
719
|
GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
|
734
720
|
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
|
|
735
721
|
|
|
736
|
-
GGML_API GGML_CALL
|
|
737
|
-
GGML_API GGML_CALL size_t
|
|
738
|
-
GGML_API GGML_CALL size_t
|
|
722
|
+
GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
|
|
723
|
+
GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
|
|
724
|
+
GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
|
|
739
725
|
|
|
740
726
|
GGML_DEPRECATED(
|
|
741
727
|
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
|
|
@@ -756,7 +742,6 @@ extern "C" {
|
|
|
756
742
|
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
|
|
757
743
|
|
|
758
744
|
GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
|
759
|
-
GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
|
760
745
|
GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
|
761
746
|
GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
|
|
762
747
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
|
@@ -765,9 +750,16 @@ extern "C" {
|
|
|
765
750
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
|
766
751
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
|
767
752
|
|
|
753
|
+
GGML_API GGML_CALL bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
|
754
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
|
755
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
|
756
|
+
GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
|
757
|
+
|
|
768
758
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
769
759
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
770
760
|
|
|
761
|
+
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
762
|
+
|
|
771
763
|
// use this to compute the memory overhead of a tensor
|
|
772
764
|
GGML_API size_t ggml_tensor_overhead(void);
|
|
773
765
|
|
|
@@ -1461,7 +1453,6 @@ extern "C" {
|
|
|
1461
1453
|
// rotary position embedding
|
|
1462
1454
|
// if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
|
|
1463
1455
|
// if mode & 2 == 1, GPT-NeoX style
|
|
1464
|
-
// if mode & 4 == 1, ChatGLM style
|
|
1465
1456
|
//
|
|
1466
1457
|
// b is an int32 vector with size a->ne[2], it contains the positions
|
|
1467
1458
|
// c is freq factors (e.g. phi3-128k), (optional)
|
|
@@ -1470,8 +1461,7 @@ extern "C" {
|
|
|
1470
1461
|
struct ggml_tensor * a,
|
|
1471
1462
|
struct ggml_tensor * b,
|
|
1472
1463
|
int n_dims,
|
|
1473
|
-
int mode
|
|
1474
|
-
int n_ctx);
|
|
1464
|
+
int mode);
|
|
1475
1465
|
|
|
1476
1466
|
// in-place, returns view(a)
|
|
1477
1467
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
|
@@ -1479,8 +1469,7 @@ extern "C" {
|
|
|
1479
1469
|
struct ggml_tensor * a,
|
|
1480
1470
|
struct ggml_tensor * b,
|
|
1481
1471
|
int n_dims,
|
|
1482
|
-
int mode
|
|
1483
|
-
int n_ctx);
|
|
1472
|
+
int mode);
|
|
1484
1473
|
|
|
1485
1474
|
// custom RoPE
|
|
1486
1475
|
GGML_API struct ggml_tensor * ggml_rope_ext(
|
|
@@ -1490,8 +1479,7 @@ extern "C" {
|
|
|
1490
1479
|
struct ggml_tensor * c,
|
|
1491
1480
|
int n_dims,
|
|
1492
1481
|
int mode,
|
|
1493
|
-
int
|
|
1494
|
-
int n_orig_ctx,
|
|
1482
|
+
int n_ctx_orig,
|
|
1495
1483
|
float freq_base,
|
|
1496
1484
|
float freq_scale,
|
|
1497
1485
|
float ext_factor,
|
|
@@ -1507,8 +1495,7 @@ extern "C" {
|
|
|
1507
1495
|
struct ggml_tensor * c,
|
|
1508
1496
|
int n_dims,
|
|
1509
1497
|
int mode,
|
|
1510
|
-
int
|
|
1511
|
-
int n_orig_ctx,
|
|
1498
|
+
int n_ctx_orig,
|
|
1512
1499
|
float freq_base,
|
|
1513
1500
|
float freq_scale,
|
|
1514
1501
|
float ext_factor,
|
|
@@ -1522,8 +1509,7 @@ extern "C" {
|
|
|
1522
1509
|
struct ggml_tensor * b,
|
|
1523
1510
|
int n_dims,
|
|
1524
1511
|
int mode,
|
|
1525
|
-
int
|
|
1526
|
-
int n_orig_ctx,
|
|
1512
|
+
int n_ctx_orig,
|
|
1527
1513
|
float freq_base,
|
|
1528
1514
|
float freq_scale,
|
|
1529
1515
|
float ext_factor,
|
|
@@ -1538,8 +1524,7 @@ extern "C" {
|
|
|
1538
1524
|
struct ggml_tensor * b,
|
|
1539
1525
|
int n_dims,
|
|
1540
1526
|
int mode,
|
|
1541
|
-
int
|
|
1542
|
-
int n_orig_ctx,
|
|
1527
|
+
int n_ctx_orig,
|
|
1543
1528
|
float freq_base,
|
|
1544
1529
|
float freq_scale,
|
|
1545
1530
|
float ext_factor,
|
|
@@ -1550,7 +1535,7 @@ extern "C" {
|
|
|
1550
1535
|
|
|
1551
1536
|
// compute correction dims for YaRN RoPE scaling
|
|
1552
1537
|
GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
1553
|
-
int n_dims, int
|
|
1538
|
+
int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
|
1554
1539
|
|
|
1555
1540
|
// rotary position embedding backward, i.e compute dx from dy
|
|
1556
1541
|
// a - dy
|
|
@@ -1561,16 +1546,13 @@ extern "C" {
|
|
|
1561
1546
|
struct ggml_tensor * c,
|
|
1562
1547
|
int n_dims,
|
|
1563
1548
|
int mode,
|
|
1564
|
-
int
|
|
1565
|
-
int n_orig_ctx,
|
|
1549
|
+
int n_ctx_orig,
|
|
1566
1550
|
float freq_base,
|
|
1567
1551
|
float freq_scale,
|
|
1568
1552
|
float ext_factor,
|
|
1569
1553
|
float attn_factor,
|
|
1570
1554
|
float beta_fast,
|
|
1571
|
-
float beta_slow
|
|
1572
|
-
float xpos_base,
|
|
1573
|
-
bool xpos_down);
|
|
1555
|
+
float beta_slow);
|
|
1574
1556
|
|
|
1575
1557
|
// clamp
|
|
1576
1558
|
// in-place, returns view(a)
|
|
@@ -2028,8 +2010,8 @@ extern "C" {
|
|
|
2028
2010
|
|
|
2029
2011
|
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
|
2030
2012
|
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
|
2031
|
-
GGML_API struct ggml_cplan ggml_graph_plan
|
|
2032
|
-
GGML_API enum ggml_status ggml_graph_compute
|
|
2013
|
+
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
|
|
2014
|
+
GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
|
2033
2015
|
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
|
2034
2016
|
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
|
2035
2017
|
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
|
@@ -2413,15 +2395,17 @@ extern "C" {
|
|
|
2413
2395
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
|
2414
2396
|
GGML_API int ggml_cpu_has_blas (void);
|
|
2415
2397
|
GGML_API int ggml_cpu_has_cuda (void);
|
|
2416
|
-
GGML_API int ggml_cpu_has_clblast (void);
|
|
2417
2398
|
GGML_API int ggml_cpu_has_vulkan (void);
|
|
2418
2399
|
GGML_API int ggml_cpu_has_kompute (void);
|
|
2419
2400
|
GGML_API int ggml_cpu_has_gpublas (void);
|
|
2420
2401
|
GGML_API int ggml_cpu_has_sse3 (void);
|
|
2421
2402
|
GGML_API int ggml_cpu_has_ssse3 (void);
|
|
2422
2403
|
GGML_API int ggml_cpu_has_sycl (void);
|
|
2404
|
+
GGML_API int ggml_cpu_has_rpc (void);
|
|
2423
2405
|
GGML_API int ggml_cpu_has_vsx (void);
|
|
2424
2406
|
GGML_API int ggml_cpu_has_matmul_int8(void);
|
|
2407
|
+
GGML_API int ggml_cpu_has_cann (void);
|
|
2408
|
+
GGML_API int ggml_cpu_has_llamafile (void);
|
|
2425
2409
|
|
|
2426
2410
|
//
|
|
2427
2411
|
// Internal types and functions exposed for tests and benchmarks
|
|
@@ -2435,20 +2419,31 @@ extern "C" {
|
|
|
2435
2419
|
#endif
|
|
2436
2420
|
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
|
2437
2421
|
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
|
2438
|
-
typedef void (*
|
|
2439
|
-
|
|
2422
|
+
typedef void (*ggml_from_float_to_mat_t)
|
|
2423
|
+
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
|
|
2424
|
+
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
|
2425
|
+
const void * GGML_RESTRICT y, size_t by, int nrc);
|
|
2426
|
+
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
|
2427
|
+
const void * GGML_RESTRICT y, int nr, int nc);
|
|
2428
|
+
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
|
2429
|
+
const void * GGML_RESTRICT y, int nr, int nc);
|
|
2440
2430
|
|
|
2441
2431
|
typedef struct {
|
|
2442
|
-
const char
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
ggml_from_float_t
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2432
|
+
const char * type_name;
|
|
2433
|
+
int64_t blck_size;
|
|
2434
|
+
int64_t blck_size_interleave; // interleave elements in blocks
|
|
2435
|
+
size_t type_size;
|
|
2436
|
+
bool is_quantized;
|
|
2437
|
+
ggml_to_float_t to_float;
|
|
2438
|
+
ggml_from_float_t from_float;
|
|
2439
|
+
ggml_from_float_t from_float_ref;
|
|
2440
|
+
ggml_from_float_to_mat_t from_float_to_mat;
|
|
2441
|
+
ggml_vec_dot_t vec_dot;
|
|
2442
|
+
enum ggml_type vec_dot_type;
|
|
2443
|
+
int64_t nrows; // number of rows to process simultaneously
|
|
2444
|
+
int64_t ncols; // number of columns to process simultaneously
|
|
2445
|
+
ggml_gemv_t gemv;
|
|
2446
|
+
ggml_gemm_t gemm;
|
|
2452
2447
|
} ggml_type_traits_t;
|
|
2453
2448
|
|
|
2454
2449
|
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|