npm - @fugood/llama.node - Versions diffs - 0.3.3 → 0.3.4 - Mend

@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (225)

package/CMakeLists.txt +5 -0
package/bin/darwin/arm64/llama-node.node +0 -0
package/bin/darwin/x64/llama-node.node +0 -0
package/bin/linux/arm64/llama-node.node +0 -0
package/bin/linux/x64/llama-node.node +0 -0
package/bin/linux-vulkan/arm64/llama-node.node +0 -0
package/bin/linux-vulkan/x64/llama-node.node +0 -0
package/bin/win32/arm64/llama-node.node +0 -0
package/bin/win32/arm64/node.lib +0 -0
package/bin/win32/x64/llama-node.node +0 -0
package/bin/win32/x64/node.lib +0 -0
package/bin/win32-vulkan/arm64/llama-node.node +0 -0
package/bin/win32-vulkan/arm64/node.lib +0 -0
package/bin/win32-vulkan/x64/llama-node.node +0 -0
package/bin/win32-vulkan/x64/node.lib +0 -0
package/lib/binding.ts +18 -1
package/package.json +1 -1
package/src/EmbeddingWorker.cpp +15 -5
package/src/EmbeddingWorker.h +2 -1
package/src/LlamaCompletionWorker.cpp +1 -1
package/src/LlamaContext.cpp +81 -18
package/src/LlamaContext.h +2 -0
package/src/llama.cpp/.github/workflows/build.yml +197 -159
package/src/llama.cpp/.github/workflows/docker.yml +5 -8
package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
package/src/llama.cpp/.github/workflows/server.yml +21 -14
package/src/llama.cpp/CMakeLists.txt +11 -6
package/src/llama.cpp/Sources/llama/llama.h +4 -0
package/src/llama.cpp/cmake/common.cmake +33 -0
package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
package/src/llama.cpp/common/CMakeLists.txt +6 -2
package/src/llama.cpp/common/arg.cpp +426 -245
package/src/llama.cpp/common/common.cpp +143 -80
package/src/llama.cpp/common/common.h +81 -24
package/src/llama.cpp/common/sampling.cpp +53 -19
package/src/llama.cpp/common/sampling.h +22 -1
package/src/llama.cpp/common/speculative.cpp +274 -0
package/src/llama.cpp/common/speculative.h +28 -0
package/src/llama.cpp/docs/build.md +101 -148
package/src/llama.cpp/examples/CMakeLists.txt +32 -13
package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/batched/batched.cpp +5 -4
package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/infill/infill.cpp +1 -1
package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
package/src/llama.cpp/examples/llava/clip.cpp +262 -66
package/src/llama.cpp/examples/llava/clip.h +8 -2
package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
package/src/llama.cpp/examples/llava/llava.cpp +46 -19
package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/main/main.cpp +9 -5
package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/run/run.cpp +911 -0
package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
package/src/llama.cpp/examples/server/server.cpp +1758 -886
package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
package/src/llama.cpp/examples/server/utils.hpp +94 -304
package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/simple/simple.cpp +4 -0
package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
package/src/llama.cpp/examples/tts/tts.cpp +932 -0
package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
package/src/llama.cpp/ggml/include/ggml.h +106 -24
package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
package/src/llama.cpp/ggml/src/ggml.c +367 -207
package/src/llama.cpp/include/llama-cpp.h +25 -0
package/src/llama.cpp/include/llama.h +26 -19
package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
package/src/llama.cpp/src/CMakeLists.txt +2 -7
package/src/llama.cpp/src/llama-grammar.cpp +15 -15
package/src/llama.cpp/src/llama-grammar.h +2 -5
package/src/llama.cpp/src/llama-sampling.cpp +35 -90
package/src/llama.cpp/src/llama-vocab.cpp +6 -1
package/src/llama.cpp/src/llama.cpp +1748 -640
package/src/llama.cpp/src/unicode.cpp +62 -51
package/src/llama.cpp/src/unicode.h +9 -10
package/src/llama.cpp/tests/CMakeLists.txt +48 -37
package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
package/src/llama.cpp/tests/test-rope.cpp +61 -20
package/src/llama.cpp/tests/test-sampling.cpp +2 -2
package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446

package/src/llama.cpp/tests/test-grammar-integration.cpp CHANGED Viewed

@@ -32,13 +32,10 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
 static bool match_string(const std::string & input, llama_grammar * grammar) {
     const auto cpts = unicode_cpts_from_utf8(input);
-    const llama_grammar_rules  & rules      = llama_grammar_get_rules (grammar);
-          llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
+    auto & stacks_cur = llama_grammar_get_stacks(grammar);
     for (const auto & cpt : cpts) {
-        const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
-        llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
+        llama_grammar_accept(grammar, cpt);
         if (stacks_cur.empty()) {
             // no stacks means that the grammar failed to match at this point
@@ -63,7 +60,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
     auto * grammar = build_grammar(grammar_str);
     // Save the original grammar stacks so that we can reset after every new string we want to test
-    const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
+    const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
     llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);

package/src/llama.cpp/tests/test-llama-grammar.cpp CHANGED Viewed

@@ -113,12 +113,10 @@ int main()
         }
     }
-    llama_grammar * grammar = NULL;
     std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
-    grammar = llama_grammar_init_impl(nullptr, grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
-    if (grammar == nullptr)
-    {
+    llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
+    if (grammar == nullptr) {
         throw std::runtime_error("Failed to initialize llama_grammar");
     }

package/src/llama.cpp/tests/test-quantize-fns.cpp CHANGED Viewed

@@ -79,9 +79,9 @@ static float dot_product(const float * a1, const float * a2, size_t test_size) {
 }
 // Total dot product error
-static float dot_product_error(
-    const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float *test_data2
-) {
+static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2) {
+    GGML_UNUSED(qfns);
     std::vector<uint8_t> tmp_q1(2*test_size);
     std::vector<uint8_t> tmp_q2(2*test_size);

package/src/llama.cpp/tests/test-rope.cpp CHANGED Viewed

@@ -138,7 +138,7 @@ int main(int /*argc*/, const char ** /*argv*/) {
     struct ggml_tensor * x;
     // rope f32
-    for (int m = 0; m < 3; ++m) {
+    for (int m = 0; m < 5; ++m) {
         const int ndims = 4;
         const int64_t n_rot = 128;
@@ -147,28 +147,69 @@ int main(int /*argc*/, const char ** /*argv*/) {
         const int n_past_0 = 100;
         const int n_past_2 = 33;
-        struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        for (int i = 0; i < ne[2]; ++i) {
-            ((int32_t *) p0->data)[i] = n_past_0 + i;
-            ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
-            ((int32_t *) p2->data)[i] = n_past_2 + i;
-        }
-        // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
-        const int mode = m == 0 ? 0 : m == 1 ? 2 : 4;
+        struct ggml_tensor * r0;
+        struct ggml_tensor * r1;
+        struct ggml_tensor * r2;
         x = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+        int mode = -1;
-        // 100, 101, 102, ..., 172
-        struct ggml_tensor * r0 = ggml_rope(ctx0, x,  p0, n_rot, mode);
-        // -67, -67, -67, ..., -67
-        struct ggml_tensor * r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
+        if (m < 3) {
+            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
+            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
+            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        //  33,  34,  35, ..., 105
-        struct ggml_tensor * r2 = ggml_rope(ctx0, x,  p2, n_rot, mode);
+            for (int i = 0; i < ne[2]; ++i) {
+                ((int32_t *) p0->data)[i] = n_past_0 + i;
+                ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
+                ((int32_t *) p2->data)[i] = n_past_2 + i;
+            }
+            // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
+            mode = m == 0 ? 0 : m == 1 ? 2 : 4;
+            // 100, 101, 102, ..., 172
+            r0 = ggml_rope(ctx0, x,  p0, n_rot, mode);
+            // -67, -67, -67, ..., -67
+            r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
+            //  33,  34,  35, ..., 105
+            r2 = ggml_rope(ctx0, x,  p2, n_rot, mode);
+        } else {
+            // testing multi-dimension rope position embedding mode
+            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+            int sections[4] = {16, 24, 24, 0};
+            mode = (m == 3) ? GGML_ROPE_TYPE_MROPE : GGML_ROPE_TYPE_VISION;
+            for (int i = 0; i < ne[2]; ++i) {
+                for (int j = 0; j < 4; ++j) {
+                    ((int32_t *) p0->data)[i + ne[2] * j] = n_past_0 + i + j;
+                    ((int32_t *) p1->data)[i + ne[2] * j] = n_past_2 - n_past_0;
+                    ((int32_t *) p2->data)[i + ne[2] * j] = n_past_2 + i + j;
+                }
+            }
+            // [[100, 101, 102, ..., 172],
+            // [101, 102, 103, ..., 173],
+            // [102, 103, 104, ..., 174]]
+            r0 = ggml_rope_multi(
+                ctx0, x, p0, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+            // [[-67, -67, -67, ..., -67]
+            // [-67, -67, -67, ..., -67]
+            // [-67, -67, -67, ..., -67]]
+            r1 = ggml_rope_multi(
+                ctx0, r0, p1, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+            //  [[33,  34,  35, ..., 105]
+            //  [34,  35,  36, ..., 106]
+            //  [35,  36,  37, ..., 107]]
+            r2 = ggml_rope_multi(
+                ctx0, x, p2, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+        }
         ggml_cgraph * gf = ggml_new_graph(ctx0);

package/src/llama.cpp/tests/test-sampling.cpp CHANGED Viewed

@@ -145,7 +145,7 @@ static void test_penalties(
     sampler_tester tester(probs, probs_expected);
     const size_t n_vocab = probs.size();
-    auto * sampler = llama_sampler_init_penalties(n_vocab, LLAMA_TOKEN_NULL, LLAMA_TOKEN_NULL, last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence, false, false);
+    auto * sampler = llama_sampler_init_penalties(last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence);
     for (size_t i = 0; i < last_tokens.size(); i++) {
         llama_sampler_accept(sampler, last_tokens[i]);
@@ -284,7 +284,7 @@ static void test_perf() {
     data.reserve(n_vocab);
     for (int i = 0; i < n_vocab; i++) {
-        const float logit = 2.0f*((float)(rand())/RAND_MAX - 0.5f);
+        const float logit = 2.0f*((double)(rand())/RAND_MAX - 0.5);
         data.emplace_back(llama_token_data{i, logit, 0.0f});
     }

package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml DELETED Viewed

@@ -1,72 +0,0 @@
-name: Nix aarch64 builds
-on:
-  workflow_dispatch: # allows manual triggering
-  schedule:
-    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
-    # 1.5h instead of minutes with the cold cache).
-    #
-    # randint(0, 59), randint(0, 23)
-    - cron: '26 12 * * *'
-  # But also rebuild if we touched any of the Nix expressions:
-  push:
-    branches:
-      - master
-    paths: ['**/*.nix', 'flake.lock']
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths: ['**/*.nix', 'flake.lock']
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-# Fine-grant permission
-# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
-permissions:
-  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
-  id-token: write
-  contents: read
-jobs:
-  nix-build-aarch64:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install QEMU
-      # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y qemu-user-static qemu-system-aarch64
-        sudo usermod -a -G kvm $USER
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-platforms = aarch64-linux
-          extra-system-features = nixos-test kvm
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: Set-up cachix to push the results to
-      uses: cachix/cachix-action@v13
-      with:
-        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-        name: llama-cpp
-    - name: Show all output paths
-      run: >
-          nix run github:nix-community/nix-eval-jobs
-          -- --gc-roots-dir gcroot
-          --flake
-          ".#packages.aarch64-linux"
-    - name: Build
-      run: >
-          nix run github:Mic92/nix-fast-build
-          -- --skip-cached --no-nom
-          --systems aarch64-linux
-          --flake
-          ".#checks.aarch64-linux"

package/src/llama.cpp/.github/workflows/nix-ci.yml DELETED Viewed

@@ -1,79 +0,0 @@
-name: Nix CI
-on:
-  workflow_dispatch: # allows manual triggering
-  push:
-    branches:
-      - master
-  pull_request:
-    types: [opened, synchronize, reopened]
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-# Fine-grant permission
-# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
-permissions:
-  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
-  id-token: write
-  contents: read
-jobs:
-  nix-eval:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ ubuntu-latest, macos-latest ]
-    runs-on: ${{ matrix.os }}
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: List all flake outputs
-      run: nix flake show --all-systems
-    - name: Show all output paths
-      run: >
-          nix run github:nix-community/nix-eval-jobs
-          -- --gc-roots-dir gcroot
-          --flake
-          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
-  nix-build:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ ubuntu-latest, macos-latest ]
-    runs-on: ${{ matrix.os }}
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: Set-up cachix to push the results to
-      uses: cachix/cachix-action@v13
-      with:
-        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-        name: llama-cpp
-    - name: Build
-      run: >
-          nix run github:Mic92/nix-fast-build
-          -- --skip-cached --no-nom
-          --flake
-          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"

package/src/llama.cpp/.github/workflows/nix-flake-update.yml DELETED Viewed

@@ -1,22 +0,0 @@
-name: update-flake-lock
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
-jobs:
-  lockfile:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Install Nix
-        uses: DeterminateSystems/nix-installer-action@main
-      - name: Update flake.lock
-        uses: DeterminateSystems/update-flake-lock@main
-        with:
-          pr-title: "nix: update flake.lock"
-          pr-labels: |
-            nix
-          pr-reviewers: philiptaron,SomeoneSerge
-          token: ${{ secrets.FLAKE_TOKEN }}

package/src/llama.cpp/.github/workflows/nix-publish-flake.yml DELETED Viewed

@@ -1,36 +0,0 @@
-# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
-name: "Publish a flake to flakestry & flakehub"
-on:
-    push:
-        tags:
-        - "*"
-    workflow_dispatch:
-        inputs:
-            tag:
-                description: "The existing tag to publish"
-                type: "string"
-                required: true
-jobs:
-    flakestry-publish:
-        runs-on: ubuntu-latest
-        permissions:
-            id-token: "write"
-            contents: "read"
-        steps:
-            - uses: flakestry/flakestry-publish@main
-              with:
-                version: "${{ inputs.tag || github.ref_name }}"
-    flakehub-publish:
-      runs-on: "ubuntu-latest"
-      permissions:
-        id-token: "write"
-        contents: "read"
-      steps:
-        - uses: "actions/checkout@v4"
-          with:
-            ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
-        - uses: "DeterminateSystems/nix-installer-action@main"
-        - uses: "DeterminateSystems/flakehub-push@main"
-          with:
-            visibility: "public"
-            tag: "${{ inputs.tag }}"

package/src/llama.cpp/ggml/include/ggml-amx.h DELETED Viewed

@@ -1,25 +0,0 @@
-#pragma once
-#include "ggml.h"
-#include "ggml-backend.h"
-#ifdef  __cplusplus
-extern "C" {
-#endif
-// buffer_type API
-GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
-GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);
-// backend API
-GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);
-GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
-GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);
-#ifdef  __cplusplus
-}
-#endif

package/src/llama.cpp/ggml/src/ggml-aarch64.c DELETED Viewed

@@ -1,129 +0,0 @@
-#define GGML_COMMON_DECL_C
-#include "ggml-common.h"
-#include "ggml-aarch64.h"
-#include "ggml-impl.h"
-#include "ggml-quants.h"
-#include <assert.h>
-#define UNUSED GGML_UNUSED
-static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
-    block_q4_0x4 out;
-    for (int i = 0; i < 4; i++) {
-        out.d[i] = in[i].d;
-    }
-    const int end = QK4_0 * 2 / blck_size_interleave;
-    if (blck_size_interleave == 8) {
-        const uint64_t xor_mask = 0x8888888888888888ULL;
-        for (int i = 0; i < end; ++i) {
-            int src_id = i % 4;
-            int src_offset = (i / 4) * blck_size_interleave;
-            int dst_offset = i * blck_size_interleave;
-            uint64_t elems;
-            // Using memcpy to avoid unaligned memory accesses
-            memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
-            elems ^= xor_mask;
-            memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
-        }
-    } else if (blck_size_interleave == 4) {
-        const uint32_t xor_mask = 0x88888888;
-        for (int i = 0; i < end; ++i) {
-            int src_id = i % 4;
-            int src_offset = (i / 4) * blck_size_interleave;
-            int dst_offset = i * blck_size_interleave;
-            uint32_t elems;
-            memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
-            elems ^= xor_mask;
-            memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
-        }
-    } else {
-        GGML_ASSERT(false);
-    }
-    return out;
-}
-// interleave 8 block_q4_0s in blocks of blck_size_interleave
-// returns an interleaved block_q4_0x8
-// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
-// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
-static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
-    block_q4_0x8 out;
-    for (int i = 0; i < 8; i++) {
-        out.d[i] = in[i].d;
-    }
-    const int end = QK4_0 * 4 / blck_size_interleave;
-    const uint64_t xor_mask = 0x8888888888888888ULL;
-    for (int i = 0; i < end; ++i) {
-        int src_id = i % 8;
-        int src_offset = (i / 8) * blck_size_interleave;
-        int dst_offset = i * blck_size_interleave;
-        uint64_t elems;
-        memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
-        elems ^= xor_mask;
-        memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
-    }
-    return out;
-}
-static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
-    assert(n_per_row % QK4_0 == 0);
-    const int nb = n_per_row / QK4_0;
-    void * out_ptr = NULL;
-    if (nrows_interleaved == 8) {
-        out_ptr = (block_q4_0x8 *) dst;
-    }
-    else if (nrows_interleaved == 4) {
-        out_ptr = (block_q4_0x4 *) dst;
-    }
-    assert(nrows_interleaved <= 8);
-    block_q4_0 dst_tmp[8];
-    for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
-        for (int64_t x = 0; x < nb; x++) {
-            for (int i  = 0; i < nrows_interleaved; i++ ) {
-                quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
-            }
-            if (nrows_interleaved == 8) {
-                *(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
-                out_ptr = (block_q4_0x8 *) out_ptr + 1;
-            }
-            else if (nrows_interleaved == 4) {
-                *(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
-                out_ptr = (block_q4_0x4 *) out_ptr + 1;
-            }
-        }
-    }
-    return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
-}
-size_t quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 4);
-}
-size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 4, 8);
-}
-size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
-    UNUSED(quant_weights);
-    return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
-}

package/src/llama.cpp/ggml/src/ggml-aarch64.h DELETED Viewed

@@ -1,19 +0,0 @@
-#pragma once
-#include "ggml.h"
-// GGML internal header
-#ifdef __cplusplus
-extern "C" {
-#endif
-// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
-size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-#ifdef __cplusplus
-}
-#endif

package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt DELETED Viewed

@@ -1,107 +0,0 @@
-if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
-        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
-         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$") AND
-        CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
-    message(STATUS "Using AMX")
-    file(GLOB   GGML_HEADERS_AMX "*.h")
-    list(APPEND GGML_HEADERS_AMX "../../include/ggml-amx.h")
-    file(GLOB   GGML_SOURCES_AMX "*.cpp")
-    add_library(ggml-amx
-                ${GGML_HEADERS_AMX}
-                ${GGML_SOURCES_AMX})
-    target_link_libraries(ggml-amx PRIVATE ggml-base)
-    target_include_directories(ggml-amx PRIVATE . ..)
-    # this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
-    # TODO: integrate AMX backend into the CPU backend
-    if (MSVC)
-        # instruction set detection for MSVC only
-        if (GGML_NATIVE)
-            # TODO: improve, should not reference files from the parent folder
-            include(../ggml-cpu/cmake/FindSIMD.cmake)
-        endif ()
-        if (GGML_AVX512)
-            list(APPEND ARCH_FLAGS /arch:AVX512)
-            # MSVC has no compile-time flags enabling specific
-            # AVX512 extensions, neither it defines the
-            # macros corresponding to the extensions.
-            # Do it manually.
-            if (GGML_AVX512_VBMI)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
-            endif()
-            if (GGML_AVX512_VNNI)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
-            endif()
-            if (GGML_AVX512_BF16)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
-            endif()
-            if (GGML_AMX_TILE)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
-            endif()
-            if (GGML_AMX_INT8)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
-            endif()
-            if (GGML_AMX_BF16)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
-            endif()
-        elseif (GGML_AVX2)
-            list(APPEND ARCH_FLAGS /arch:AVX2)
-        elseif (GGML_AVX)
-            list(APPEND ARCH_FLAGS /arch:AVX)
-        endif()
-    else()
-        if (GGML_NATIVE)
-            list(APPEND ARCH_FLAGS -march=native)
-        endif()
-        if (GGML_F16C)
-            list(APPEND ARCH_FLAGS -mf16c)
-        endif()
-        if (GGML_FMA)
-            list(APPEND ARCH_FLAGS -mfma)
-        endif()
-        if (GGML_AVX)
-            list(APPEND ARCH_FLAGS -mavx)
-        endif()
-        if (GGML_AVX2)
-            list(APPEND ARCH_FLAGS -mavx2)
-        endif()
-        if (GGML_AVX512)
-            list(APPEND ARCH_FLAGS -mavx512f)
-            list(APPEND ARCH_FLAGS -mavx512dq)
-            list(APPEND ARCH_FLAGS -mavx512bw)
-        endif()
-        if (GGML_AVX512_VBMI)
-            list(APPEND ARCH_FLAGS -mavx512vbmi)
-        endif()
-        if (GGML_AVX512_VNNI)
-            list(APPEND ARCH_FLAGS -mavx512vnni)
-        endif()
-        if (GGML_AVX512_BF16)
-            list(APPEND ARCH_FLAGS -mavx512bf16)
-        endif()
-        if (GGML_AMX_TILE)
-            list(APPEND ARCH_FLAGS -mamx-tile)
-        endif()
-        if (GGML_AMX_INT8)
-            list(APPEND ARCH_FLAGS -mamx-int8)
-        endif()
-        if (GGML_AMX_BF16)
-            list(APPEND ARCH_FLAGS -mamx-bf16)
-        endif()
-    endif()
-    target_compile_options(ggml-amx PRIVATE ${ARCH_FLAGS})
-else()
-    set(GGML_AMX OFF PARENT_SCOPE)
-    message(WARNING "AMX requires x86 and gcc version > 11.0. Turning off GGML_AMX.")
-endif()