@fugood/llama.node 0.3.0 → 0.3.2

This diff shows the publicly available contents of the two package versions as released to their registry, and is provided for informational purposes only.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
package/src/llama.cpp/tests/test-grad0.cpp
@@ -1,10 +1,14 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
 #include "ggml.h"
 
+#include <cfloat>
 #include <cmath>
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
 #include <cassert>
+#include <initializer_list>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -217,7 +221,8 @@ static bool check_gradient(
         int nargs,
         float eps,
         float max_error_abs,
-        float max_error_rel) {
+        float max_error_rel,
+        std::vector<double> expected_vals) {
 
     static int n_threads = -1;
     if (n_threads < 0) {
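Every call site below gains one trailing argument for the new parameter; an empty braced list keeps the old strict behavior, since `{}` simply constructs an empty `std::vector<double>`. A minimal sketch of the two calling styles (the `describe` helper is hypothetical, not part of the package):

```cpp
#include <cstdio>
#include <vector>

// Sketch of the new calling convention: the trailing std::vector<double> lists
// gradient values that are acceptable at discontinuities; {} keeps strict checking.
static void describe(const char * op, const std::vector<double> & expected_vals) {
    std::printf("%s: %s\n", op,
                expected_vals.empty() ? "strict finite-difference check"
                                      : "probes matching an expected value may be skipped");
}

int main() {
    describe("add f32", {});         // mirrors check_gradient("add f32", ..., {});
    describe("relu",    {0.0, 1.0}); // mirrors check_gradient("relu", ..., {0.0, 1.0});
}
```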
@@ -239,8 +244,10 @@ static bool check_gradient(
 
     ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
 
-    ggml_graph_reset  (gf);
-    ggml_set_f32      (f->grad, 1.0f);
+    ggml_graph_reset(gb);
+    if (f->grad) {
+        ggml_set_f32(f->grad, 1.0f);
+    }
 
     ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
 
@@ -248,9 +255,10 @@ static bool check_gradient(
     // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot");
 
     for (int i = 0; i < nargs; ++i) {
+        bool all_g0_bad = true;
         const int nelements = ggml_nelements(x[i]);
         for (int k = 0; k < nelements; ++k) {
-            // compute gradient using finite differences
+            // Calculate gradient numerically:
             const float x0 = ggml_get_f32_1d(x[i], k);
             const float xm = x0 - eps;
             const float xp = x0 + eps;
@@ -267,18 +275,42 @@ static bool check_gradient(
             const double f1 = ggml_get_f32_1d(f, 0);
             const double g0 = (f0 - f1)/(2.0*(double) eps);
 
+            // The numerical calculation of the gradient fails around discontinuities (e.g. 0 for ReLU).
+            // In such cases, provide a vector of expected values and skip the comparison for failed calculations.
+            if (!expected_vals.empty()) {
+                bool matches_any = false;
+                for (const double & ev : expected_vals) {
+                    const double error_abs = std::fabs(g0 - ev);
+                    if (error_abs > max_error_abs) {
+                        continue;
+                    }
+                    const double error_rel = g0 != 0.0 ? fabs(g0 - ev)/fabs(g0) : 0.0;
+                    if (error_rel > max_error_rel) {
+                        continue;
+                    }
+                    matches_any = true;
+                    break;
+                }
+                if (!matches_any) {
+                    continue;
+                }
+            }
+            all_g0_bad = false;
+
             ggml_set_f32_1d(x[i], k, x0);
 
             // compute gradient using backward graph
-            ggml_graph_reset  (gf);
-            ggml_set_f32      (f->grad, 1.0f);
+            ggml_graph_reset(gb);
+            if (f->grad) {
+                ggml_set_f32(f->grad, 1.0f);
+            }
 
             ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
 
             const double g1 = ggml_get_f32_1d(x[i]->grad, k);
 
             const double error_abs = fabs(g0 - g1);
-            const double error_rel = g0 != 0 ? fabs(g0 - g1)/fabs(g0) : 0;
+            const double error_rel = g0 != 0.0 ? fabs(g0 - g1)/fabs(g0) : 0.0;
 
             if (error_abs > max_error_abs || error_rel > max_error_rel) {
                 printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
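The hunk above is the heart of the change: the numerical gradient is still the central difference g0 = (f(x+eps) - f(x-eps)) / (2*eps), but a probe whose estimate matches one of the caller-supplied expected values is accepted, and one that matches none of them is skipped rather than compared against the backward pass. A minimal standalone sketch of the same logic on scalar doubles (`check_gradient_1d` is a hypothetical name; the real code operates on ggml tensors):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Central difference: g0 ~= (f(x+eps) - f(x-eps)) / (2*eps). If expected_vals is
// non-empty and g0 matches none of them, the probe is skipped (assumed to sit on a
// discontinuity) rather than failed, mirroring the hunk above.
template <typename F>
static bool check_gradient_1d(F f, double x, double g_backward, double eps,
                              double max_err_abs, double max_err_rel,
                              const std::vector<double> & expected_vals) {
    const double g0 = (f(x + eps) - f(x - eps)) / (2.0*eps);

    if (!expected_vals.empty()) {
        bool matches_any = false;
        for (double ev : expected_vals) {
            const double err_abs = std::fabs(g0 - ev);
            const double err_rel = g0 != 0.0 ? err_abs/std::fabs(g0) : 0.0;
            if (err_abs <= max_err_abs && err_rel <= max_err_rel) {
                matches_any = true;
                break;
            }
        }
        if (!matches_any) {
            return true; // numerical estimate unusable here -> skip this probe
        }
    }

    const double err_abs = std::fabs(g0 - g_backward);
    const double err_rel = g0 != 0.0 ? err_abs/std::fabs(g0) : 0.0;
    return err_abs <= max_err_abs && err_rel <= max_err_rel;
}

int main() {
    // relu'(x) is 0 for x < 0 and 1 for x > 0, hence expected_vals = {0.0, 1.0}.
    auto relu = [](double v) { return v > 0.0 ? v : 0.0; };
    const bool ok = check_gradient_1d(relu, 0.5, /*g_backward=*/1.0, 1e-3, 1e-3, INFINITY, {0.0, 1.0});
    std::printf("relu gradient check: %s\n", ok ? "ok" : "failed");
}
```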
@@ -287,6 +319,10 @@ static bool check_gradient(
                 return false;
             }
         }
+        if (all_g0_bad) {
+            printf("%s: numerical calculation of the gradient failed for all values\n", op_name);
+            return false;
+        }
     }
 
     return true;
@@ -404,7 +440,7 @@ int main(int argc, const char ** argv) {
         seed_iter = rand();
         unsigned seed = rand();
 
-        printf("test-grad0: iter:%d/%d\n", iter, niter);
+        printf("test-grad0: iter:%d/%d\n", (iter+1), niter);
         struct ggml_context * ctx0 = ggml_init(params);
 
         get_random_dims(ne, 4);
@@ -424,7 +460,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
 
-            check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
+            check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f, {});
         }
     }
 
@@ -441,7 +477,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
 
-            check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
+            check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f, {});
         }
     }
 
@@ -458,7 +494,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
 
-            check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -475,7 +511,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
 
-            check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -492,7 +528,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
 
-            check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
+            check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f, {});
         }
     }
 
@@ -509,7 +545,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
 
-            check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -526,7 +562,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
 
-            check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f);
+            check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f, {});
        }
     }
 
@@ -543,7 +579,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
 
-            check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
+            check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f, {});
         }
     }
 
@@ -560,7 +596,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
 
-            check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -578,7 +614,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
 
-            check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
+            check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
         }
     }
 
@@ -596,7 +632,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
 
-            check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -614,7 +650,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
 
-            check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -637,7 +673,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
 
-            check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
+            check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
         }
     }
 
@@ -660,25 +696,25 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
 
-            check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
+            check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
         }
     }
 
-    // abs (finite differences do not work)
-    //{
-    //    const int nargs = 1;
+    // abs
+    {
+        const int nargs = 1;
 
-    //    for (int ndims = 1; ndims <= 2; ++ndims) {
-    //        for (int i = 0; i < nargs; ++i) {
-    //            x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-    //            ggml_set_param(ctx0, x[i]);
-    //        }
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
 
-    //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
 
-    //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
-    //    }
-    //}
+            check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f, {-1.0, 1.0});
+        }
+    }
 
     // sgn
     {
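The abs test above could be re-enabled because of the new escape hatch: away from zero, d|x|/dx is exactly -1 or +1, but a central difference whose stencil straddles the kink lands anywhere in between. A worked example with the test's eps = 1e-3:

$$ g_0 = \frac{|x_0+\varepsilon| - |x_0-\varepsilon|}{2\varepsilon}, \qquad x_0 = 4\cdot 10^{-4} \;\Rightarrow\; g_0 = \frac{0.0014 - 0.0006}{0.002} = 0.4, $$

far from either true subgradient. With expected_vals = {-1.0, 1.0} such probes are skipped rather than reported as failures, and if every probe of an argument fails this way, all_g0_bad triggers an explicit error.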
@@ -693,7 +729,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
 
-            check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
         }
     }
 
@@ -710,7 +746,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
 
-            check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -727,7 +763,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
 
-            check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
         }
     }
 
@@ -745,7 +781,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
 
-            check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -776,7 +812,7 @@ int main(int argc, const char ** argv) {
 
             GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
 
-            check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
             if (ndims == 2) {
                 // check_mat_mul does not support ndims > 2
                 check_mat_mul(m, x[1], x[0]);
@@ -800,7 +836,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
 
-            check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -817,7 +853,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
 
-            check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {0.0, 1.0});
         }
     }
 
@@ -835,7 +871,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
 
-            check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
         }
     }
 
@@ -854,9 +890,9 @@ int main(int argc, const char ** argv) {
 
 #ifdef GGML_SILU_FP16
             // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
-            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
+            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY, {});
 #else
-            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
 #endif
         }
     }
@@ -874,7 +910,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
 
-            check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
+            check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY, {});
         }
     }
 
@@ -892,7 +928,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s));
 
-            check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -910,7 +946,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
 
-            check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -928,7 +964,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
 
-            check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
+            check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
         }
     }
 
@@ -952,7 +988,7 @@ int main(int argc, const char ** argv) {
 
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
-            check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -976,7 +1012,7 @@ int main(int argc, const char ** argv) {
 
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
-            check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1004,7 +1040,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
 
-            check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1037,7 +1073,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
 
-            check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
        }
     }
 
@@ -1072,7 +1108,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
 
-            check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1109,7 +1145,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
 
-            check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1137,7 +1173,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
 
-            check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1170,7 +1206,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
 
-            check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1194,7 +1230,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
 
-            check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1225,7 +1261,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
 
-            check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1257,7 +1293,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
 
-            check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1291,7 +1327,7 @@ int main(int argc, const char ** argv) {
             // sum requires contiguous tensor rows
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
 
-            check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1319,7 +1355,7 @@ int main(int argc, const char ** argv) {
             // sum requires contiguous tensor rows
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
 
-            check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1337,7 +1373,7 @@ int main(int argc, const char ** argv) {
 
         struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
 
-        check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+        check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
     }
 
     // diag_mask_inf
@@ -1353,7 +1389,7 @@ int main(int argc, const char ** argv) {
 
         struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
 
-        check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+        check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
    }
 
     // diag_mask_zero
@@ -1369,7 +1405,7 @@ int main(int argc, const char ** argv) {
 
         struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
 
-        check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+        check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
     }
 
     // softmax
@@ -1395,7 +1431,7 @@ int main(int argc, const char ** argv) {
                                                     1.0f - eps),
                                                 ggml_new_f32(ctx0, eps))));
 
-            check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
+            check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY, {});
             // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
             // this may result in different gradients to finite differences.
             // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
@@ -1412,7 +1448,7 @@ int main(int argc, const char ** argv) {
         get_random_dims(ne2, 4);
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -0.1f, 0.1f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
             x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
             // the second argument to cross_entropy_loss must sum up to 1 for each row
             int nr = ggml_nrows(x[1]);
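The hunk above widens the cross-entropy inputs from ±0.1 to ±1.0, and the next hunk bumps eps from 1e-4 to 1e-3. A plausible rationale, not stated in the diff itself: a central difference in floating point trades truncation error against round-off,

$$ E(\varepsilon) \approx \frac{\varepsilon^{2}}{6}\,\bigl|f'''(x)\bigr| \;+\; \frac{\delta\,\bigl|f(x)\bigr|}{\varepsilon}, $$

where $\delta$ is the relative precision of the arithmetic (about $1.2\cdot10^{-7}$ for f32). The total error is minimized near $\varepsilon^{*} = (3\,\delta\,|f|/|f'''|)^{1/3}$; below that, round-off dominates. Larger inputs mean larger $|f|$, so the larger step size is the safer choice.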
@@ -1430,7 +1466,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
 
-            check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-4f, 1e-3f, INFINITY);
+            check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
         }
     }
 
@@ -1468,7 +1504,7 @@ int main(int argc, const char ** argv) {
                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
 
                     GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                    check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
+                    check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
                 }
             }
         }
@@ -1508,12 +1544,93 @@ int main(int argc, const char ** argv) {
                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
 
                     GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                    check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
+                    check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
                 }
             }
         }
     }
 
+    // im2col f32
+    {
+        srand(seed);
+        const int nargs = 1;
+        const int ndims = 4;
+
+        for (const bool is_2D : {false, true}) {
+            int64_t ne0[ndims];
+            int64_t ne1[ndims];
+            get_random_dims(ne0, ndims);
+            get_random_dims(ne1, ndims);
+
+            // Ensure that the output is not zero-sized:
+            ne1[0] += 8;
+            ne1[1] += 8;
+
+            if (is_2D) {
+                ne1[2] = ne0[2];
+            } else {
+                ne1[1] = ne0[1];
+                ne0[3] = 1;
+                ne1[3] = 1;
+            }
+
+            // The order of arguments is swapped because the first tensor is only used for its shape.
+            x[1] = get_random_tensor_f16(ctx0, ndims, ne0, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne1, -1.0f, 1.0f);
+
+            ggml_set_param(ctx0, x[0]);
+
+            const int s0 =         1 + irand(2);
+            const int s1 = is_2D ? 1 + irand(2) : 0;
+            const int p0 =         0 + irand(2);
+            const int p1 = is_2D ? 0 + irand(2) : 0;
+            const int d0 =         1 + irand(2);
+            const int d1 = is_2D ? 1 + irand(2) : 0;
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_im2col(ctx0, x[1], x[0], s0, s1, p0, p1, d0, d1, is_2D, GGML_TYPE_F32));
+
+            GGML_PRINT_DEBUG("im2col f32: is_2D=%s, s0=%d, s1=%d, p0=%d, p1=%d, d0=%d, d1=%d\n", is_2D ? "yes" : "no", s0, s1, p0, p1, d0, d1);
+            check_gradient("im2col f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
+        }
+    }
+
+    // pool_2d f32
+    {
+        srand(seed);
+        const int nargs = 1;
+        const int ndims = 4;
+
+        for (const enum ggml_op_pool op : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
+            int64_t ne0[ndims];
+            get_random_dims(ne0, ndims);
+
+            ne0[0] += 8;
+            ne0[1] += 8;
+
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne0, -1.0f, 1.0f);
+
+            ggml_set_param(ctx0, x[0]);
+
+            const int k0 = 2 + irand(2);
+            const int k1 = 2 + irand(2);
+            const int s0 = 2 + irand(2);
+            const int s1 = 2 + irand(2);
+            const int p0 = 0 + irand(2);
+            const int p1 = 0 + irand(2);
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_pool_2d(ctx0, x[0], op, k0, k1, s0, s1, p0, p1));
+
+            GGML_PRINT_DEBUG("ggml_pool_2d f32: op=%s k0=%d, k1=%d, s0=%d, s1=%d, p0=%d, p1=%d\n",
+                             op == GGML_OP_POOL_MAX ? "max" : "avg", k0, k1, s0, s1, p0, p1);
+            std::vector<double> expected_vals;
+            if (op == GGML_OP_POOL_MAX) {
+                expected_vals.push_back(0.0);
+                expected_vals.push_back(1.0);
+            }
+            check_gradient("ggml_pool_2d f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, expected_vals);
+        }
+    }
+
     // flash_attn f32
     // TODO: adapt to ggml_flash_attn_ext() changes
     //{
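For GGML_OP_POOL_MAX the derivative of the summed output with respect to any single input is 1 if that input is the maximum of some pooling window and 0 otherwise, which is where expected_vals = {0.0, 1.0} comes from; average pooling is smooth, so it keeps the strict check (empty expected_vals). A toy 1-D illustration of that 0/1 structure (hypothetical helper, ignores ties and overlapping windows, not the ggml kernel):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// d(sum of max-pooled outputs)/d(input[i]) for non-overlapping 1-D windows of size k:
// 1 if input[i] is the maximum of its window, 0 otherwise.
static std::vector<double> max_pool_grad(const std::vector<float> & in, size_t k) {
    std::vector<double> grad(in.size(), 0.0);
    for (size_t w = 0; w + k <= in.size(); w += k) {
        const size_t arg = std::max_element(in.begin() + w, in.begin() + w + k) - in.begin();
        grad[arg] = 1.0; // only the window maximum receives gradient
    }
    return grad;
}

int main() {
    const std::vector<float> in = {0.1f, 0.9f, -0.3f, 0.4f};
    for (double g : max_pool_grad(in, 2)) {
        std::printf("%.0f ", g); // prints: 0 1 0 1
    }
    std::printf("\n");
}
```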
@@ -1553,7 +1670,7 @@ int main(int argc, const char ** argv) {
 
     //            struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
 
-    //            check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
+    //            check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY, {});
     //        }
     //    }
     //}