npm - cui-llama.rn - Versions diffs - 1.1.2 → 1.1.5 - Mend

cui-llama.rn 1.1.2 → 1.1.5

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (31)

package/android/src/main/CMakeLists.txt +1 -2
package/android/src/main/jni.cpp +26 -21
package/cpp/common.cpp +181 -1584
package/cpp/common.h +131 -52
package/cpp/ggml-aarch64.c +612 -0
package/cpp/ggml-alloc.h +2 -2
package/cpp/ggml-backend.c +33 -6
package/cpp/ggml-backend.h +2 -0
package/cpp/ggml-common.h +20 -0
package/cpp/ggml-impl.h +36 -7
package/cpp/ggml-metal.m +68 -8
package/cpp/ggml-quants.c +932 -50
package/cpp/ggml-quants.h +15 -0
package/cpp/ggml.c +1712 -325
package/cpp/ggml.h +169 -100
package/cpp/llama-grammar.cpp +721 -122
package/cpp/llama-grammar.h +120 -15
package/cpp/llama-impl.h +132 -1
package/cpp/llama-sampling.cpp +1483 -354
package/cpp/llama-sampling.h +20 -48
package/cpp/llama-vocab.cpp +140 -7
package/cpp/llama-vocab.h +3 -2
package/cpp/llama.cpp +824 -327
package/cpp/llama.h +235 -256
package/cpp/rn-llama.hpp +18 -14
package/cpp/sampling.cpp +353 -354
package/cpp/sampling.h +62 -143
package/cpp/sgemm.cpp +153 -0
package/package.json +1 -1
package/cpp/grammar-parser.cpp +0 -539
package/cpp/grammar-parser.h +0 -29

package/cpp/ggml.h (changed)

@@ -220,7 +220,7 @@
 #include <stdio.h>
 #define LM_GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define LM_GGML_FILE_VERSION 1
+#define LM_GGML_FILE_VERSION 2
 #define LM_GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define LM_GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -231,6 +231,8 @@
 #define LM_GGML_MAX_SRC            10
 #ifndef LM_GGML_MAX_NAME
 #define LM_GGML_MAX_NAME           64
+#define LM_GGML_MAX_N_THREADS      512
 #endif
 #define LM_GGML_MAX_OP_PARAMS      64
 #define LM_GGML_DEFAULT_N_THREADS  4
@@ -356,6 +358,7 @@ extern "C" {
     struct lm_ggml_object;
     struct lm_ggml_context;
+    struct lm_ggml_cgraph;
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum lm_ggml_type {
@@ -393,6 +396,8 @@ extern "C" {
         LM_GGML_TYPE_Q4_0_4_4 = 31,
         LM_GGML_TYPE_Q4_0_4_8 = 32,
         LM_GGML_TYPE_Q4_0_8_8 = 33,
+        LM_GGML_TYPE_TQ1_0   = 34,
+        LM_GGML_TYPE_TQ2_0   = 35,
         LM_GGML_TYPE_COUNT,
     };
@@ -453,6 +458,8 @@ extern "C" {
         LM_GGML_OP_SQR,
         LM_GGML_OP_SQRT,
         LM_GGML_OP_LOG,
+        LM_GGML_OP_SIN,
+        LM_GGML_OP_COS,
         LM_GGML_OP_SUM,
         LM_GGML_OP_SUM_ROWS,
         LM_GGML_OP_MEAN,
@@ -490,9 +497,11 @@ extern "C" {
         LM_GGML_OP_CLAMP,
         LM_GGML_OP_CONV_TRANSPOSE_1D,
         LM_GGML_OP_IM2COL,
+        LM_GGML_OP_IM2COL_BACK,
         LM_GGML_OP_CONV_TRANSPOSE_2D,
         LM_GGML_OP_POOL_1D,
         LM_GGML_OP_POOL_2D,
+        LM_GGML_OP_POOL_2D_BACK,
         LM_GGML_OP_UPSCALE, // nearest interpolate
         LM_GGML_OP_PAD,
         LM_GGML_OP_ARANGE,
@@ -508,6 +517,7 @@ extern "C" {
         LM_GGML_OP_WIN_UNPART,
         LM_GGML_OP_GET_REL_POS,
         LM_GGML_OP_ADD_REL_POS,
+        LM_GGML_OP_RWKV_WKV,
         LM_GGML_OP_UNARY,
@@ -542,6 +552,7 @@ extern "C" {
         LM_GGML_UNARY_OP_SILU,
         LM_GGML_UNARY_OP_HARDSWISH,
         LM_GGML_UNARY_OP_HARDSIGMOID,
+        LM_GGML_UNARY_OP_EXP,
         LM_GGML_UNARY_OP_COUNT,
     };
@@ -565,23 +576,9 @@ extern "C" {
         LM_GGML_TENSOR_FLAG_PARAM  = 4,
     };
-    // ggml object
-    struct lm_ggml_object {
-        size_t offs;
-        size_t size;
-        struct lm_ggml_object * next;
-        enum lm_ggml_object_type type;
-        char padding[4];
-    };
-    static const size_t LM_GGML_OBJECT_SIZE = sizeof(struct lm_ggml_object);
     // n-dimensional tensor
     struct lm_ggml_tensor {
-        enum lm_ggml_type         type;
+        enum lm_ggml_type type;
         LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
@@ -624,6 +621,29 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*lm_ggml_abort_callback)(void * data);
+    // Scheduling priorities
+    enum lm_ggml_sched_priority {
+        LM_GGML_SCHED_PRIO_NORMAL,
+        LM_GGML_SCHED_PRIO_MEDIUM,
+        LM_GGML_SCHED_PRIO_HIGH,
+        LM_GGML_SCHED_PRIO_REALTIME
+    };
+    // Threadpool params
+    // Use lm_ggml_threadpool_params_default() or lm_ggml_threadpool_params_init() to populate the defaults
+    struct lm_ggml_threadpool_params {
+        bool                cpumask[LM_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int                 n_threads;                   // number of threads
+        enum lm_ggml_sched_priority prio;                   // thread priority
+        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
+        bool                strict_cpu;                  // strict cpu placement
+        bool                paused;                      // start in paused state
+    };
+    struct lm_ggml_threadpool;     // forward declaration, see ggml.c
+    typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
     // the compute plan that needs to be prepared for lm_ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct lm_ggml_cplan {
@@ -631,41 +651,13 @@ extern "C" {
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `lm_ggml_graph_compute()`
         int n_threads;
+        struct lm_ggml_threadpool * threadpool;
         // abort lm_ggml_graph_compute when true
         lm_ggml_abort_callback abort_callback;
         void *              abort_callback_data;
     };
-    enum lm_ggml_cgraph_eval_order {
-        LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        LM_GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-    typedef uint32_t lm_ggml_bitset_t;
-    struct lm_ggml_hash_set {
-        size_t size;
-        lm_ggml_bitset_t * used;
-        struct lm_ggml_tensor ** keys;
-    };
-    // computation graph
-    struct lm_ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-        struct lm_ggml_tensor ** nodes;
-        struct lm_ggml_tensor ** grads;
-        struct lm_ggml_tensor ** leafs;
-        struct lm_ggml_hash_set visited_hash_set;
-        enum lm_ggml_cgraph_eval_order order;
-    };
     // scratch buffer
     struct lm_ggml_scratch {
         size_t offs;
@@ -969,6 +961,22 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
     // return scalar
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_sum(
             struct lm_ggml_context * ctx,
@@ -1119,6 +1127,14 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
     // normalize along rows
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_norm(
             struct lm_ggml_context * ctx,
@@ -1214,7 +1230,7 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_inplace(
@@ -1224,19 +1240,19 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d_inplace(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
     // b -> view(a,offset,nb1,nb2,3), return modified a
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d(
@@ -1244,7 +1260,7 @@ extern "C" {
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d_inplace(
@@ -1252,7 +1268,7 @@ extern "C" {
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
     // a -> b, return view(b)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_cpy(
@@ -1566,34 +1582,49 @@ extern "C" {
             float                 min,
             float                 max);
+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                  s0,
-            int                  s1,
-            int                  p0,
-            int                  p1,
-            int                  d0,
-            int                  d1,
-            bool                 is_2D,
-            enum lm_ggml_type       dst_type);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum lm_ggml_type        dst_type);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col_back(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor  * a,  // convolution kernel
+        struct lm_ggml_tensor  * b,  // gradient of im2col output
+        int64_t             * ne, // shape of im2col input
+        int                   s0, // stride dimension 0
+        int                   s1, // stride dimension 1
+        int                   p0, // padding dimension 0
+        int                   p1, // padding dimension 1
+        int                   d0, // dilation dimension 0
+        int                   d1, // dilation dimension 1
+        bool                  is_2D);
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                  s0,
-            int                  s1,
-            int                  p0,
-            int                  p1,
-            int                  d0,
-            int                  d1);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                  s0,  // stride dimension 0
+            int                  s1,  // stride dimension 1
+            int                  p0,  // padding dimension 0
+            int                  p1,  // padding dimension 1
+            int                  d0,  // dilation dimension 0
+            int                  d1); // dilation dimension 1
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
+            struct lm_ggml_tensor  * a,   // convolution kernel
+            struct lm_ggml_tensor  * b,   // data
             int                   s0,  // stride
             int                   p0,  // padding
             int                   d0); // dilation
@@ -1602,29 +1633,29 @@ extern "C" {
     // alias for lm_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     LM_GGML_API struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                   s,  // stride
+            int                   d); // dilation
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct lm_ggml_tensor  * a,   // convolution kernel
+            struct lm_ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct lm_ggml_tensor  * a,   // convolution kernel
+            struct lm_ggml_tensor  * b,   // data
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
     // kernel size is a->ne[0] x a->ne[1]
@@ -1686,6 +1717,18 @@ extern "C" {
             float                 p0,
             float                 p1);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_pool_2d_back(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a,
+            struct lm_ggml_tensor  * af, // "a"/input used in forward pass
+            enum lm_ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
@@ -1840,6 +1883,15 @@ extern "C" {
             struct lm_ggml_tensor  * pw,
             struct lm_ggml_tensor  * ph);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_rwkv_wkv(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * k,
+            struct lm_ggml_tensor  * v,
+            struct lm_ggml_tensor  * r,
+            struct lm_ggml_tensor  * tf,
+            struct lm_ggml_tensor  * td,
+            struct lm_ggml_tensor  * state);
     // custom operators
     typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1923,8 +1975,6 @@ extern "C" {
     typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
-    #define LM_GGML_N_TASKS_MAX -1
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1(
             struct lm_ggml_context   * ctx,
             struct lm_ggml_tensor    * a,
@@ -1994,26 +2044,44 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * tensor);
     LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool keep);
     // graph allocation in a context
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph         (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom  (struct lm_ggml_context * ctx, size_t size, bool grads);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup         (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
-    LM_GGML_API struct lm_ggml_cgraph   lm_ggml_graph_view        (struct lm_ggml_cgraph * cgraph, int i0, int i1);
-    LM_GGML_API void                 lm_ggml_graph_cpy         (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
-    LM_GGML_API void                 lm_ggml_graph_reset       (struct lm_ggml_cgraph * cgraph);  // zero grads
-    LM_GGML_API void                 lm_ggml_graph_clear       (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph       (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup       (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API void                 lm_ggml_graph_cpy       (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
+    LM_GGML_API void                 lm_ggml_graph_reset     (struct lm_ggml_cgraph * cgraph);  // zero grads
+    LM_GGML_API void                 lm_ggml_graph_clear     (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API int                   lm_ggml_graph_size   (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_tensor *  lm_ggml_graph_node   (struct lm_ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    LM_GGML_API struct lm_ggml_tensor ** lm_ggml_graph_nodes  (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API int                   lm_ggml_graph_n_nodes(struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API void   lm_ggml_graph_add_node(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API size_t lm_ggml_graph_overhead(void);
     LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
+    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
+    LM_GGML_API void                          lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params * p, int n_threads);
+    LM_GGML_API bool                          lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
+    LM_GGML_API struct lm_ggml_threadpool *      lm_ggml_threadpool_new          (struct lm_ggml_threadpool_params  * params);
+    LM_GGML_API void                          lm_ggml_threadpool_free         (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API int                           lm_ggml_threadpool_get_n_threads(struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                          lm_ggml_threadpool_pause        (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                          lm_ggml_threadpool_resume       (struct lm_ggml_threadpool * threadpool);
     // lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan   (const struct lm_ggml_cgraph * cgraph, int n_threads /*= LM_GGML_DEFAULT_N_THREADS*/);
-    LM_GGML_API enum lm_ggml_status  lm_ggml_graph_compute(      struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
+    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan(
+                  const struct lm_ggml_cgraph * cgraph,
+                                       int   n_threads, /* = LM_GGML_DEFAULT_N_THREADS */
+                    struct lm_ggml_threadpool * threadpool /* = NULL */ );
+    LM_GGML_API enum lm_ggml_status  lm_ggml_graph_compute(struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
     // same as lm_ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
     LM_GGML_API enum lm_ggml_status  lm_ggml_graph_compute_with_ctx(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, int n_threads);
@@ -2402,6 +2470,7 @@ extern "C" {
     LM_GGML_API int lm_ggml_cpu_has_gpublas    (void);
     LM_GGML_API int lm_ggml_cpu_has_sse3       (void);
     LM_GGML_API int lm_ggml_cpu_has_ssse3      (void);
+    LM_GGML_API int lm_ggml_cpu_has_riscv_v    (void);
     LM_GGML_API int lm_ggml_cpu_has_sycl       (void);
     LM_GGML_API int lm_ggml_cpu_has_rpc        (void);
     LM_GGML_API int lm_ggml_cpu_has_vsx        (void);