cui-llama.rn 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/android/src/main/CMakeLists.txt +14 -8
  2. package/android/src/main/jni.cpp +38 -37
  3. package/cpp/common.cpp +50 -30
  4. package/cpp/common.h +32 -13
  5. package/cpp/ggml-alloc.c +0 -1
  6. package/cpp/ggml-backend-reg.cpp +79 -49
  7. package/cpp/ggml-backend.cpp +5 -2
  8. package/cpp/ggml-cpp.h +1 -0
  9. package/cpp/ggml-cpu-aarch64.cpp +57 -72
  10. package/cpp/ggml-cpu-quants.c +5 -1
  11. package/cpp/ggml-cpu.c +6 -6
  12. package/cpp/ggml-cpu.cpp +9 -0
  13. package/cpp/ggml-impl.h +11 -0
  14. package/cpp/ggml-metal.m +2 -2
  15. package/cpp/ggml.c +129 -1388
  16. package/cpp/ggml.h +29 -152
  17. package/cpp/gguf.cpp +1325 -0
  18. package/cpp/gguf.h +202 -0
  19. package/cpp/llama-adapter.cpp +346 -0
  20. package/cpp/llama-adapter.h +73 -0
  21. package/cpp/llama-arch.cpp +1434 -0
  22. package/cpp/llama-arch.h +395 -0
  23. package/cpp/llama-batch.cpp +368 -0
  24. package/cpp/llama-batch.h +88 -0
  25. package/cpp/llama-chat.cpp +567 -0
  26. package/cpp/llama-chat.h +51 -0
  27. package/cpp/llama-context.cpp +1771 -0
  28. package/cpp/llama-context.h +128 -0
  29. package/cpp/llama-cparams.cpp +1 -0
  30. package/cpp/llama-cparams.h +37 -0
  31. package/cpp/llama-cpp.h +30 -0
  32. package/cpp/llama-grammar.cpp +16 -15
  33. package/cpp/llama-grammar.h +5 -6
  34. package/cpp/llama-hparams.cpp +71 -0
  35. package/cpp/llama-hparams.h +140 -0
  36. package/cpp/llama-impl.cpp +167 -0
  37. package/cpp/llama-impl.h +16 -136
  38. package/cpp/llama-kv-cache.cpp +718 -0
  39. package/cpp/llama-kv-cache.h +218 -0
  40. package/cpp/llama-mmap.cpp +589 -0
  41. package/cpp/llama-mmap.h +67 -0
  42. package/cpp/llama-model-loader.cpp +1011 -0
  43. package/cpp/llama-model-loader.h +158 -0
  44. package/cpp/llama-model.cpp +2202 -0
  45. package/cpp/llama-model.h +391 -0
  46. package/cpp/llama-sampling.cpp +117 -4
  47. package/cpp/llama-vocab.cpp +26 -29
  48. package/cpp/llama-vocab.h +14 -2
  49. package/cpp/llama.cpp +8839 -19131
  50. package/cpp/llama.cpp.rej +23 -0
  51. package/cpp/llama.h +31 -9
  52. package/cpp/rn-llama.hpp +39 -37
  53. package/cpp/sgemm.cpp +1091 -378
  54. package/cpp/sgemm.h +2 -2
  55. package/cpp/unicode.cpp +6 -0
  56. package/package.json +1 -1
package/cpp/ggml.h CHANGED
@@ -242,12 +242,6 @@
242
242
  #define LM_GGML_ROPE_TYPE_MROPE 8
243
243
  #define LM_GGML_ROPE_TYPE_VISION 24
244
244
 
245
- #define LM_GGUF_MAGIC "GGUF"
246
-
247
- #define LM_GGUF_VERSION 3
248
-
249
- #define LM_GGUF_DEFAULT_ALIGNMENT 32
250
-
251
245
  #define LM_GGML_UNUSED(x) (void)(x)
252
246
 
253
247
  #define LM_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -404,12 +398,6 @@ extern "C" {
404
398
  LM_GGML_PREC_F32,
405
399
  };
406
400
 
407
- enum lm_ggml_backend_type {
408
- LM_GGML_BACKEND_TYPE_CPU = 0,
409
- LM_GGML_BACKEND_TYPE_GPU = 10,
410
- LM_GGML_BACKEND_TYPE_GPU_SPLIT = 20,
411
- };
412
-
413
401
  // model file types
414
402
  enum lm_ggml_ftype {
415
403
  LM_GGML_FTYPE_UNKNOWN = -1,
@@ -588,8 +576,6 @@ extern "C" {
588
576
  struct lm_ggml_tensor {
589
577
  enum lm_ggml_type type;
590
578
 
591
- LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
592
-
593
579
  struct lm_ggml_backend_buffer * buffer;
594
580
 
595
581
  int64_t ne[LM_GGML_MAX_DIMS]; // number of elements
@@ -1565,17 +1551,6 @@ extern "C" {
1565
1551
  int d1, // dilation dimension 1
1566
1552
  bool is_2D);
1567
1553
 
1568
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
1569
- struct lm_ggml_context * ctx,
1570
- struct lm_ggml_tensor * a, // convolution kernel
1571
- struct lm_ggml_tensor * b, // data
1572
- int s0, // stride dimension 0
1573
- int s1, // stride dimension 1
1574
- int p0, // padding dimension 0
1575
- int p1, // padding dimension 1
1576
- int d0, // dilation dimension 0
1577
- int d1); // dilation dimension 1
1578
-
1579
1554
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
1580
1555
  struct lm_ggml_context * ctx,
1581
1556
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1593,6 +1568,23 @@ extern "C" {
1593
1568
  int s, // stride
1594
1569
  int d); // dilation
1595
1570
 
1571
+ // depthwise
1572
+ // TODO: this is very likely wrong for some cases! - needs more testing
1573
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
1574
+ struct lm_ggml_context * ctx,
1575
+ struct lm_ggml_tensor * a, // convolution kernel
1576
+ struct lm_ggml_tensor * b, // data
1577
+ int s0, // stride
1578
+ int p0, // padding
1579
+ int d0); // dilation
1580
+
1581
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
1582
+ struct lm_ggml_context * ctx,
1583
+ struct lm_ggml_tensor * a, // convolution kernel
1584
+ struct lm_ggml_tensor * b, // data
1585
+ int s0, // stride
1586
+ int d0); // dilation
1587
+
1596
1588
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
1597
1589
  struct lm_ggml_context * ctx,
1598
1590
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1612,7 +1604,6 @@ extern "C" {
1612
1604
  int d0, // dilation dimension 0
1613
1605
  int d1); // dilation dimension 1
1614
1606
 
1615
-
1616
1607
  // kernel size is a->ne[0] x a->ne[1]
1617
1608
  // stride is equal to kernel size
1618
1609
  // padding is zero
@@ -1639,6 +1630,18 @@ extern "C" {
1639
1630
  struct lm_ggml_tensor * a,
1640
1631
  struct lm_ggml_tensor * b);
1641
1632
 
1633
+ // depthwise
1634
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
1635
+ struct lm_ggml_context * ctx,
1636
+ struct lm_ggml_tensor * a, // convolution kernel
1637
+ struct lm_ggml_tensor * b, // data
1638
+ int s0, // stride dimension 0
1639
+ int s1, // stride dimension 1
1640
+ int p0, // padding dimension 0
1641
+ int p1, // padding dimension 1
1642
+ int d0, // dilation dimension 0
1643
+ int d1); // dilation dimension 1
1644
+
1642
1645
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
1643
1646
  struct lm_ggml_context * ctx,
1644
1647
  struct lm_ggml_tensor * a,
@@ -2095,132 +2098,6 @@ extern "C" {
2095
2098
  int64_t n_per_row,
2096
2099
  const float * imatrix);
2097
2100
 
2098
- //
2099
- // gguf
2100
- //
2101
-
2102
- enum lm_gguf_type {
2103
- LM_GGUF_TYPE_UINT8 = 0,
2104
- LM_GGUF_TYPE_INT8 = 1,
2105
- LM_GGUF_TYPE_UINT16 = 2,
2106
- LM_GGUF_TYPE_INT16 = 3,
2107
- LM_GGUF_TYPE_UINT32 = 4,
2108
- LM_GGUF_TYPE_INT32 = 5,
2109
- LM_GGUF_TYPE_FLOAT32 = 6,
2110
- LM_GGUF_TYPE_BOOL = 7,
2111
- LM_GGUF_TYPE_STRING = 8,
2112
- LM_GGUF_TYPE_ARRAY = 9,
2113
- LM_GGUF_TYPE_UINT64 = 10,
2114
- LM_GGUF_TYPE_INT64 = 11,
2115
- LM_GGUF_TYPE_FLOAT64 = 12,
2116
- LM_GGUF_TYPE_COUNT, // marks the end of the enum
2117
- };
2118
-
2119
- struct lm_gguf_context;
2120
-
2121
- struct lm_gguf_init_params {
2122
- bool no_alloc;
2123
-
2124
- // if not NULL, create a lm_ggml_context and allocate the tensor data in it
2125
- struct lm_ggml_context ** ctx;
2126
- };
2127
-
2128
- LM_GGML_API struct lm_gguf_context * lm_gguf_init_empty(void);
2129
- LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params);
2130
- //LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_buffer(..);
2131
-
2132
- LM_GGML_API void lm_gguf_free(struct lm_gguf_context * ctx);
2133
-
2134
- LM_GGML_API const char * lm_gguf_type_name(enum lm_gguf_type type);
2135
-
2136
- LM_GGML_API int lm_gguf_get_version (const struct lm_gguf_context * ctx);
2137
- LM_GGML_API size_t lm_gguf_get_alignment (const struct lm_gguf_context * ctx);
2138
- LM_GGML_API size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx);
2139
- LM_GGML_API void * lm_gguf_get_data (const struct lm_gguf_context * ctx);
2140
-
2141
- LM_GGML_API int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx);
2142
- LM_GGML_API int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key);
2143
- LM_GGML_API const char * lm_gguf_get_key (const struct lm_gguf_context * ctx, int key_id);
2144
-
2145
- LM_GGML_API enum lm_gguf_type lm_gguf_get_kv_type (const struct lm_gguf_context * ctx, int key_id);
2146
- LM_GGML_API enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id);
2147
-
2148
- // will abort if the wrong type is used for the key
2149
- LM_GGML_API uint8_t lm_gguf_get_val_u8 (const struct lm_gguf_context * ctx, int key_id);
2150
- LM_GGML_API int8_t lm_gguf_get_val_i8 (const struct lm_gguf_context * ctx, int key_id);
2151
- LM_GGML_API uint16_t lm_gguf_get_val_u16 (const struct lm_gguf_context * ctx, int key_id);
2152
- LM_GGML_API int16_t lm_gguf_get_val_i16 (const struct lm_gguf_context * ctx, int key_id);
2153
- LM_GGML_API uint32_t lm_gguf_get_val_u32 (const struct lm_gguf_context * ctx, int key_id);
2154
- LM_GGML_API int32_t lm_gguf_get_val_i32 (const struct lm_gguf_context * ctx, int key_id);
2155
- LM_GGML_API float lm_gguf_get_val_f32 (const struct lm_gguf_context * ctx, int key_id);
2156
- LM_GGML_API uint64_t lm_gguf_get_val_u64 (const struct lm_gguf_context * ctx, int key_id);
2157
- LM_GGML_API int64_t lm_gguf_get_val_i64 (const struct lm_gguf_context * ctx, int key_id);
2158
- LM_GGML_API double lm_gguf_get_val_f64 (const struct lm_gguf_context * ctx, int key_id);
2159
- LM_GGML_API bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id);
2160
- LM_GGML_API const char * lm_gguf_get_val_str (const struct lm_gguf_context * ctx, int key_id);
2161
- LM_GGML_API const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int key_id);
2162
- LM_GGML_API int lm_gguf_get_arr_n (const struct lm_gguf_context * ctx, int key_id);
2163
- LM_GGML_API const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id);
2164
- LM_GGML_API const char * lm_gguf_get_arr_str (const struct lm_gguf_context * ctx, int key_id, int i);
2165
-
2166
- LM_GGML_API int lm_gguf_get_n_tensors (const struct lm_gguf_context * ctx);
2167
- LM_GGML_API int lm_gguf_find_tensor (const struct lm_gguf_context * ctx, const char * name);
2168
- LM_GGML_API size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i);
2169
- LM_GGML_API char * lm_gguf_get_tensor_name (const struct lm_gguf_context * ctx, int i);
2170
- LM_GGML_API enum lm_ggml_type lm_gguf_get_tensor_type (const struct lm_gguf_context * ctx, int i);
2171
-
2172
- // removes key if it exists
2173
- LM_GGML_API void lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key);
2174
-
2175
- // overrides existing values or adds a new one
2176
- LM_GGML_API void lm_gguf_set_val_u8 (struct lm_gguf_context * ctx, const char * key, uint8_t val);
2177
- LM_GGML_API void lm_gguf_set_val_i8 (struct lm_gguf_context * ctx, const char * key, int8_t val);
2178
- LM_GGML_API void lm_gguf_set_val_u16 (struct lm_gguf_context * ctx, const char * key, uint16_t val);
2179
- LM_GGML_API void lm_gguf_set_val_i16 (struct lm_gguf_context * ctx, const char * key, int16_t val);
2180
- LM_GGML_API void lm_gguf_set_val_u32 (struct lm_gguf_context * ctx, const char * key, uint32_t val);
2181
- LM_GGML_API void lm_gguf_set_val_i32 (struct lm_gguf_context * ctx, const char * key, int32_t val);
2182
- LM_GGML_API void lm_gguf_set_val_f32 (struct lm_gguf_context * ctx, const char * key, float val);
2183
- LM_GGML_API void lm_gguf_set_val_u64 (struct lm_gguf_context * ctx, const char * key, uint64_t val);
2184
- LM_GGML_API void lm_gguf_set_val_i64 (struct lm_gguf_context * ctx, const char * key, int64_t val);
2185
- LM_GGML_API void lm_gguf_set_val_f64 (struct lm_gguf_context * ctx, const char * key, double val);
2186
- LM_GGML_API void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val);
2187
- LM_GGML_API void lm_gguf_set_val_str (struct lm_gguf_context * ctx, const char * key, const char * val);
2188
- LM_GGML_API void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n);
2189
- LM_GGML_API void lm_gguf_set_arr_str (struct lm_gguf_context * ctx, const char * key, const char ** data, int n);
2190
-
2191
- // set or add KV pairs from another context
2192
- LM_GGML_API void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src);
2193
-
2194
- // manage tensor info
2195
- LM_GGML_API void lm_gguf_add_tensor(struct lm_gguf_context * ctx, const struct lm_ggml_tensor * tensor);
2196
- LM_GGML_API void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type);
2197
- LM_GGML_API void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size);
2198
-
2199
- // writing gguf files can be done in 2 ways:
2200
- //
2201
- // - write the entire lm_gguf_context to a binary file in a single pass:
2202
- //
2203
- // lm_gguf_write_to_file(ctx, fname);
2204
- //
2205
- // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2206
- //
2207
- // FILE * f = fopen(fname, "wb");
2208
- // fseek(f, lm_gguf_get_meta_size(ctx), SEEK_SET);
2209
- // fwrite(f, ...);
2210
- // void * data = lm_gguf_meta_get_meta_data(ctx);
2211
- // fseek(f, 0, SEEK_SET);
2212
- // fwrite(f, data, lm_gguf_get_meta_size(ctx));
2213
- // free(data);
2214
- // fclose(f);
2215
- //
2216
-
2217
- // write the entire context to a binary file
2218
- LM_GGML_API void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta);
2219
-
2220
- // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2221
- LM_GGML_API size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx);
2222
- LM_GGML_API void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data);
2223
-
2224
2101
  #ifdef __cplusplus
2225
2102
  // restrict not standard in C++
2226
2103
  # if defined(__GNUC__)