cui-llama.rn 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/jni.cpp +3 -4
- package/cpp/common.cpp +183 -1990
- package/cpp/common.h +101 -130
- package/cpp/ggml-impl.h +32 -0
- package/cpp/ggml-metal.m +38 -28
- package/cpp/ggml-quants.c +275 -84
- package/cpp/ggml.c +89 -35
- package/cpp/ggml.h +30 -67
- package/cpp/llama-impl.h +1 -0
- package/cpp/llama-sampling.cpp +218 -102
- package/cpp/llama.cpp +599 -120
- package/cpp/llama.h +33 -25
- package/cpp/log.cpp +401 -0
- package/cpp/log.h +85 -703
- package/cpp/rn-llama.hpp +9 -11
- package/cpp/sampling.cpp +12 -9
- package/cpp/sampling.h +4 -56
- package/cpp/sgemm.cpp +38 -0
- package/package.json +1 -1
package/cpp/ggml.h
CHANGED
@@ -358,6 +358,7 @@ extern "C" {
 
     struct lm_ggml_object;
     struct lm_ggml_context;
+    struct lm_ggml_cgraph;
 
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum lm_ggml_type {
@@ -563,10 +564,11 @@ extern "C" {
     };
 
     enum lm_ggml_log_level {
-        LM_GGML_LOG_LEVEL_ERROR = 2,
-        LM_GGML_LOG_LEVEL_WARN  = 3,
-        LM_GGML_LOG_LEVEL_INFO  = 4,
-        LM_GGML_LOG_LEVEL_DEBUG = 5
+        LM_GGML_LOG_LEVEL_NONE  = 0,
+        LM_GGML_LOG_LEVEL_INFO  = 1,
+        LM_GGML_LOG_LEVEL_WARN  = 2,
+        LM_GGML_LOG_LEVEL_ERROR = 3,
+        LM_GGML_LOG_LEVEL_DEBUG = 4,
     };
 
     enum lm_ggml_tensor_flag {
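This renumbering is a breaking change for host code: `LM_GGML_LOG_LEVEL_NONE` is new, and severity no longer decreases as the numeric value grows, so threshold checks written against the old numbering silently change meaning. A minimal sketch of a host-side callback updated for the new values; the callback shape is the `lm_ggml_log_callback` typedef from ggml.h, registered via `llama_log_set` from llama.h, and the function name is illustrative:

```c
#include <stdio.h>

#include "llama.h" // brings in ggml.h and enum lm_ggml_log_level

// Old numbering was ERROR=2 ... DEBUG=5, so checks like `if (level <= 3)`
// now mean something different. A switch keeps the intent explicit.
static void my_log_callback(enum lm_ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    switch (level) {
        case LM_GGML_LOG_LEVEL_ERROR:
        case LM_GGML_LOG_LEVEL_WARN:
            fputs(text, stderr);
            break;
        case LM_GGML_LOG_LEVEL_NONE: // unleveled/raw output, see LLAMA_LOG below
        case LM_GGML_LOG_LEVEL_INFO:
            fputs(text, stdout);
            break;
        default: // LM_GGML_LOG_LEVEL_DEBUG and anything newer: drop
            break;
    }
}

// registration, e.g. at app startup:
// llama_log_set(my_log_callback, NULL);
```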
@@ -575,23 +577,9 @@ extern "C" {
         LM_GGML_TENSOR_FLAG_PARAM = 4,
     };
 
-    // ggml object
-    struct lm_ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct lm_ggml_object * next;
-
-        enum lm_ggml_object_type type;
-
-        char padding[4];
-    };
-
-    static const size_t LM_GGML_OBJECT_SIZE = sizeof(struct lm_ggml_object);
-
     // n-dimensional tensor
     struct lm_ggml_tensor {
-        enum lm_ggml_type         type;
+        enum lm_ggml_type type;
 
         LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
@@ -655,7 +643,7 @@ extern "C" {
 
     struct lm_ggml_threadpool;     // forward declaration, see ggml.c
 
-    typedef struct lm_ggml_threadpool* lm_ggml_threadpool_t;
+    typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
 
     // the compute plan that needs to be prepared for lm_ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
@@ -671,35 +659,6 @@ extern "C" {
         void * abort_callback_data;
     };
 
-    enum lm_ggml_cgraph_eval_order {
-        LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        LM_GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-
-    typedef uint32_t lm_ggml_bitset_t;
-
-    struct lm_ggml_hash_set {
-        size_t size;
-        lm_ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
-        struct lm_ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if lm_ggml_bitset_get(used, i)
-    };
-
-    // computation graph
-    struct lm_ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-
-        struct lm_ggml_tensor ** nodes;
-        struct lm_ggml_tensor ** grads;
-        struct lm_ggml_tensor ** leafs;
-
-        struct lm_ggml_hash_set visited_hash_set;
-
-        enum lm_ggml_cgraph_eval_order order;
-    };
-
     // scratch buffer
     struct lm_ggml_scratch {
         size_t offs;
@@ -2017,8 +1976,6 @@ extern "C" {
     typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
 
-    #define LM_GGML_N_TASKS_MAX -1
-
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
@@ -2088,30 +2045,35 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * tensor);
 
-
     LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool keep);
 
     // graph allocation in a context
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph         (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE and grads = false
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom  (struct lm_ggml_context * ctx, size_t size, bool grads);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup         (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
-    LM_GGML_API struct lm_ggml_cgraph   lm_ggml_graph_view        (struct lm_ggml_cgraph * cgraph, int i0, int i1);
-    LM_GGML_API void                    lm_ggml_graph_cpy         (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
-    LM_GGML_API void                    lm_ggml_graph_reset       (struct lm_ggml_cgraph * cgraph); // zero grads
-    LM_GGML_API void                    lm_ggml_graph_clear       (struct lm_ggml_cgraph * cgraph);
-
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph       (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup       (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API void                    lm_ggml_graph_cpy       (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
+    LM_GGML_API void                    lm_ggml_graph_reset     (struct lm_ggml_cgraph * cgraph); // zero grads
+    LM_GGML_API void                    lm_ggml_graph_clear     (struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API int                      lm_ggml_graph_size   (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_tensor *  lm_ggml_graph_node   (struct lm_ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    LM_GGML_API struct lm_ggml_tensor ** lm_ggml_graph_nodes  (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API int                      lm_ggml_graph_n_nodes(struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API void   lm_ggml_graph_add_node(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
 
     LM_GGML_API size_t lm_ggml_graph_overhead(void);
     LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
 
-    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
-    LM_GGML_API void                             lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params *p, int n_threads);
-    LM_GGML_API bool                             lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params *p0, const struct lm_ggml_threadpool_params *p1);
-    LM_GGML_API struct lm_ggml_threadpool*       lm_ggml_threadpool_new          (struct lm_ggml_threadpool_params  * params);
-    LM_GGML_API void                             lm_ggml_threadpool_free         (struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API int                              lm_ggml_threadpool_get_n_threads(struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API void                             lm_ggml_threadpool_pause        (struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API void                             lm_ggml_threadpool_resume       (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
+    LM_GGML_API void                             lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params * p, int n_threads);
+    LM_GGML_API bool                             lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
+    LM_GGML_API struct lm_ggml_threadpool *      lm_ggml_threadpool_new           (struct lm_ggml_threadpool_params * params);
+    LM_GGML_API void                             lm_ggml_threadpool_free          (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API int                              lm_ggml_threadpool_get_n_threads (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_pause         (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_resume        (struct lm_ggml_threadpool * threadpool);
 
     // lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
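Together with the removal of `struct lm_ggml_cgraph` from the public header (the `@@ -671,35` hunk above), these accessors replace direct field access on graphs. A minimal sketch, assuming `gf` comes from the usual `lm_ggml_new_graph` / `lm_ggml_build_forward_expand` flow; `print_graph_nodes` is an illustrative helper, not part of the package:

```c
#include <stdio.h>

#include "ggml.h"

static void print_graph_nodes(struct lm_ggml_cgraph * gf) {
    const int n_nodes = lm_ggml_graph_n_nodes(gf);             // was: gf->n_nodes
    for (int i = 0; i < n_nodes; ++i) {
        struct lm_ggml_tensor * t = lm_ggml_graph_node(gf, i); // was: gf->nodes[i]
        printf("node %3d: %-12s %s\n", i, lm_ggml_op_name(t->op), t->name);
    }
    // negative indices count from the end: lm_ggml_graph_node(gf, -1) is the last node
}
```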
@@ -2509,6 +2471,7 @@ extern "C" {
     LM_GGML_API int lm_ggml_cpu_has_gpublas    (void);
     LM_GGML_API int lm_ggml_cpu_has_sse3       (void);
     LM_GGML_API int lm_ggml_cpu_has_ssse3      (void);
+    LM_GGML_API int lm_ggml_cpu_has_riscv_v    (void);
     LM_GGML_API int lm_ggml_cpu_has_sycl       (void);
     LM_GGML_API int lm_ggml_cpu_has_rpc        (void);
     LM_GGML_API int lm_ggml_cpu_has_vsx        (void);
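The retyped threadpool declarations above keep the same lifecycle: default params, create a pool, plan, compute, free. A hedged sketch of that flow, assuming the three-argument `lm_ggml_graph_plan(cgraph, n_threads, threadpool)` signature this header version uses and a graph `gf` built elsewhere; `compute_with_threadpool` is an illustrative name:

```c
#include <stdlib.h>

#include "ggml.h"

static enum lm_ggml_status compute_with_threadpool(struct lm_ggml_cgraph * gf, int n_threads) {
    struct lm_ggml_threadpool_params params = lm_ggml_threadpool_params_default(n_threads);
    struct lm_ggml_threadpool * tp = lm_ggml_threadpool_new(&params);

    struct lm_ggml_cplan plan = lm_ggml_graph_plan(gf, n_threads, tp);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size); // caller allocates, per the comment above
    }

    enum lm_ggml_status status = lm_ggml_graph_compute(gf, &plan);

    free(plan.work_data);
    lm_ggml_threadpool_free(tp);
    return status;
}
```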
package/cpp/llama-impl.h
CHANGED
@@ -24,6 +24,7 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (lm_ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data);
 
+#define LLAMA_LOG(...)       llama_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
 #define LLAMA_LOG_INFO(...)  llama_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
 #define LLAMA_LOG_WARN(...)  llama_log_internal(LM_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(LM_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
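The added `LLAMA_LOG(...)` emits at the new `LM_GGML_LOG_LEVEL_NONE`, giving llama.cpp internals a way to produce unleveled (raw) output alongside the leveled macros. Note these macros are internal: llama-impl.h is not part of the public headers. Illustrative call sites, not taken from the diff:

```c
// Hypothetical helper showing where the new macro slots in.
static void example_log_usage(const char * path, const char * piece) {
    LLAMA_LOG_INFO("%s: loading model from '%s'\n", __func__, path);
    LLAMA_LOG("%s", piece); // raw output at LM_GGML_LOG_LEVEL_NONE, e.g. streamed text
    LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
}
```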