cui-llama.rn 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/jni.cpp +3 -4
- package/cpp/common.cpp +183 -1990
- package/cpp/common.h +101 -130
- package/cpp/ggml-impl.h +32 -0
- package/cpp/ggml-metal.m +38 -28
- package/cpp/ggml-quants.c +275 -84
- package/cpp/ggml.c +89 -35
- package/cpp/ggml.h +30 -67
- package/cpp/llama-impl.h +1 -0
- package/cpp/llama-sampling.cpp +218 -102
- package/cpp/llama.cpp +599 -120
- package/cpp/llama.h +33 -25
- package/cpp/log.cpp +401 -0
- package/cpp/log.h +85 -703
- package/cpp/rn-llama.hpp +9 -11
- package/cpp/sampling.cpp +12 -9
- package/cpp/sampling.h +4 -56
- package/cpp/sgemm.cpp +38 -0
- package/package.json +1 -1
package/cpp/ggml.h
CHANGED
@@ -358,6 +358,7 @@ extern "C" {
 
     struct lm_ggml_object;
     struct lm_ggml_context;
+    struct lm_ggml_cgraph;
 
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum lm_ggml_type {
@@ -563,10 +564,11 @@ extern "C" {
     };
 
     enum lm_ggml_log_level {
-        LM_GGML_LOG_LEVEL_ERROR = 2,
-        LM_GGML_LOG_LEVEL_WARN  = 3,
-        LM_GGML_LOG_LEVEL_INFO  = 4,
-        LM_GGML_LOG_LEVEL_DEBUG = 5
+        LM_GGML_LOG_LEVEL_NONE  = 0,
+        LM_GGML_LOG_LEVEL_INFO  = 1,
+        LM_GGML_LOG_LEVEL_WARN  = 2,
+        LM_GGML_LOG_LEVEL_ERROR = 3,
+        LM_GGML_LOG_LEVEL_DEBUG = 4,
     };
 
     enum lm_ggml_tensor_flag {
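This renumbering is a breaking change for host code: `LM_GGML_LOG_LEVEL_NONE` is new, and severity no longer decreases as the numeric value grows, so threshold checks written against the old numbering silently change meaning. A minimal sketch of a host-side callback updated for the new values; the callback shape is the `lm_ggml_log_callback` typedef from ggml.h, registered via `llama_log_set` from llama.h, and the function name is illustrative:

```c
#include <stdio.h>

#include "llama.h" // brings in ggml.h and enum lm_ggml_log_level

// Old numbering was ERROR=2 ... DEBUG=5, so checks like `if (level <= 3)`
// now mean something different. A switch keeps the intent explicit.
static void my_log_callback(enum lm_ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    switch (level) {
        case LM_GGML_LOG_LEVEL_ERROR:
        case LM_GGML_LOG_LEVEL_WARN:
            fputs(text, stderr);
            break;
        case LM_GGML_LOG_LEVEL_NONE: // unleveled/raw output, see LLAMA_LOG below
        case LM_GGML_LOG_LEVEL_INFO:
            fputs(text, stdout);
            break;
        default: // LM_GGML_LOG_LEVEL_DEBUG and anything newer: drop
            break;
    }
}

// registration, e.g. at app startup:
// llama_log_set(my_log_callback, NULL);
```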
@@ -575,23 +577,9 @@ extern "C" {
         LM_GGML_TENSOR_FLAG_PARAM = 4,
     };
 
-    // ggml object
-    struct lm_ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct lm_ggml_object * next;
-
-        enum lm_ggml_object_type type;
-
-        char padding[4];
-    };
-
-    static const size_t LM_GGML_OBJECT_SIZE = sizeof(struct lm_ggml_object);
-
     // n-dimensional tensor
     struct lm_ggml_tensor {
-        enum lm_ggml_type         type;
+        enum lm_ggml_type type;
 
         LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
@@ -655,7 +643,7 @@ extern "C" {
 
     struct lm_ggml_threadpool;     // forward declaration, see ggml.c
 
-    typedef struct lm_ggml_threadpool* lm_ggml_threadpool_t;
+    typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
 
     // the compute plan that needs to be prepared for lm_ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
@@ -671,35 +659,6 @@ extern "C" {
         void * abort_callback_data;
     };
 
-    enum lm_ggml_cgraph_eval_order {
-        LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        LM_GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-
-    typedef uint32_t lm_ggml_bitset_t;
-
-    struct lm_ggml_hash_set {
-        size_t size;
-        lm_ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
-        struct lm_ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if lm_ggml_bitset_get(used, i)
-    };
-
-    // computation graph
-    struct lm_ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-
-        struct lm_ggml_tensor ** nodes;
-        struct lm_ggml_tensor ** grads;
-        struct lm_ggml_tensor ** leafs;
-
-        struct lm_ggml_hash_set visited_hash_set;
-
-        enum lm_ggml_cgraph_eval_order order;
-    };
-
     // scratch buffer
     struct lm_ggml_scratch {
         size_t offs;
@@ -2017,8 +1976,6 @@ extern "C" {
     typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
 
-    #define LM_GGML_N_TASKS_MAX -1
-
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
@@ -2088,30 +2045,35 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * tensor);
 
-
     LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool keep);
 
     // graph allocation in a context
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph         (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE and grads = false
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom  (struct lm_ggml_context * ctx, size_t size, bool grads);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup         (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
-    LM_GGML_API struct lm_ggml_cgraph   lm_ggml_graph_view        (struct lm_ggml_cgraph * cgraph, int i0, int i1);
-    LM_GGML_API void                    lm_ggml_graph_cpy         (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
-    LM_GGML_API void                    lm_ggml_graph_reset       (struct lm_ggml_cgraph * cgraph); // zero grads
-    LM_GGML_API void                    lm_ggml_graph_clear       (struct lm_ggml_cgraph * cgraph);
-
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph       (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup       (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API void                    lm_ggml_graph_cpy       (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
+    LM_GGML_API void                    lm_ggml_graph_reset     (struct lm_ggml_cgraph * cgraph); // zero grads
+    LM_GGML_API void                    lm_ggml_graph_clear     (struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API int                      lm_ggml_graph_size   (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_tensor *  lm_ggml_graph_node   (struct lm_ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    LM_GGML_API struct lm_ggml_tensor ** lm_ggml_graph_nodes  (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API int                      lm_ggml_graph_n_nodes(struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API void   lm_ggml_graph_add_node(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
 
     LM_GGML_API size_t lm_ggml_graph_overhead(void);
     LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
 
-    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
-    LM_GGML_API void                             lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params *p, int n_threads);
-    LM_GGML_API bool                             lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params *p0, const struct lm_ggml_threadpool_params *p1);
-    LM_GGML_API struct lm_ggml_threadpool*       lm_ggml_threadpool_new          (struct lm_ggml_threadpool_params  * params);
-    LM_GGML_API void                             lm_ggml_threadpool_free         (struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API int                              lm_ggml_threadpool_get_n_threads(struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API void                             lm_ggml_threadpool_pause        (struct lm_ggml_threadpool * threadpool);
-    LM_GGML_API void                             lm_ggml_threadpool_resume       (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
+    LM_GGML_API void                             lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params * p, int n_threads);
+    LM_GGML_API bool                             lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
+    LM_GGML_API struct lm_ggml_threadpool *      lm_ggml_threadpool_new           (struct lm_ggml_threadpool_params * params);
+    LM_GGML_API void                             lm_ggml_threadpool_free          (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API int                              lm_ggml_threadpool_get_n_threads (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_pause         (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_resume        (struct lm_ggml_threadpool * threadpool);
 
     // lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
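Together with the removal of `struct lm_ggml_cgraph` from the public header (the `@@ -671,35` hunk above), these accessors replace direct field access on graphs. A minimal sketch, assuming `gf` comes from the usual `lm_ggml_new_graph` / `lm_ggml_build_forward_expand` flow; `print_graph_nodes` is an illustrative helper, not part of the package:

```c
#include <stdio.h>

#include "ggml.h"

static void print_graph_nodes(struct lm_ggml_cgraph * gf) {
    const int n_nodes = lm_ggml_graph_n_nodes(gf);             // was: gf->n_nodes
    for (int i = 0; i < n_nodes; ++i) {
        struct lm_ggml_tensor * t = lm_ggml_graph_node(gf, i); // was: gf->nodes[i]
        printf("node %3d: %-12s %s\n", i, lm_ggml_op_name(t->op), t->name);
    }
    // negative indices count from the end: lm_ggml_graph_node(gf, -1) is the last node
}
```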
@@ -2509,6 +2471,7 @@ extern "C" {
     LM_GGML_API int lm_ggml_cpu_has_gpublas    (void);
     LM_GGML_API int lm_ggml_cpu_has_sse3       (void);
     LM_GGML_API int lm_ggml_cpu_has_ssse3      (void);
+    LM_GGML_API int lm_ggml_cpu_has_riscv_v    (void);
     LM_GGML_API int lm_ggml_cpu_has_sycl       (void);
     LM_GGML_API int lm_ggml_cpu_has_rpc        (void);
     LM_GGML_API int lm_ggml_cpu_has_vsx        (void);
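The retyped threadpool declarations above keep the same lifecycle: default params, create a pool, plan, compute, free. A hedged sketch of that flow, assuming the three-argument `lm_ggml_graph_plan(cgraph, n_threads, threadpool)` signature this header version uses and a graph `gf` built elsewhere; `compute_with_threadpool` is an illustrative name:

```c
#include <stdlib.h>

#include "ggml.h"

static enum lm_ggml_status compute_with_threadpool(struct lm_ggml_cgraph * gf, int n_threads) {
    struct lm_ggml_threadpool_params params = lm_ggml_threadpool_params_default(n_threads);
    struct lm_ggml_threadpool * tp = lm_ggml_threadpool_new(&params);

    struct lm_ggml_cplan plan = lm_ggml_graph_plan(gf, n_threads, tp);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size); // caller allocates, per the comment above
    }

    enum lm_ggml_status status = lm_ggml_graph_compute(gf, &plan);

    free(plan.work_data);
    lm_ggml_threadpool_free(tp);
    return status;
}
```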
package/cpp/llama-impl.h
CHANGED
@@ -24,6 +24,7 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (lm_ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data);
 
+#define LLAMA_LOG(...)       llama_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
 #define LLAMA_LOG_INFO(...)  llama_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
 #define LLAMA_LOG_WARN(...)  llama_log_internal(LM_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(LM_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
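The added `LLAMA_LOG(...)` emits at the new `LM_GGML_LOG_LEVEL_NONE`, giving llama.cpp internals a way to produce unleveled (raw) output alongside the leveled macros. Note these macros are internal: llama-impl.h is not part of the public headers. Illustrative call sites, not taken from the diff:

```c
// Hypothetical helper showing where the new macro slots in.
static void example_log_usage(const char * path, const char * piece) {
    LLAMA_LOG_INFO("%s: loading model from '%s'\n", __func__, path);
    LLAMA_LOG("%s", piece); // raw output at LM_GGML_LOG_LEVEL_NONE, e.g. streamed text
    LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
}
```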