cui-llama.rn 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/android/src/main/CMakeLists.txt +2 -2
- package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
- package/android/src/main/java/com/rnllama/RNLlama.java +39 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +5 -0
- package/cpp/common.cpp +36 -1
- package/cpp/common.h +5 -1
- package/cpp/ggml-aarch64.c +2 -11
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +151 -78
- package/cpp/{ggml-backend.c → ggml-backend.cpp} +565 -269
- package/cpp/ggml-backend.h +147 -62
- package/cpp/ggml-impl.h +15 -0
- package/cpp/ggml-metal.h +8 -9
- package/cpp/ggml-metal.m +2428 -2111
- package/cpp/ggml-quants.c +2 -2
- package/cpp/ggml-quants.h +0 -4
- package/cpp/ggml.c +799 -1121
- package/cpp/ggml.h +79 -72
- package/cpp/llama-vocab.cpp +189 -106
- package/cpp/llama-vocab.h +18 -9
- package/cpp/llama.cpp +736 -341
- package/cpp/llama.h +9 -4
- package/cpp/unicode-data.cpp +6 -4
- package/cpp/unicode-data.h +4 -4
- package/cpp/unicode.cpp +14 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +4 -0
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +3 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +6 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +2 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +7 -0
- package/src/index.ts +5 -0
package/cpp/ggml.h
CHANGED
```diff
@@ -187,16 +187,6 @@
 # define LM_GGML_API
 #endif
 
-#ifdef LM_GGML_MULTIPLATFORM
-# if defined(_WIN32)
-# define LM_GGML_CALL
-# else
-# define LM_GGML_CALL __attribute__((__ms_abi__))
-# endif
-#else
-# define LM_GGML_CALL
-#endif
-
 // TODO: support for clang
 #ifdef __GNUC__
 # define LM_GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
```
```diff
@@ -230,14 +220,16 @@
 #define LM_GGML_MAX_PARAMS 2048
 #define LM_GGML_MAX_CONTEXTS 64
 #define LM_GGML_MAX_SRC 10
-#ifndef LM_GGML_MAX_NAME
-#define LM_GGML_MAX_NAME 64
 #define LM_GGML_MAX_N_THREADS 512
+#define LM_GGML_MAX_OP_PARAMS 64
 
+#ifndef LM_GGML_MAX_NAME
+# define LM_GGML_MAX_NAME 64
 #endif
-
+
 #define LM_GGML_DEFAULT_N_THREADS 4
 #define LM_GGML_DEFAULT_GRAPH_SIZE 2048
+
 #if UINTPTR_MAX == 0xFFFFFFFF
 #define LM_GGML_MEM_ALIGN 4
 #else
```
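Note: `LM_GGML_MAX_NAME` is now wrapped in an `#ifndef` guard, so a downstream build can override the tensor-name length limit. A minimal sketch of such an override; the value 128 and the `"ggml.h"` include path are illustrative assumptions, not something this package sets:

```c
// Sketch: raise the tensor-name limit before including the header
// (equivalently, pass -DLM_GGML_MAX_NAME=128 on the compiler command line).
#define LM_GGML_MAX_NAME 128
#include "ggml.h"
```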
```diff
@@ -260,21 +252,21 @@
 #define LM_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
 #ifndef NDEBUG
-#define LM_GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
+# define LM_GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
-#define LM_GGML_UNREACHABLE() __builtin_unreachable()
+# define LM_GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
-#define LM_GGML_UNREACHABLE() __assume(0)
+# define LM_GGML_UNREACHABLE() __assume(0)
 #else
-#define LM_GGML_UNREACHABLE() ((void) 0)
+# define LM_GGML_UNREACHABLE() ((void) 0)
 #endif
 
 #ifdef __cplusplus
-#define LM_GGML_NORETURN [[noreturn]]
+# define LM_GGML_NORETURN [[noreturn]]
 #elif defined(_MSC_VER)
-#define LM_GGML_NORETURN __declspec(noreturn)
+# define LM_GGML_NORETURN __declspec(noreturn)
 #else
-#define LM_GGML_NORETURN _Noreturn
+# define LM_GGML_NORETURN _Noreturn
 #endif
 
 #define LM_GGML_ABORT(...) lm_ggml_abort((strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__), __LINE__, __VA_ARGS__)
```
```diff
@@ -339,7 +331,7 @@ extern "C" {
 };
 
 // get lm_ggml_status name string
-LM_GGML_API
+LM_GGML_API const char * lm_ggml_status_to_string(enum lm_ggml_status status);
 
 // ieee 754-2008 half-precision float16
 // todo: make this not an integral type
```
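Usage sketch for `lm_ggml_status_to_string`, which turns a compute status into a readable message. The helper function, the `"ggml.h"` include path, and `lm_ggml_graph_compute_with_ctx` (present in upstream ggml) are assumptions for illustration:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: report a graph-compute failure as text.
static void compute_or_report(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf) {
    enum lm_ggml_status st = lm_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);
    if (st != LM_GGML_STATUS_SUCCESS) {
        fprintf(stderr, "ggml compute failed: %s\n", lm_ggml_status_to_string(st));
    }
}
```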
```diff
@@ -465,6 +457,7 @@ extern "C" {
 LM_GGML_OP_SUM_ROWS,
 LM_GGML_OP_MEAN,
 LM_GGML_OP_ARGMAX,
+LM_GGML_OP_COUNT_EQUAL,
 LM_GGML_OP_REPEAT,
 LM_GGML_OP_REPEAT_BACK,
 LM_GGML_OP_CONCAT,
```
```diff
@@ -576,10 +569,10 @@ extern "C" {
 
 // this tensor...
 enum lm_ggml_tensor_flag {
-LM_GGML_TENSOR_FLAG_INPUT
-LM_GGML_TENSOR_FLAG_OUTPUT
-LM_GGML_TENSOR_FLAG_PARAM
-LM_GGML_TENSOR_FLAG_LOSS
+LM_GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
+LM_GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
+LM_GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
+LM_GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
 };
 
 // n-dimensional tensor
```
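The tensor flags now carry explicit bit values, so they can be combined and tested as a bitmask. A small illustrative sketch; direct access to the `flags` field follows upstream ggml, and in practice helpers such as `lm_ggml_set_loss` (declared further down in this header) set these bits for you:

```c
#include "ggml.h"

// Sketch: combine and test the flag bits directly on a tensor.
static void mark_trainable_input(struct lm_ggml_tensor * t) {
    t->flags |= LM_GGML_TENSOR_FLAG_INPUT | LM_GGML_TENSOR_FLAG_PARAM;

    if (t->flags & LM_GGML_TENSOR_FLAG_LOSS) {
        // this tensor also contributes to the optimization loss
    }
}
```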
```diff
@@ -715,46 +708,46 @@ extern "C" {
 LM_GGML_API void lm_ggml_print_object (const struct lm_ggml_object * obj);
 LM_GGML_API void lm_ggml_print_objects(const struct lm_ggml_context * ctx);
 
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API int64_t lm_ggml_nelements (const struct lm_ggml_tensor * tensor);
+LM_GGML_API int64_t lm_ggml_nrows (const struct lm_ggml_tensor * tensor);
+LM_GGML_API size_t lm_ggml_nbytes (const struct lm_ggml_tensor * tensor);
+LM_GGML_API size_t lm_ggml_nbytes_pad(const struct lm_ggml_tensor * tensor); // same as lm_ggml_nbytes() but padded to LM_GGML_MEM_ALIGN
 
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API int64_t lm_ggml_blck_size(enum lm_ggml_type type);
+LM_GGML_API size_t lm_ggml_type_size(enum lm_ggml_type type); // size in bytes for all elements in a block
+LM_GGML_API size_t lm_ggml_row_size (enum lm_ggml_type type, int64_t ne); // size in bytes for all elements in a row
 
 LM_GGML_DEPRECATED(
 LM_GGML_API double lm_ggml_type_sizef(enum lm_ggml_type type), // lm_ggml_type_size()/lm_ggml_blck_size() as float
 "use lm_ggml_row_size() instead");
 
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API const char * lm_ggml_type_name(enum lm_ggml_type type);
+LM_GGML_API const char * lm_ggml_op_name (enum lm_ggml_op op);
+LM_GGML_API const char * lm_ggml_op_symbol(enum lm_ggml_op op);
 
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API const char * lm_ggml_unary_op_name(enum lm_ggml_unary_op op);
+LM_GGML_API const char * lm_ggml_op_desc(const struct lm_ggml_tensor * t); // unary or op name
 
-LM_GGML_API
+LM_GGML_API size_t lm_ggml_element_size(const struct lm_ggml_tensor * tensor);
 
-LM_GGML_API
+LM_GGML_API bool lm_ggml_is_quantized(enum lm_ggml_type type);
 
 // TODO: temporary until model loading of ggml examples is refactored
 LM_GGML_API enum lm_ggml_type lm_ggml_ftype_to_lm_ggml_type(enum lm_ggml_ftype ftype);
 
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API bool lm_ggml_is_transposed(const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_permuted (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_empty (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_scalar (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_vector (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_matrix (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_3d (const struct lm_ggml_tensor * tensor);
+LM_GGML_API int lm_ggml_n_dims (const struct lm_ggml_tensor * tensor); // returns 1 for scalars
 
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
-LM_GGML_API
+LM_GGML_API bool lm_ggml_is_contiguous (const struct lm_ggml_tensor * tensor);
+LM_GGML_API bool lm_ggml_is_contiguous_0(const struct lm_ggml_tensor * tensor); // same as lm_ggml_is_contiguous()
+LM_GGML_API bool lm_ggml_is_contiguous_1(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 1
+LM_GGML_API bool lm_ggml_is_contiguous_2(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 2
 
 LM_GGML_API bool lm_ggml_are_same_shape (const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
 LM_GGML_API bool lm_ggml_are_same_stride(const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
```
```diff
@@ -846,7 +839,7 @@ extern "C" {
 LM_GGML_API void * lm_ggml_get_data (const struct lm_ggml_tensor * tensor);
 LM_GGML_API float * lm_ggml_get_data_f32(const struct lm_ggml_tensor * tensor);
 
-LM_GGML_API
+LM_GGML_API enum lm_ggml_unary_op lm_ggml_get_unary_op(const struct lm_ggml_tensor * tensor);
 
 LM_GGML_API const char * lm_ggml_get_name (const struct lm_ggml_tensor * tensor);
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_name ( struct lm_ggml_tensor * tensor, const char * name);
```
```diff
@@ -1003,6 +996,12 @@ extern "C" {
 struct lm_ggml_context * ctx,
 struct lm_ggml_tensor * a);
 
+// count number of equal elements in a and b
+LM_GGML_API struct lm_ggml_tensor * lm_ggml_count_equal(
+struct lm_ggml_context * ctx,
+struct lm_ggml_tensor * a,
+struct lm_ggml_tensor * b);
+
 // if a is the same shape as b, and a is not parameter, return a
 // otherwise, return a new tensor: repeat(a) to fit in b
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat(
```
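`lm_ggml_count_equal` (with its matching `LM_GGML_OP_COUNT_EQUAL` op) is new in this header. A rough usage sketch; the wrapper function, the `"ggml.h"` include path, and the exact operand type/shape requirements (matching integer tensors, single-element result, as in upstream ggml) are assumptions:

```c
#include "ggml.h"

// Sketch: build a node that counts how many elements of two I32 tensors are equal.
static struct lm_ggml_tensor * build_count_equal(struct lm_ggml_context * ctx,
                                                 struct lm_ggml_cgraph  * gf) {
    struct lm_ggml_tensor * a  = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_I32, 1024);
    struct lm_ggml_tensor * b  = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_I32, 1024);
    struct lm_ggml_tensor * eq = lm_ggml_count_equal(ctx, a, b);
    lm_ggml_build_forward_expand(gf, eq);
    return eq; // read the count from this tensor after the graph has been computed
}
```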
```diff
@@ -1409,14 +1408,14 @@ extern "C" {
 // supports 3D: a->ne[2] == b->ne[1]
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_get_rows(
 struct lm_ggml_context * ctx,
-struct lm_ggml_tensor * a,
-struct lm_ggml_tensor * b);
+struct lm_ggml_tensor * a, // data
+struct lm_ggml_tensor * b); // row indices
 
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_get_rows_back(
 struct lm_ggml_context * ctx,
-struct lm_ggml_tensor * a,
-struct lm_ggml_tensor * b,
-struct lm_ggml_tensor * c);
+struct lm_ggml_tensor * a, // gradients of lm_ggml_get_rows result
+struct lm_ggml_tensor * b, // row indices
+struct lm_ggml_tensor * c); // data for lm_ggml_get_rows, only used for its shape
 
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_diag(
 struct lm_ggml_context * ctx,
```
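The new parameter comments spell out that `a` is the data matrix and `b` holds the row indices. A short sketch under the usual upstream conventions (I32 index tensor, row length in `ne0`); the wrapper function and tensor sizes are illustrative assumptions:

```c
#include "ggml.h"

// Sketch: gather 4 rows (each of length 4096) from a 1024-row matrix by index.
static struct lm_ggml_tensor * gather_rows(struct lm_ggml_context * ctx) {
    struct lm_ggml_tensor * data = lm_ggml_new_tensor_2d(ctx, LM_GGML_TYPE_F32, 4096, 1024); // a: data
    struct lm_ggml_tensor * idx  = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_I32, 4);          // b: row indices
    return lm_ggml_get_rows(ctx, data, idx); // result: 4 rows of length 4096
}
```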
```diff
@@ -1560,16 +1559,16 @@ extern "C" {
 "use lm_ggml_rope_ext_inplace instead");
 
 // compute correction dims for YaRN RoPE scaling
-
+void lm_ggml_rope_yarn_corr_dims(
 int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
 // rotary position embedding backward, i.e compute dx from dy
 // a - dy
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_rope_back(
 struct lm_ggml_context * ctx,
-struct lm_ggml_tensor * a,
-struct lm_ggml_tensor * b,
-struct lm_ggml_tensor * c,
+struct lm_ggml_tensor * a, // gradients of lm_ggml_rope result
+struct lm_ggml_tensor * b, // positions
+struct lm_ggml_tensor * c, // freq factors
 int n_dims,
 int mode,
 int n_ctx_orig,
```
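An illustrative call to `lm_ggml_rope_yarn_corr_dims`, whose full signature appears above; every numeric value here is a placeholder, not a recommendation:

```c
#include "ggml.h"

// Sketch: compute YaRN RoPE correction dims for placeholder rope parameters.
static void example_corr_dims(void) {
    float corr_dims[2];
    lm_ggml_rope_yarn_corr_dims(/*n_dims=*/128, /*n_ctx_orig=*/4096, /*freq_base=*/10000.0f,
                                /*beta_fast=*/32.0f, /*beta_slow=*/1.0f, corr_dims);
}
```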
```diff
@@ -2035,15 +2034,15 @@ extern "C" {
 // loss function
 
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss(
-struct lm_ggml_context
-struct lm_ggml_tensor
-struct lm_ggml_tensor
+struct lm_ggml_context * ctx,
+struct lm_ggml_tensor * a, // logits
+struct lm_ggml_tensor * b); // labels
 
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss_back(
-struct lm_ggml_context
-struct lm_ggml_tensor
-struct lm_ggml_tensor
-struct lm_ggml_tensor
+struct lm_ggml_context * ctx,
+struct lm_ggml_tensor * a, // logits
+struct lm_ggml_tensor * b, // labels
+struct lm_ggml_tensor * c); // gradients of cross_entropy_loss result
 
 // AdamW optimizer step
 // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
```
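The cross-entropy declarations now document their operands (logits, labels, and for the backward variant the incoming gradients). A forward-pass sketch; the wrapper function, the `"ggml.h"` include, and the `logits`/`labels` tensors are assumed to be built elsewhere, while `lm_ggml_set_loss` and `lm_ggml_build_forward_expand` are declared later in this header:

```c
#include "ggml.h"

// Sketch: build a cross-entropy loss node and mark it as the graph's loss tensor.
static void add_loss(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf,
                     struct lm_ggml_tensor * logits, struct lm_ggml_tensor * labels) {
    struct lm_ggml_tensor * loss = lm_ggml_cross_entropy_loss(ctx, logits, labels);
    lm_ggml_set_loss(loss);                 // marks the tensor as the optimization loss
    lm_ggml_build_forward_expand(gf, loss);
}
```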
```diff
@@ -2051,6 +2050,7 @@ extern "C" {
 LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_step_adamw(
 struct lm_ggml_context * ctx,
 struct lm_ggml_tensor * a,
+struct lm_ggml_tensor * grad,
 float alpha,
 float beta1,
 float beta2,
```
```diff
@@ -2065,7 +2065,7 @@ extern "C" {
 LM_GGML_API void lm_ggml_set_loss(struct lm_ggml_tensor * tensor);
 
 LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
-LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool accumulate
+LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool accumulate);
 
 LM_GGML_API void lm_ggml_build_opt_adamw(
 struct lm_ggml_context * ctx,
```
```diff
@@ -2175,6 +2175,10 @@ extern "C" {
 typedef void (*lm_ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
 typedef void (*lm_ggml_log_callback)(enum lm_ggml_log_level level, const char * text, void * user_data);
 
+// Set callback for all future logging events.
+// If this is not called, or NULL is supplied, everything is output on stderr.
+LM_GGML_API void lm_ggml_log_set(lm_ggml_log_callback log_callback, void * user_data);
+
 // optimization parameters
 //
 // see ggml.c (lm_ggml_opt_default_params) for default values
```
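`lm_ggml_log_set` is new and takes the `lm_ggml_log_callback` typedef shown just above it. A minimal sketch routing ggml log output through a custom handler; the handler name, prefix, and `"ggml.h"` include path are illustrative:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: forward ggml log messages to a custom sink (stderr with a prefix).
static void my_log_cb(enum lm_ggml_log_level level, const char * text, void * user_data) {
    (void) level; (void) user_data;
    fprintf(stderr, "[ggml] %s", text);
}

static void init_logging(void) {
    // Register once at startup; passing NULL restores the default stderr output.
    lm_ggml_log_set(my_log_cb, /*user_data=*/NULL);
}
```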
```diff
@@ -2508,6 +2512,9 @@ extern "C" {
 LM_GGML_API int lm_ggml_cpu_has_cann (void);
 LM_GGML_API int lm_ggml_cpu_has_llamafile (void);
 
+// get the sve vector length in bytes
+LM_GGML_API int lm_ggml_cpu_get_sve_cnt(void);
+
 //
 // Internal types and functions exposed for tests and benchmarks
 //
```
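`lm_ggml_cpu_get_sve_cnt` is a new query alongside the existing `lm_ggml_cpu_has_*` feature checks. A trivial sketch; the assumption that it reports 0 on CPUs without ARM SVE is ours, not stated in the header:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: print the detected SVE vector length (presumably 0 when SVE is unavailable).
static void print_sve(void) {
    printf("SVE vector length: %d bytes\n", lm_ggml_cpu_get_sve_cnt());
}
```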
```diff
@@ -2529,7 +2536,7 @@ extern "C" {
 typedef void (*lm_ggml_gemm_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x,
 const void * LM_GGML_RESTRICT y, int nr, int nc);
 
-
+struct lm_ggml_type_traits {
 const char * type_name;
 int64_t blck_size;
 int64_t blck_size_interleave; // interleave elements in blocks
```
```diff
@@ -2545,9 +2552,9 @@ extern "C" {
 int64_t ncols; // number of columns to process simultaneously
 lm_ggml_gemv_t gemv;
 lm_ggml_gemm_t gemm;
-}
+};
 
-LM_GGML_API
+LM_GGML_API const struct lm_ggml_type_traits * lm_ggml_get_type_traits(enum lm_ggml_type type);
 
 #ifdef __cplusplus
 }
```
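The type-traits table is now exposed as a named `struct lm_ggml_type_traits` returned by pointer from `lm_ggml_get_type_traits`. A small sketch reading two of the fields shown above; the wrapper function, the chosen type, and the `"ggml.h"` include path are illustrative assumptions:

```c
#include <stdio.h>
#include "ggml.h"

// Sketch: inspect the traits of a quantized type via the new pointer-returning accessor.
static void print_q4_0_traits(void) {
    const struct lm_ggml_type_traits * traits = lm_ggml_get_type_traits(LM_GGML_TYPE_Q4_0);
    printf("%s: block size %lld\n", traits->type_name, (long long) traits->blck_size);
}
```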