cui-llama.rn 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/android/src/main/CMakeLists.txt +14 -8
  2. package/android/src/main/jni.cpp +38 -37
  3. package/cpp/common.cpp +50 -30
  4. package/cpp/common.h +32 -13
  5. package/cpp/ggml-alloc.c +0 -1
  6. package/cpp/ggml-backend-reg.cpp +79 -49
  7. package/cpp/ggml-backend.cpp +5 -2
  8. package/cpp/ggml-cpp.h +1 -0
  9. package/cpp/ggml-cpu-aarch64.cpp +57 -72
  10. package/cpp/ggml-cpu-quants.c +5 -1
  11. package/cpp/ggml-cpu.c +6 -6
  12. package/cpp/ggml-cpu.cpp +9 -0
  13. package/cpp/ggml-impl.h +11 -0
  14. package/cpp/ggml-metal.m +2 -2
  15. package/cpp/ggml.c +129 -1388
  16. package/cpp/ggml.h +29 -152
  17. package/cpp/gguf.cpp +1325 -0
  18. package/cpp/gguf.h +202 -0
  19. package/cpp/llama-adapter.cpp +346 -0
  20. package/cpp/llama-adapter.h +73 -0
  21. package/cpp/llama-arch.cpp +1434 -0
  22. package/cpp/llama-arch.h +395 -0
  23. package/cpp/llama-batch.cpp +368 -0
  24. package/cpp/llama-batch.h +88 -0
  25. package/cpp/llama-chat.cpp +567 -0
  26. package/cpp/llama-chat.h +51 -0
  27. package/cpp/llama-context.cpp +1771 -0
  28. package/cpp/llama-context.h +128 -0
  29. package/cpp/llama-cparams.cpp +1 -0
  30. package/cpp/llama-cparams.h +37 -0
  31. package/cpp/llama-cpp.h +30 -0
  32. package/cpp/llama-grammar.cpp +16 -15
  33. package/cpp/llama-grammar.h +5 -6
  34. package/cpp/llama-hparams.cpp +71 -0
  35. package/cpp/llama-hparams.h +140 -0
  36. package/cpp/llama-impl.cpp +167 -0
  37. package/cpp/llama-impl.h +16 -136
  38. package/cpp/llama-kv-cache.cpp +718 -0
  39. package/cpp/llama-kv-cache.h +218 -0
  40. package/cpp/llama-mmap.cpp +589 -0
  41. package/cpp/llama-mmap.h +67 -0
  42. package/cpp/llama-model-loader.cpp +1011 -0
  43. package/cpp/llama-model-loader.h +158 -0
  44. package/cpp/llama-model.cpp +2202 -0
  45. package/cpp/llama-model.h +391 -0
  46. package/cpp/llama-sampling.cpp +117 -4
  47. package/cpp/llama-vocab.cpp +26 -29
  48. package/cpp/llama-vocab.h +14 -2
  49. package/cpp/llama.cpp +8839 -19131
  50. package/cpp/llama.cpp.rej +23 -0
  51. package/cpp/llama.h +31 -9
  52. package/cpp/rn-llama.hpp +39 -37
  53. package/cpp/sgemm.cpp +1091 -378
  54. package/cpp/sgemm.h +2 -2
  55. package/cpp/unicode.cpp +6 -0
  56. package/package.json +1 -1
package/cpp/ggml.h CHANGED
@@ -242,12 +242,6 @@
242
242
  #define LM_GGML_ROPE_TYPE_MROPE 8
243
243
  #define LM_GGML_ROPE_TYPE_VISION 24
244
244
 
245
- #define LM_GGUF_MAGIC "GGUF"
246
-
247
- #define LM_GGUF_VERSION 3
248
-
249
- #define LM_GGUF_DEFAULT_ALIGNMENT 32
250
-
251
245
  #define LM_GGML_UNUSED(x) (void)(x)
252
246
 
253
247
  #define LM_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -404,12 +398,6 @@ extern "C" {
404
398
  LM_GGML_PREC_F32,
405
399
  };
406
400
 
407
- enum lm_ggml_backend_type {
408
- LM_GGML_BACKEND_TYPE_CPU = 0,
409
- LM_GGML_BACKEND_TYPE_GPU = 10,
410
- LM_GGML_BACKEND_TYPE_GPU_SPLIT = 20,
411
- };
412
-
413
401
  // model file types
414
402
  enum lm_ggml_ftype {
415
403
  LM_GGML_FTYPE_UNKNOWN = -1,
@@ -588,8 +576,6 @@ extern "C" {
588
576
  struct lm_ggml_tensor {
589
577
  enum lm_ggml_type type;
590
578
 
591
- LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
592
-
593
579
  struct lm_ggml_backend_buffer * buffer;
594
580
 
595
581
  int64_t ne[LM_GGML_MAX_DIMS]; // number of elements
@@ -1565,17 +1551,6 @@ extern "C" {
1565
1551
  int d1, // dilation dimension 1
1566
1552
  bool is_2D);
1567
1553
 
1568
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
1569
- struct lm_ggml_context * ctx,
1570
- struct lm_ggml_tensor * a, // convolution kernel
1571
- struct lm_ggml_tensor * b, // data
1572
- int s0, // stride dimension 0
1573
- int s1, // stride dimension 1
1574
- int p0, // padding dimension 0
1575
- int p1, // padding dimension 1
1576
- int d0, // dilation dimension 0
1577
- int d1); // dilation dimension 1
1578
-
1579
1554
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
1580
1555
  struct lm_ggml_context * ctx,
1581
1556
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1593,6 +1568,23 @@ extern "C" {
1593
1568
  int s, // stride
1594
1569
  int d); // dilation
1595
1570
 
1571
+ // depthwise
1572
+ // TODO: this is very likely wrong for some cases! - needs more testing
1573
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
1574
+ struct lm_ggml_context * ctx,
1575
+ struct lm_ggml_tensor * a, // convolution kernel
1576
+ struct lm_ggml_tensor * b, // data
1577
+ int s0, // stride
1578
+ int p0, // padding
1579
+ int d0); // dilation
1580
+
1581
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
1582
+ struct lm_ggml_context * ctx,
1583
+ struct lm_ggml_tensor * a, // convolution kernel
1584
+ struct lm_ggml_tensor * b, // data
1585
+ int s0, // stride
1586
+ int d0); // dilation
1587
+
1596
1588
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
1597
1589
  struct lm_ggml_context * ctx,
1598
1590
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1612,7 +1604,6 @@ extern "C" {
1612
1604
  int d0, // dilation dimension 0
1613
1605
  int d1); // dilation dimension 1
1614
1606
 
1615
-
1616
1607
  // kernel size is a->ne[0] x a->ne[1]
1617
1608
  // stride is equal to kernel size
1618
1609
  // padding is zero
@@ -1639,6 +1630,18 @@ extern "C" {
1639
1630
  struct lm_ggml_tensor * a,
1640
1631
  struct lm_ggml_tensor * b);
1641
1632
 
1633
+ // depthwise
1634
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
1635
+ struct lm_ggml_context * ctx,
1636
+ struct lm_ggml_tensor * a, // convolution kernel
1637
+ struct lm_ggml_tensor * b, // data
1638
+ int s0, // stride dimension 0
1639
+ int s1, // stride dimension 1
1640
+ int p0, // padding dimension 0
1641
+ int p1, // padding dimension 1
1642
+ int d0, // dilation dimension 0
1643
+ int d1); // dilation dimension 1
1644
+
1642
1645
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
1643
1646
  struct lm_ggml_context * ctx,
1644
1647
  struct lm_ggml_tensor * a,
@@ -2095,132 +2098,6 @@ extern "C" {
2095
2098
  int64_t n_per_row,
2096
2099
  const float * imatrix);
2097
2100
 
2098
- //
2099
- // gguf
2100
- //
2101
-
2102
- enum lm_gguf_type {
2103
- LM_GGUF_TYPE_UINT8 = 0,
2104
- LM_GGUF_TYPE_INT8 = 1,
2105
- LM_GGUF_TYPE_UINT16 = 2,
2106
- LM_GGUF_TYPE_INT16 = 3,
2107
- LM_GGUF_TYPE_UINT32 = 4,
2108
- LM_GGUF_TYPE_INT32 = 5,
2109
- LM_GGUF_TYPE_FLOAT32 = 6,
2110
- LM_GGUF_TYPE_BOOL = 7,
2111
- LM_GGUF_TYPE_STRING = 8,
2112
- LM_GGUF_TYPE_ARRAY = 9,
2113
- LM_GGUF_TYPE_UINT64 = 10,
2114
- LM_GGUF_TYPE_INT64 = 11,
2115
- LM_GGUF_TYPE_FLOAT64 = 12,
2116
- LM_GGUF_TYPE_COUNT, // marks the end of the enum
2117
- };
2118
-
2119
- struct lm_gguf_context;
2120
-
2121
- struct lm_gguf_init_params {
2122
- bool no_alloc;
2123
-
2124
- // if not NULL, create a lm_ggml_context and allocate the tensor data in it
2125
- struct lm_ggml_context ** ctx;
2126
- };
2127
-
2128
- LM_GGML_API struct lm_gguf_context * lm_gguf_init_empty(void);
2129
- LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params);
2130
- //LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_buffer(..);
2131
-
2132
- LM_GGML_API void lm_gguf_free(struct lm_gguf_context * ctx);
2133
-
2134
- LM_GGML_API const char * lm_gguf_type_name(enum lm_gguf_type type);
2135
-
2136
- LM_GGML_API int lm_gguf_get_version (const struct lm_gguf_context * ctx);
2137
- LM_GGML_API size_t lm_gguf_get_alignment (const struct lm_gguf_context * ctx);
2138
- LM_GGML_API size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx);
2139
- LM_GGML_API void * lm_gguf_get_data (const struct lm_gguf_context * ctx);
2140
-
2141
- LM_GGML_API int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx);
2142
- LM_GGML_API int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key);
2143
- LM_GGML_API const char * lm_gguf_get_key (const struct lm_gguf_context * ctx, int key_id);
2144
-
2145
- LM_GGML_API enum lm_gguf_type lm_gguf_get_kv_type (const struct lm_gguf_context * ctx, int key_id);
2146
- LM_GGML_API enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id);
2147
-
2148
- // will abort if the wrong type is used for the key
2149
- LM_GGML_API uint8_t lm_gguf_get_val_u8 (const struct lm_gguf_context * ctx, int key_id);
2150
- LM_GGML_API int8_t lm_gguf_get_val_i8 (const struct lm_gguf_context * ctx, int key_id);
2151
- LM_GGML_API uint16_t lm_gguf_get_val_u16 (const struct lm_gguf_context * ctx, int key_id);
2152
- LM_GGML_API int16_t lm_gguf_get_val_i16 (const struct lm_gguf_context * ctx, int key_id);
2153
- LM_GGML_API uint32_t lm_gguf_get_val_u32 (const struct lm_gguf_context * ctx, int key_id);
2154
- LM_GGML_API int32_t lm_gguf_get_val_i32 (const struct lm_gguf_context * ctx, int key_id);
2155
- LM_GGML_API float lm_gguf_get_val_f32 (const struct lm_gguf_context * ctx, int key_id);
2156
- LM_GGML_API uint64_t lm_gguf_get_val_u64 (const struct lm_gguf_context * ctx, int key_id);
2157
- LM_GGML_API int64_t lm_gguf_get_val_i64 (const struct lm_gguf_context * ctx, int key_id);
2158
- LM_GGML_API double lm_gguf_get_val_f64 (const struct lm_gguf_context * ctx, int key_id);
2159
- LM_GGML_API bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id);
2160
- LM_GGML_API const char * lm_gguf_get_val_str (const struct lm_gguf_context * ctx, int key_id);
2161
- LM_GGML_API const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int key_id);
2162
- LM_GGML_API int lm_gguf_get_arr_n (const struct lm_gguf_context * ctx, int key_id);
2163
- LM_GGML_API const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id);
2164
- LM_GGML_API const char * lm_gguf_get_arr_str (const struct lm_gguf_context * ctx, int key_id, int i);
2165
-
2166
- LM_GGML_API int lm_gguf_get_n_tensors (const struct lm_gguf_context * ctx);
2167
- LM_GGML_API int lm_gguf_find_tensor (const struct lm_gguf_context * ctx, const char * name);
2168
- LM_GGML_API size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i);
2169
- LM_GGML_API char * lm_gguf_get_tensor_name (const struct lm_gguf_context * ctx, int i);
2170
- LM_GGML_API enum lm_ggml_type lm_gguf_get_tensor_type (const struct lm_gguf_context * ctx, int i);
2171
-
2172
- // removes key if it exists
2173
- LM_GGML_API void lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key);
2174
-
2175
- // overrides existing values or adds a new one
2176
- LM_GGML_API void lm_gguf_set_val_u8 (struct lm_gguf_context * ctx, const char * key, uint8_t val);
2177
- LM_GGML_API void lm_gguf_set_val_i8 (struct lm_gguf_context * ctx, const char * key, int8_t val);
2178
- LM_GGML_API void lm_gguf_set_val_u16 (struct lm_gguf_context * ctx, const char * key, uint16_t val);
2179
- LM_GGML_API void lm_gguf_set_val_i16 (struct lm_gguf_context * ctx, const char * key, int16_t val);
2180
- LM_GGML_API void lm_gguf_set_val_u32 (struct lm_gguf_context * ctx, const char * key, uint32_t val);
2181
- LM_GGML_API void lm_gguf_set_val_i32 (struct lm_gguf_context * ctx, const char * key, int32_t val);
2182
- LM_GGML_API void lm_gguf_set_val_f32 (struct lm_gguf_context * ctx, const char * key, float val);
2183
- LM_GGML_API void lm_gguf_set_val_u64 (struct lm_gguf_context * ctx, const char * key, uint64_t val);
2184
- LM_GGML_API void lm_gguf_set_val_i64 (struct lm_gguf_context * ctx, const char * key, int64_t val);
2185
- LM_GGML_API void lm_gguf_set_val_f64 (struct lm_gguf_context * ctx, const char * key, double val);
2186
- LM_GGML_API void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val);
2187
- LM_GGML_API void lm_gguf_set_val_str (struct lm_gguf_context * ctx, const char * key, const char * val);
2188
- LM_GGML_API void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n);
2189
- LM_GGML_API void lm_gguf_set_arr_str (struct lm_gguf_context * ctx, const char * key, const char ** data, int n);
2190
-
2191
- // set or add KV pairs from another context
2192
- LM_GGML_API void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src);
2193
-
2194
- // manage tensor info
2195
- LM_GGML_API void lm_gguf_add_tensor(struct lm_gguf_context * ctx, const struct lm_ggml_tensor * tensor);
2196
- LM_GGML_API void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type);
2197
- LM_GGML_API void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size);
2198
-
2199
- // writing gguf files can be done in 2 ways:
2200
- //
2201
- // - write the entire lm_gguf_context to a binary file in a single pass:
2202
- //
2203
- // lm_gguf_write_to_file(ctx, fname);
2204
- //
2205
- // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2206
- //
2207
- // FILE * f = fopen(fname, "wb");
2208
- // fseek(f, lm_gguf_get_meta_size(ctx), SEEK_SET);
2209
- // fwrite(f, ...);
2210
- // void * data = lm_gguf_meta_get_meta_data(ctx);
2211
- // fseek(f, 0, SEEK_SET);
2212
- // fwrite(f, data, lm_gguf_get_meta_size(ctx));
2213
- // free(data);
2214
- // fclose(f);
2215
- //
2216
-
2217
- // write the entire context to a binary file
2218
- LM_GGML_API void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta);
2219
-
2220
- // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2221
- LM_GGML_API size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx);
2222
- LM_GGML_API void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data);
2223
-
2224
2101
  #ifdef __cplusplus
2225
2102
  // restrict not standard in C++
2226
2103
  # if defined(__GNUC__)