cui-llama.rn 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/android/src/main/CMakeLists.txt +5 -7
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +9 -9
  4. package/cpp/common.cpp +21 -40
  5. package/cpp/common.h +21 -12
  6. package/cpp/ggml-backend-impl.h +38 -20
  7. package/cpp/ggml-backend-reg.cpp +216 -87
  8. package/cpp/ggml-backend.h +1 -0
  9. package/cpp/ggml-common.h +42 -48
  10. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +591 -152
  11. package/cpp/ggml-cpu-aarch64.h +2 -26
  12. package/cpp/ggml-cpu-traits.cpp +36 -0
  13. package/cpp/ggml-cpu-traits.h +38 -0
  14. package/cpp/ggml-cpu.c +14122 -13971
  15. package/cpp/ggml-cpu.cpp +618 -715
  16. package/cpp/ggml-cpu.h +0 -17
  17. package/cpp/ggml-impl.h +6 -6
  18. package/cpp/ggml-metal.m +482 -24
  19. package/cpp/ggml-quants.c +0 -9
  20. package/cpp/ggml-threading.h +4 -2
  21. package/cpp/ggml.c +132 -43
  22. package/cpp/ggml.h +44 -13
  23. package/cpp/llama-sampling.cpp +35 -90
  24. package/cpp/llama-vocab.cpp +2 -1
  25. package/cpp/llama.cpp +737 -233
  26. package/cpp/llama.h +20 -16
  27. package/cpp/sampling.cpp +11 -16
  28. package/cpp/speculative.cpp +4 -0
  29. package/cpp/unicode.cpp +51 -51
  30. package/cpp/unicode.h +9 -10
  31. package/lib/commonjs/index.js +38 -1
  32. package/lib/commonjs/index.js.map +1 -1
  33. package/lib/module/index.js +36 -0
  34. package/lib/module/index.js.map +1 -1
  35. package/lib/typescript/NativeRNLlama.d.ts +2 -3
  36. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  37. package/lib/typescript/index.d.ts +36 -2
  38. package/lib/typescript/index.d.ts.map +1 -1
  39. package/package.json +1 -1
  40. package/src/NativeRNLlama.ts +3 -3
  41. package/src/index.ts +46 -2
  42. package/cpp/amx/amx.cpp +0 -196
  43. package/cpp/amx/amx.h +0 -20
  44. package/cpp/amx/common.h +0 -101
  45. package/cpp/amx/mmq.cpp +0 -2524
  46. package/cpp/amx/mmq.h +0 -16
  47. package/cpp/ggml-aarch64.c +0 -129
  48. package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-cpu.h CHANGED
@@ -103,24 +103,14 @@ extern "C" {
103
103
 
104
104
  // Internal types and functions exposed for tests and benchmarks
105
105
 
106
- typedef void (*lm_ggml_from_float_to_mat_t)
107
- (const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
108
106
  typedef void (*lm_ggml_vec_dot_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x, size_t bx,
109
107
  const void * LM_GGML_RESTRICT y, size_t by, int nrc);
110
- typedef void (*lm_ggml_gemv_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x,
111
- const void * LM_GGML_RESTRICT y, int nr, int nc);
112
- typedef void (*lm_ggml_gemm_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x,
113
- const void * LM_GGML_RESTRICT y, int nr, int nc);
114
108
 
115
109
  struct lm_ggml_type_traits_cpu {
116
110
  lm_ggml_from_float_t from_float;
117
- lm_ggml_from_float_to_mat_t from_float_to_mat;
118
111
  lm_ggml_vec_dot_t vec_dot;
119
112
  enum lm_ggml_type vec_dot_type;
120
113
  int64_t nrows; // number of rows to process simultaneously
121
- int64_t ncols; // number of columns to process simultaneously
122
- lm_ggml_gemv_t gemv;
123
- lm_ggml_gemm_t gemm;
124
114
  };
125
115
 
126
116
  LM_GGML_BACKEND_API const struct lm_ggml_type_traits_cpu * lm_ggml_get_type_traits_cpu(enum lm_ggml_type type);
@@ -140,13 +130,6 @@ extern "C" {
140
130
 
141
131
  LM_GGML_BACKEND_API lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void);
142
132
 
143
- #ifdef LM_GGML_USE_CPU_HBM
144
- LM_GGML_BACKEND_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_hbm_buffer_type(void);
145
- #endif
146
-
147
- LM_GGML_BACKEND_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_aarch64_buffer_type(void);
148
- LM_GGML_BACKEND_API bool lm_ggml_backend_cpu_buft_is_aarch64(lm_ggml_backend_buffer_type_t buft);
149
-
150
133
  #ifdef __cplusplus
151
134
  }
152
135
  #endif
package/cpp/ggml-impl.h CHANGED
@@ -74,8 +74,8 @@ static inline int lm_ggml_up(int n, int m) {
74
74
  //
75
75
 
76
76
  LM_GGML_ATTRIBUTE_FORMAT(2, 3)
77
- void lm_ggml_log_internal (enum lm_ggml_log_level level, const char * format, ...);
78
- void lm_ggml_log_callback_default(enum lm_ggml_log_level level, const char * text, void * user_data);
77
+ LM_GGML_API void lm_ggml_log_internal (enum lm_ggml_log_level level, const char * format, ...);
78
+ LM_GGML_API void lm_ggml_log_callback_default(enum lm_ggml_log_level level, const char * text, void * user_data);
79
79
 
80
80
  #define LM_GGML_LOG(...) lm_ggml_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
81
81
  #define LM_GGML_LOG_INFO(...) lm_ggml_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
@@ -304,20 +304,20 @@ struct lm_ggml_cgraph lm_ggml_graph_view(struct lm_ggml_cgraph * cgraph, int i0,
304
304
 
305
305
  // Memory allocation
306
306
 
307
- void * lm_ggml_aligned_malloc(size_t size);
308
- void lm_ggml_aligned_free(void * ptr, size_t size);
307
+ LM_GGML_API void * lm_ggml_aligned_malloc(size_t size);
308
+ LM_GGML_API void lm_ggml_aligned_free(void * ptr, size_t size);
309
309
 
310
310
  // FP16 to FP32 conversion
311
311
 
312
312
  #if defined(__ARM_NEON)
313
- #ifdef _MSC_VER
313
+ #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
314
314
  typedef uint16_t lm_ggml_fp16_internal_t;
315
315
  #else
316
316
  typedef __fp16 lm_ggml_fp16_internal_t;
317
317
  #endif
318
318
  #endif
319
319
 
320
- #if defined(__ARM_NEON) && !defined(_MSC_VER)
320
+ #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
321
321
  #define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
322
322
  #define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
323
323