cui-llama.rn 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +5 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +21 -40
- package/cpp/common.h +21 -12
- package/cpp/ggml-backend-impl.h +38 -20
- package/cpp/ggml-backend-reg.cpp +216 -87
- package/cpp/ggml-backend.h +1 -0
- package/cpp/ggml-common.h +42 -48
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +591 -152
- package/cpp/ggml-cpu-aarch64.h +2 -26
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14122 -13971
- package/cpp/ggml-cpu.cpp +618 -715
- package/cpp/ggml-cpu.h +0 -17
- package/cpp/ggml-impl.h +6 -6
- package/cpp/ggml-metal.m +482 -24
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml.c +132 -43
- package/cpp/ggml.h +44 -13
- package/cpp/llama-sampling.cpp +35 -90
- package/cpp/llama-vocab.cpp +2 -1
- package/cpp/llama.cpp +737 -233
- package/cpp/llama.h +20 -16
- package/cpp/sampling.cpp +11 -16
- package/cpp/speculative.cpp +4 -0
- package/cpp/unicode.cpp +51 -51
- package/cpp/unicode.h +9 -10
- package/lib/commonjs/index.js +38 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +36 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -3
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +36 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +3 -3
- package/src/index.ts +46 -2
- package/cpp/amx/amx.cpp +0 -196
- package/cpp/amx/amx.h +0 -20
- package/cpp/amx/common.h +0 -101
- package/cpp/amx/mmq.cpp +0 -2524
- package/cpp/amx/mmq.h +0 -16
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-cpu.h
CHANGED
@@ -103,24 +103,14 @@ extern "C" {
|
|
103
103
|
|
104
104
|
// Internal types and functions exposed for tests and benchmarks
|
105
105
|
|
106
|
-
typedef void (*lm_ggml_from_float_to_mat_t)
|
107
|
-
(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
|
108
106
|
typedef void (*lm_ggml_vec_dot_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x, size_t bx,
|
109
107
|
const void * LM_GGML_RESTRICT y, size_t by, int nrc);
|
110
|
-
typedef void (*lm_ggml_gemv_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x,
|
111
|
-
const void * LM_GGML_RESTRICT y, int nr, int nc);
|
112
|
-
typedef void (*lm_ggml_gemm_t) (int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT x,
|
113
|
-
const void * LM_GGML_RESTRICT y, int nr, int nc);
|
114
108
|
|
115
109
|
struct lm_ggml_type_traits_cpu {
|
116
110
|
lm_ggml_from_float_t from_float;
|
117
|
-
lm_ggml_from_float_to_mat_t from_float_to_mat;
|
118
111
|
lm_ggml_vec_dot_t vec_dot;
|
119
112
|
enum lm_ggml_type vec_dot_type;
|
120
113
|
int64_t nrows; // number of rows to process simultaneously
|
121
|
-
int64_t ncols; // number of columns to process simultaneously
|
122
|
-
lm_ggml_gemv_t gemv;
|
123
|
-
lm_ggml_gemm_t gemm;
|
124
114
|
};
|
125
115
|
|
126
116
|
LM_GGML_BACKEND_API const struct lm_ggml_type_traits_cpu * lm_ggml_get_type_traits_cpu(enum lm_ggml_type type);
|
@@ -140,13 +130,6 @@ extern "C" {
|
|
140
130
|
|
141
131
|
LM_GGML_BACKEND_API lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void);
|
142
132
|
|
143
|
-
#ifdef LM_GGML_USE_CPU_HBM
|
144
|
-
LM_GGML_BACKEND_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_hbm_buffer_type(void);
|
145
|
-
#endif
|
146
|
-
|
147
|
-
LM_GGML_BACKEND_API lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_aarch64_buffer_type(void);
|
148
|
-
LM_GGML_BACKEND_API bool lm_ggml_backend_cpu_buft_is_aarch64(lm_ggml_backend_buffer_type_t buft);
|
149
|
-
|
150
133
|
#ifdef __cplusplus
|
151
134
|
}
|
152
135
|
#endif
|
package/cpp/ggml-impl.h
CHANGED
@@ -74,8 +74,8 @@ static inline int lm_ggml_up(int n, int m) {
|
|
74
74
|
//
|
75
75
|
|
76
76
|
LM_GGML_ATTRIBUTE_FORMAT(2, 3)
|
77
|
-
void lm_ggml_log_internal (enum lm_ggml_log_level level, const char * format, ...);
|
78
|
-
void lm_ggml_log_callback_default(enum lm_ggml_log_level level, const char * text, void * user_data);
|
77
|
+
LM_GGML_API void lm_ggml_log_internal (enum lm_ggml_log_level level, const char * format, ...);
|
78
|
+
LM_GGML_API void lm_ggml_log_callback_default(enum lm_ggml_log_level level, const char * text, void * user_data);
|
79
79
|
|
80
80
|
#define LM_GGML_LOG(...) lm_ggml_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
81
81
|
#define LM_GGML_LOG_INFO(...) lm_ggml_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
@@ -304,20 +304,20 @@ struct lm_ggml_cgraph lm_ggml_graph_view(struct lm_ggml_cgraph * cgraph, int i0,
|
|
304
304
|
|
305
305
|
// Memory allocation
|
306
306
|
|
307
|
-
void * lm_ggml_aligned_malloc(size_t size);
|
308
|
-
void lm_ggml_aligned_free(void * ptr, size_t size);
|
307
|
+
LM_GGML_API void * lm_ggml_aligned_malloc(size_t size);
|
308
|
+
LM_GGML_API void lm_ggml_aligned_free(void * ptr, size_t size);
|
309
309
|
|
310
310
|
// FP16 to FP32 conversion
|
311
311
|
|
312
312
|
#if defined(__ARM_NEON)
|
313
|
-
#ifdef _MSC_VER
|
313
|
+
#if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
314
314
|
typedef uint16_t lm_ggml_fp16_internal_t;
|
315
315
|
#else
|
316
316
|
typedef __fp16 lm_ggml_fp16_internal_t;
|
317
317
|
#endif
|
318
318
|
#endif
|
319
319
|
|
320
|
-
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
320
|
+
#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
321
321
|
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
322
322
|
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
|
323
323
|
|