cui-llama.rn 1.4.6 → 1.5.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -2
- package/android/src/main/jni.cpp +52 -34
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-metal-impl.h +77 -3
- package/cpp/ggml-metal.m +794 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +29 -5
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +44 -53
- package/cpp/rn-llama.h +2 -12
- package/cpp/sampling.cpp +3 -0
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +5 -2
- package/ios/RNLlama.mm +2 -2
- package/ios/RNLlamaContext.mm +40 -24
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +6 -4
- package/src/index.ts +3 -1
- package/cpp/chat-template.hpp +0 -529
- package/cpp/minja.hpp +0 -2915
package/cpp/ggml-cpu-impl.h
CHANGED
@@ -4,13 +4,13 @@
|
|
4
4
|
|
5
5
|
#include "ggml.h"
|
6
6
|
#include "ggml-impl.h"
|
7
|
+
|
7
8
|
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
8
9
|
//#include <stddef.h>
|
9
10
|
#include <stdbool.h>
|
10
11
|
#include <string.h> // memcpy
|
11
12
|
#include <math.h> // fabsf
|
12
13
|
|
13
|
-
|
14
14
|
#ifdef __cplusplus
|
15
15
|
extern "C" {
|
16
16
|
#endif
|
@@ -69,33 +69,16 @@ struct lm_ggml_compute_params {
|
|
69
69
|
#endif
|
70
70
|
|
71
71
|
#if defined(__ARM_FEATURE_SVE)
|
72
|
-
#include <arm_sve.h>
|
73
72
|
#include <sys/prctl.h>
|
74
73
|
#endif
|
75
74
|
|
76
|
-
// 16-bit float
|
77
|
-
// on Arm, we use __fp16
|
78
|
-
// on x86, we use uint16_t
|
79
75
|
#if defined(__ARM_NEON)
|
80
76
|
|
81
|
-
//
|
82
|
-
//
|
83
|
-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
84
|
-
//
|
85
|
-
#include <arm_neon.h>
|
86
|
-
|
77
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/5404
|
87
78
|
#ifdef _MSC_VER
|
88
|
-
|
89
|
-
typedef uint16_t lm_ggml_fp16_internal_t;
|
90
|
-
|
91
79
|
#define lm_ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
|
92
|
-
|
93
80
|
#else
|
94
|
-
|
95
|
-
typedef __fp16 lm_ggml_fp16_internal_t;
|
96
|
-
|
97
81
|
#define lm_ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
|
98
|
-
|
99
82
|
#endif // _MSC_VER
|
100
83
|
|
101
84
|
#if !defined(__aarch64__)
|
@@ -340,8 +323,6 @@ inline static int32x4_t lm_ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t
|
|
340
323
|
#else
|
341
324
|
#ifdef __POWER9_VECTOR__
|
342
325
|
#include <altivec.h>
|
343
|
-
#undef bool
|
344
|
-
#define bool _Bool
|
345
326
|
#else
|
346
327
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
347
328
|
#include <intrin.h>
|