cui-llama.rn 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/android/src/main/CMakeLists.txt +21 -5
- package/android/src/main/java/com/rnllama/LlamaContext.java +115 -30
- package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
- package/android/src/main/jni.cpp +222 -36
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/cpp/common.cpp +1682 -2122
- package/cpp/common.h +600 -594
- package/cpp/ggml-aarch64.c +129 -3209
- package/cpp/ggml-aarch64.h +19 -39
- package/cpp/ggml-alloc.c +1040 -1040
- package/cpp/ggml-alloc.h +76 -76
- package/cpp/ggml-backend-impl.h +216 -227
- package/cpp/ggml-backend-reg.cpp +195 -0
- package/cpp/ggml-backend.cpp +1997 -2625
- package/cpp/ggml-backend.h +328 -326
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu-aarch64.c +3560 -0
- package/cpp/ggml-cpu-aarch64.h +30 -0
- package/cpp/ggml-cpu-impl.h +371 -614
- package/cpp/ggml-cpu-quants.c +10822 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13975 -0
- package/cpp/ggml-cpu.cpp +663 -0
- package/cpp/ggml-cpu.h +177 -0
- package/cpp/ggml-impl.h +550 -209
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +4294 -3819
- package/cpp/ggml-quants.c +5247 -15752
- package/cpp/ggml-quants.h +100 -147
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml.c +8180 -23464
- package/cpp/ggml.h +2411 -2562
- package/cpp/llama-grammar.cpp +1138 -1138
- package/cpp/llama-grammar.h +144 -144
- package/cpp/llama-impl.h +181 -181
- package/cpp/llama-sampling.cpp +2348 -2194
- package/cpp/llama-sampling.h +48 -30
- package/cpp/llama-vocab.cpp +1984 -1968
- package/cpp/llama-vocab.h +170 -165
- package/cpp/llama.cpp +22132 -21969
- package/cpp/llama.h +1253 -1253
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +83 -19
- package/cpp/sampling.cpp +466 -458
- package/cpp/sgemm.cpp +1884 -1219
- package/ios/RNLlama.mm +43 -20
- package/ios/RNLlamaContext.h +9 -3
- package/ios/RNLlamaContext.mm +133 -33
- package/jest/mock.js +0 -1
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +52 -15
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +51 -15
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +29 -6
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +12 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +41 -7
- package/src/index.ts +82 -27
- package/cpp/json-schema-to-grammar.cpp +0 -1045
- package/cpp/json-schema-to-grammar.h +0 -8
- package/cpp/json.hpp +0 -24766
@@ -0,0 +1,195 @@
|
|
1
|
+
#include "ggml-backend-impl.h"
|
2
|
+
#include "ggml-backend.h"
|
3
|
+
#include "ggml-cpu.h"
|
4
|
+
#include "ggml-impl.h"
|
5
|
+
#include <cstring>
|
6
|
+
#include <vector>
|
7
|
+
|
8
|
+
// Backend registry
|
9
|
+
|
10
|
+
#ifdef LM_GGML_USE_CUDA
|
11
|
+
#include "ggml-cuda.h"
|
12
|
+
#endif
|
13
|
+
|
14
|
+
#ifdef LM_GGML_USE_METAL
|
15
|
+
#include "ggml-metal.h"
|
16
|
+
#endif
|
17
|
+
|
18
|
+
#ifdef LM_GGML_USE_SYCL
|
19
|
+
#include "ggml-sycl.h"
|
20
|
+
#endif
|
21
|
+
|
22
|
+
#ifdef LM_GGML_USE_VULKAN
|
23
|
+
#include "ggml-vulkan.h"
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#ifdef LM_GGML_USE_BLAS
|
27
|
+
#include "ggml-blas.h"
|
28
|
+
#endif
|
29
|
+
|
30
|
+
#ifdef LM_GGML_USE_RPC
|
31
|
+
#include "ggml-rpc.h"
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#ifdef LM_GGML_USE_AMX
|
35
|
+
# include "ggml-amx.h"
|
36
|
+
#endif
|
37
|
+
|
38
|
+
#ifdef LM_GGML_USE_CANN
|
39
|
+
#include "ggml-cann.h"
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#ifdef LM_GGML_USE_KOMPUTE
|
43
|
+
#include "ggml-kompute.h"
|
44
|
+
#endif
|
45
|
+
|
46
|
+
// Process-wide registry of all compiled-in backends and their devices.
// The constructor registers every backend enabled at build time; the CPU
// backend is registered last, so accelerator (GPU/NPU) devices appear
// before CPU devices when the device list is scanned in order (this is
// what lm_ggml_backend_init_best relies on).
struct lm_ggml_backend_registry {
    std::vector<lm_ggml_backend_reg_t> backends; // one entry per registered backend
    std::vector<lm_ggml_backend_dev_t> devices;  // flattened list of all backends' devices

    lm_ggml_backend_registry() {
#ifdef LM_GGML_USE_CUDA
        register_backend(lm_ggml_backend_cuda_reg());
#endif
#ifdef LM_GGML_USE_METAL
        register_backend(lm_ggml_backend_metal_reg());
#endif
#ifdef LM_GGML_USE_SYCL
        register_backend(lm_ggml_backend_sycl_reg());
#endif
#ifdef LM_GGML_USE_VULKAN
        register_backend(lm_ggml_backend_vk_reg());
#endif
#ifdef LM_GGML_USE_CANN
        register_backend(lm_ggml_backend_cann_reg());
#endif
#ifdef LM_GGML_USE_BLAS
        register_backend(lm_ggml_backend_blas_reg());
#endif
#ifdef LM_GGML_USE_RPC
        register_backend(lm_ggml_backend_rpc_reg());
#endif
#ifdef LM_GGML_USE_AMX
        register_backend(lm_ggml_backend_amx_reg());
#endif
#ifdef LM_GGML_USE_KOMPUTE
        register_backend(lm_ggml_backend_kompute_reg());
#endif

        // CPU backend last: keeps CPU devices after accelerator devices
        register_backend(lm_ggml_backend_cpu_reg());
    }

    // Add a backend and all of its devices to the registry.
    // A NULL reg is silently ignored so optional backends may return NULL.
    void register_backend(lm_ggml_backend_reg_t reg) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
            __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back(reg);
        // flatten this backend's devices into the global device list
        for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
            register_device(lm_ggml_backend_reg_dev_get(reg, i));
        }
    }

    // Append a single device to the flat device list.
    void register_device(lm_ggml_backend_dev_t device) {
#ifndef NDEBUG
        LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }
};
|
104
|
+
|
105
|
+
// Accessor for the lazily-constructed, process-wide backend registry
// (function-local static, i.e. a Meyers singleton).
static lm_ggml_backend_registry & get_reg() {
    static lm_ggml_backend_registry instance;
    return instance;
}
|
109
|
+
|
110
|
+
// Internal API
|
111
|
+
// Register an additional backend with the global registry.
void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
    auto & registry = get_reg();
    registry.register_backend(reg);
}
|
114
|
+
|
115
|
+
// Register a single device with the global registry.
void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
    auto & registry = get_reg();
    registry.register_device(device);
}
|
118
|
+
|
119
|
+
// Backend (reg) enumeration
|
120
|
+
// Number of backends currently registered.
size_t lm_ggml_backend_reg_count() {
    const auto & registry = get_reg();
    return registry.backends.size();
}
|
123
|
+
|
124
|
+
// Fetch the backend at the given position; asserts on out-of-range index.
lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
    auto & backends = get_reg().backends;
    LM_GGML_ASSERT(index < backends.size());
    return backends[index];
}
|
128
|
+
|
129
|
+
// Linear search for a backend whose name matches exactly.
// Returns NULL when no registered backend has that name.
lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
    const size_t count = lm_ggml_backend_reg_count();
    for (size_t i = 0; i < count; i++) {
        lm_ggml_backend_reg_t candidate = lm_ggml_backend_reg_get(i);
        if (std::strcmp(lm_ggml_backend_reg_name(candidate), name) == 0) {
            return candidate;
        }
    }
    return NULL;
}
|
138
|
+
|
139
|
+
// Device enumeration
|
140
|
+
// Number of devices currently registered (across all backends).
size_t lm_ggml_backend_dev_count() {
    const auto & registry = get_reg();
    return registry.devices.size();
}
|
143
|
+
|
144
|
+
// Fetch the device at the given position; asserts on out-of-range index.
lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
    auto & devices = get_reg().devices;
    LM_GGML_ASSERT(index < devices.size());
    return devices[index];
}
|
148
|
+
|
149
|
+
// Linear search for a device whose name matches exactly.
// Returns NULL when no registered device has that name.
lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
    for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
        lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
        // qualified std::strcmp for consistency with lm_ggml_backend_reg_by_name;
        // <cstring> only guarantees the name in namespace std
        if (std::strcmp(lm_ggml_backend_dev_name(dev), name) == 0) {
            return dev;
        }
    }
    return NULL;
}
|
158
|
+
|
159
|
+
// Return the first registered device of the requested type, or NULL if
// none exists. Registration order determines which device is "first".
lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
    const size_t count = lm_ggml_backend_dev_count();
    for (size_t i = 0; i < count; i++) {
        lm_ggml_backend_dev_t candidate = lm_ggml_backend_dev_get(i);
        if (lm_ggml_backend_dev_type(candidate) == type) {
            return candidate;
        }
    }
    return NULL;
}
|
168
|
+
|
169
|
+
// Convenience functions
|
170
|
+
// Convenience: look a device up by name and initialize a backend on it.
// Returns NULL when the name is unknown.
lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
    return dev ? lm_ggml_backend_dev_init(dev, params) : NULL;
}
|
177
|
+
|
178
|
+
// Convenience: initialize a backend on the first device of the given type.
// Returns NULL when no device of that type is registered.
lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
    return dev ? lm_ggml_backend_dev_init(dev, params) : NULL;
}
|
185
|
+
|
186
|
+
// Convenience: initialize the "best" available backend — a GPU device if
// one is registered, otherwise a CPU device; NULL if neither exists.
lm_ggml_backend_t lm_ggml_backend_init_best(void) {
    lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
    if (dev == NULL) {
        // fall back to the CPU backend
        dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
    }
    return dev ? lm_ggml_backend_dev_init(dev, NULL) : NULL;
}
|