cui-llama.rn 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +3 -4
  2. package/android/src/main/CMakeLists.txt +21 -5
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +115 -30
  4. package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
  5. package/android/src/main/jni.cpp +222 -36
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
  8. package/cpp/common.cpp +1682 -2122
  9. package/cpp/common.h +600 -594
  10. package/cpp/ggml-aarch64.c +129 -3209
  11. package/cpp/ggml-aarch64.h +19 -39
  12. package/cpp/ggml-alloc.c +1040 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +216 -227
  15. package/cpp/ggml-backend-reg.cpp +195 -0
  16. package/cpp/ggml-backend.cpp +1997 -2625
  17. package/cpp/ggml-backend.h +328 -326
  18. package/cpp/ggml-common.h +1853 -1853
  19. package/cpp/ggml-cpp.h +38 -0
  20. package/cpp/ggml-cpu-aarch64.c +3560 -0
  21. package/cpp/ggml-cpu-aarch64.h +30 -0
  22. package/cpp/ggml-cpu-impl.h +371 -614
  23. package/cpp/ggml-cpu-quants.c +10822 -0
  24. package/cpp/ggml-cpu-quants.h +63 -0
  25. package/cpp/ggml-cpu.c +13975 -0
  26. package/cpp/ggml-cpu.cpp +663 -0
  27. package/cpp/ggml-cpu.h +177 -0
  28. package/cpp/ggml-impl.h +550 -209
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4294 -3819
  31. package/cpp/ggml-quants.c +5247 -15752
  32. package/cpp/ggml-quants.h +100 -147
  33. package/cpp/ggml-threading.cpp +12 -0
  34. package/cpp/ggml-threading.h +12 -0
  35. package/cpp/ggml.c +8180 -23464
  36. package/cpp/ggml.h +2411 -2562
  37. package/cpp/llama-grammar.cpp +1138 -1138
  38. package/cpp/llama-grammar.h +144 -144
  39. package/cpp/llama-impl.h +181 -181
  40. package/cpp/llama-sampling.cpp +2348 -2194
  41. package/cpp/llama-sampling.h +48 -30
  42. package/cpp/llama-vocab.cpp +1984 -1968
  43. package/cpp/llama-vocab.h +170 -165
  44. package/cpp/llama.cpp +22132 -21969
  45. package/cpp/llama.h +1253 -1253
  46. package/cpp/log.cpp +401 -401
  47. package/cpp/log.h +121 -121
  48. package/cpp/rn-llama.hpp +83 -19
  49. package/cpp/sampling.cpp +466 -458
  50. package/cpp/sgemm.cpp +1884 -1219
  51. package/ios/RNLlama.mm +43 -20
  52. package/ios/RNLlamaContext.h +9 -3
  53. package/ios/RNLlamaContext.mm +133 -33
  54. package/jest/mock.js +0 -1
  55. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  56. package/lib/commonjs/index.js +52 -15
  57. package/lib/commonjs/index.js.map +1 -1
  58. package/lib/module/NativeRNLlama.js.map +1 -1
  59. package/lib/module/index.js +51 -15
  60. package/lib/module/index.js.map +1 -1
  61. package/lib/typescript/NativeRNLlama.d.ts +29 -6
  62. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  63. package/lib/typescript/index.d.ts +12 -5
  64. package/lib/typescript/index.d.ts.map +1 -1
  65. package/package.json +1 -1
  66. package/src/NativeRNLlama.ts +41 -7
  67. package/src/index.ts +82 -27
  68. package/cpp/json-schema-to-grammar.cpp +0 -1045
  69. package/cpp/json-schema-to-grammar.h +0 -8
  70. package/cpp/json.hpp +0 -24766
@@ -0,0 +1,195 @@
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-cpu.h"
4
+ #include "ggml-impl.h"
5
+ #include <cstring>
6
+ #include <vector>
7
+
8
+ // Backend registry
9
+
10
+ #ifdef LM_GGML_USE_CUDA
11
+ #include "ggml-cuda.h"
12
+ #endif
13
+
14
+ #ifdef LM_GGML_USE_METAL
15
+ #include "ggml-metal.h"
16
+ #endif
17
+
18
+ #ifdef LM_GGML_USE_SYCL
19
+ #include "ggml-sycl.h"
20
+ #endif
21
+
22
+ #ifdef LM_GGML_USE_VULKAN
23
+ #include "ggml-vulkan.h"
24
+ #endif
25
+
26
+ #ifdef LM_GGML_USE_BLAS
27
+ #include "ggml-blas.h"
28
+ #endif
29
+
30
+ #ifdef LM_GGML_USE_RPC
31
+ #include "ggml-rpc.h"
32
+ #endif
33
+
34
+ #ifdef LM_GGML_USE_AMX
35
+ # include "ggml-amx.h"
36
+ #endif
37
+
38
+ #ifdef LM_GGML_USE_CANN
39
+ #include "ggml-cann.h"
40
+ #endif
41
+
42
+ #ifdef LM_GGML_USE_KOMPUTE
43
+ #include "ggml-kompute.h"
44
+ #endif
45
+
46
+ struct lm_ggml_backend_registry {
47
+ std::vector<lm_ggml_backend_reg_t> backends;
48
+ std::vector<lm_ggml_backend_dev_t> devices;
49
+
50
+ lm_ggml_backend_registry() {
51
+ #ifdef LM_GGML_USE_CUDA
52
+ register_backend(lm_ggml_backend_cuda_reg());
53
+ #endif
54
+ #ifdef LM_GGML_USE_METAL
55
+ register_backend(lm_ggml_backend_metal_reg());
56
+ #endif
57
+ #ifdef LM_GGML_USE_SYCL
58
+ register_backend(lm_ggml_backend_sycl_reg());
59
+ #endif
60
+ #ifdef LM_GGML_USE_VULKAN
61
+ register_backend(lm_ggml_backend_vk_reg());
62
+ #endif
63
+ #ifdef LM_GGML_USE_CANN
64
+ register_backend(lm_ggml_backend_cann_reg());
65
+ #endif
66
+ #ifdef LM_GGML_USE_BLAS
67
+ register_backend(lm_ggml_backend_blas_reg());
68
+ #endif
69
+ #ifdef LM_GGML_USE_RPC
70
+ register_backend(lm_ggml_backend_rpc_reg());
71
+ #endif
72
+ #ifdef LM_GGML_USE_AMX
73
+ register_backend(lm_ggml_backend_amx_reg());
74
+ #endif
75
+ #ifdef LM_GGML_USE_KOMPUTE
76
+ register_backend(lm_ggml_backend_kompute_reg());
77
+ #endif
78
+
79
+ register_backend(lm_ggml_backend_cpu_reg());
80
+ }
81
+
82
+ void register_backend(lm_ggml_backend_reg_t reg) {
83
+ if (!reg) {
84
+ return;
85
+ }
86
+
87
+ #ifndef NDEBUG
88
+ LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
89
+ __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
90
+ #endif
91
+ backends.push_back(reg);
92
+ for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
93
+ register_device(lm_ggml_backend_reg_dev_get(reg, i));
94
+ }
95
+ }
96
+
97
+ void register_device(lm_ggml_backend_dev_t device) {
98
+ #ifndef NDEBUG
99
+ LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
100
+ #endif
101
+ devices.push_back(device);
102
+ }
103
+ };
104
+
105
+ static lm_ggml_backend_registry & get_reg() {
106
+ static lm_ggml_backend_registry reg;
107
+ return reg;
108
+ }
109
+
110
+ // Internal API
111
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
112
+ get_reg().register_backend(reg);
113
+ }
114
+
115
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
116
+ get_reg().register_device(device);
117
+ }
118
+
119
+ // Backend (reg) enumeration
120
+ size_t lm_ggml_backend_reg_count() {
121
+ return get_reg().backends.size();
122
+ }
123
+
124
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
125
+ LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
126
+ return get_reg().backends[index];
127
+ }
128
+
129
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
130
+ for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
131
+ lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
132
+ if (std::strcmp(lm_ggml_backend_reg_name(reg), name) == 0) {
133
+ return reg;
134
+ }
135
+ }
136
+ return NULL;
137
+ }
138
+
139
+ // Device enumeration
140
+ size_t lm_ggml_backend_dev_count() {
141
+ return get_reg().devices.size();
142
+ }
143
+
144
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
145
+ LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
146
+ return get_reg().devices[index];
147
+ }
148
+
149
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
150
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
151
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
152
+ if (strcmp(lm_ggml_backend_dev_name(dev), name) == 0) {
153
+ return dev;
154
+ }
155
+ }
156
+ return NULL;
157
+ }
158
+
159
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
160
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
161
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
162
+ if (lm_ggml_backend_dev_type(dev) == type) {
163
+ return dev;
164
+ }
165
+ }
166
+ return NULL;
167
+ }
168
+
169
+ // Convenience functions
170
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
171
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
172
+ if (!dev) {
173
+ return NULL;
174
+ }
175
+ return lm_ggml_backend_dev_init(dev, params);
176
+ }
177
+
178
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
179
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
180
+ if (!dev) {
181
+ return NULL;
182
+ }
183
+ return lm_ggml_backend_dev_init(dev, params);
184
+ }
185
+
186
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
187
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
188
+ if (!dev) {
189
+ dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
190
+ }
191
+ if (!dev) {
192
+ return NULL;
193
+ }
194
+ return lm_ggml_backend_dev_init(dev, NULL);
195
+ }