cui-llama.rn 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -23
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +13 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +27 -20
- package/android/src/main/java/com/rnllama/RNLlama.java +5 -1
- package/android/src/main/jni.cpp +15 -12
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/cpp/README.md +1 -1
- package/cpp/common.cpp +158 -267
- package/cpp/common.h +46 -12
- package/cpp/ggml-alloc.c +1042 -1037
- package/cpp/ggml-backend-impl.h +255 -256
- package/cpp/ggml-backend-reg.cpp +582 -582
- package/cpp/ggml-backend.cpp +2002 -2002
- package/cpp/ggml-backend.h +354 -352
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +386 -386
- package/cpp/ggml-cpu-quants.c +10920 -10839
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +329 -60
- package/cpp/ggml-cpu.cpp +10 -2
- package/cpp/ggml-cpu.h +135 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +17 -17
- package/cpp/ggml-metal.m +4884 -4884
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6514 -6448
- package/cpp/ggml.h +2194 -2163
- package/cpp/gguf.cpp +1329 -1325
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1045 -1045
- package/cpp/json-schema-to-grammar.h +8 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -346
- package/cpp/llama-adapter.h +74 -73
- package/cpp/llama-arch.cpp +1487 -1434
- package/cpp/llama-arch.h +400 -395
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +578 -567
- package/cpp/llama-chat.h +52 -51
- package/cpp/llama-context.cpp +1775 -1771
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1139 -1139
- package/cpp/llama-grammar.h +143 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -140
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +218 -218
- package/cpp/llama-mmap.cpp +2 -1
- package/cpp/llama-mmap.h +67 -67
- package/cpp/llama-model-loader.cpp +1124 -1011
- package/cpp/llama-model-loader.h +167 -158
- package/cpp/llama-model.cpp +3997 -2202
- package/cpp/llama-model.h +370 -391
- package/cpp/llama-sampling.cpp +2408 -2406
- package/cpp/llama-sampling.h +32 -48
- package/cpp/llama-vocab.cpp +3247 -1982
- package/cpp/llama-vocab.h +125 -182
- package/cpp/llama.cpp +416 -2886
- package/cpp/llama.h +1323 -1285
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.cpp +822 -0
- package/cpp/rn-llama.h +123 -0
- package/cpp/rn-llama.hpp +18 -12
- package/cpp/sampling.cpp +505 -500
- package/cpp/sgemm.cpp +2597 -2597
- package/cpp/speculative.cpp +277 -274
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +2 -3
- package/ios/CMakeLists.txt +99 -0
- package/ios/RNLlama.h +5 -1
- package/ios/RNLlama.mm +2 -2
- package/ios/RNLlamaContext.h +8 -1
- package/ios/RNLlamaContext.mm +15 -11
- package/ios/rnllama.xcframework/Info.plist +74 -0
- package/jest/mock.js +3 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +4 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +4 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +5 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +8 -2
- package/package.json +5 -2
- package/src/NativeRNLlama.ts +5 -1
- package/src/index.ts +9 -2
package/cpp/ggml-metal-impl.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef GGML_METAL_IMPL
|
2
|
+
#define GGML_METAL_IMPL
|
3
3
|
|
4
4
|
// kernel argument structs
|
5
5
|
//
|
@@ -34,7 +34,7 @@ typedef struct {
|
|
34
34
|
uint64_t nb2;
|
35
35
|
uint64_t nb3;
|
36
36
|
int32_t dim;
|
37
|
-
}
|
37
|
+
} ggml_metal_kargs_concat;
|
38
38
|
|
39
39
|
typedef struct {
|
40
40
|
int32_t ne00;
|
@@ -62,7 +62,7 @@ typedef struct {
|
|
62
62
|
uint64_t nb2;
|
63
63
|
uint64_t nb3;
|
64
64
|
uint64_t offs;
|
65
|
-
}
|
65
|
+
} ggml_metal_kargs_bin;
|
66
66
|
|
67
67
|
typedef struct {
|
68
68
|
int32_t ne00;
|
@@ -81,7 +81,7 @@ typedef struct {
|
|
81
81
|
uint64_t nb1;
|
82
82
|
uint64_t nb2;
|
83
83
|
uint64_t nb3;
|
84
|
-
}
|
84
|
+
} ggml_metal_kargs_repeat;
|
85
85
|
|
86
86
|
typedef struct {
|
87
87
|
int64_t ne00;
|
@@ -100,7 +100,7 @@ typedef struct {
|
|
100
100
|
uint64_t nb1;
|
101
101
|
uint64_t nb2;
|
102
102
|
uint64_t nb3;
|
103
|
-
}
|
103
|
+
} ggml_metal_kargs_cpy;
|
104
104
|
|
105
105
|
typedef struct {
|
106
106
|
int64_t ne10;
|
@@ -115,7 +115,7 @@ typedef struct {
|
|
115
115
|
uint64_t nb3;
|
116
116
|
uint64_t offs;
|
117
117
|
bool inplace;
|
118
|
-
}
|
118
|
+
} ggml_metal_kargs_set;
|
119
119
|
|
120
120
|
typedef struct {
|
121
121
|
int32_t ne00;
|
@@ -143,7 +143,7 @@ typedef struct {
|
|
143
143
|
float attn_factor;
|
144
144
|
float beta_fast;
|
145
145
|
float beta_slow;
|
146
|
-
}
|
146
|
+
} ggml_metal_kargs_rope;
|
147
147
|
|
148
148
|
typedef struct {
|
149
149
|
int32_t ne01;
|
@@ -167,7 +167,7 @@ typedef struct {
|
|
167
167
|
float m1;
|
168
168
|
uint16_t n_head_log2;
|
169
169
|
float logit_softcap;
|
170
|
-
}
|
170
|
+
} ggml_metal_kargs_flash_attn_ext;
|
171
171
|
|
172
172
|
typedef struct {
|
173
173
|
int32_t ne00;
|
@@ -184,7 +184,7 @@ typedef struct {
|
|
184
184
|
int32_t ne1;
|
185
185
|
int16_t r2;
|
186
186
|
int16_t r3;
|
187
|
-
}
|
187
|
+
} ggml_metal_kargs_mul_mm;
|
188
188
|
|
189
189
|
typedef struct {
|
190
190
|
int32_t ne00;
|
@@ -205,7 +205,7 @@ typedef struct {
|
|
205
205
|
int32_t ne1;
|
206
206
|
int16_t r2;
|
207
207
|
int16_t r3;
|
208
|
-
}
|
208
|
+
} ggml_metal_kargs_mul_mv;
|
209
209
|
|
210
210
|
typedef struct {
|
211
211
|
int32_t ne00;
|
@@ -229,7 +229,7 @@ typedef struct {
|
|
229
229
|
int16_t nsg;
|
230
230
|
int16_t nxpsg;
|
231
231
|
int16_t r1ptg;
|
232
|
-
}
|
232
|
+
} ggml_metal_kargs_mul_mv_ext;
|
233
233
|
|
234
234
|
typedef struct {
|
235
235
|
int32_t nei0;
|
@@ -247,7 +247,7 @@ typedef struct {
|
|
247
247
|
uint64_t nb12;
|
248
248
|
int32_t ne0;
|
249
249
|
int32_t ne1;
|
250
|
-
}
|
250
|
+
} ggml_metal_kargs_mul_mm_id;
|
251
251
|
|
252
252
|
typedef struct {
|
253
253
|
int32_t nei0;
|
@@ -269,20 +269,20 @@ typedef struct {
|
|
269
269
|
int32_t ne0;
|
270
270
|
int32_t ne1;
|
271
271
|
uint64_t nb1;
|
272
|
-
}
|
272
|
+
} ggml_metal_kargs_mul_mv_id;
|
273
273
|
|
274
274
|
typedef struct {
|
275
275
|
int32_t ne00;
|
276
276
|
int32_t ne00_4;
|
277
277
|
uint64_t nb01;
|
278
278
|
float eps;
|
279
|
-
}
|
279
|
+
} ggml_metal_kargs_norm;
|
280
280
|
|
281
281
|
typedef struct {
|
282
282
|
int32_t ne00;
|
283
283
|
int32_t ne00_4;
|
284
284
|
uint64_t nb01;
|
285
285
|
float eps;
|
286
|
-
}
|
286
|
+
} ggml_metal_kargs_rms_norm;
|
287
287
|
|
288
|
-
#endif //
|
288
|
+
#endif // GGML_METAL_IMPL
|