cui-llama.rn 1.4.0 → 1.4.2
This diff compares publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between those versions as published.
- package/README.md +4 -23
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +13 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +27 -20
- package/android/src/main/java/com/rnllama/RNLlama.java +5 -1
- package/android/src/main/jni.cpp +15 -12
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/cpp/README.md +1 -1
- package/cpp/common.cpp +158 -267
- package/cpp/common.h +46 -12
- package/cpp/ggml-alloc.c +1042 -1037
- package/cpp/ggml-backend-impl.h +255 -256
- package/cpp/ggml-backend-reg.cpp +582 -582
- package/cpp/ggml-backend.cpp +2002 -2002
- package/cpp/ggml-backend.h +354 -352
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +386 -386
- package/cpp/ggml-cpu-quants.c +10920 -10839
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +329 -60
- package/cpp/ggml-cpu.cpp +10 -2
- package/cpp/ggml-cpu.h +135 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +17 -17
- package/cpp/ggml-metal.m +4884 -4884
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6514 -6448
- package/cpp/ggml.h +2194 -2163
- package/cpp/gguf.cpp +1329 -1325
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1045 -1045
- package/cpp/json-schema-to-grammar.h +8 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -346
- package/cpp/llama-adapter.h +74 -73
- package/cpp/llama-arch.cpp +1487 -1434
- package/cpp/llama-arch.h +400 -395
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +578 -567
- package/cpp/llama-chat.h +52 -51
- package/cpp/llama-context.cpp +1775 -1771
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1139 -1139
- package/cpp/llama-grammar.h +143 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -140
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +218 -218
- package/cpp/llama-mmap.cpp +2 -1
- package/cpp/llama-mmap.h +67 -67
- package/cpp/llama-model-loader.cpp +1124 -1011
- package/cpp/llama-model-loader.h +167 -158
- package/cpp/llama-model.cpp +3997 -2202
- package/cpp/llama-model.h +370 -391
- package/cpp/llama-sampling.cpp +2408 -2406
- package/cpp/llama-sampling.h +32 -48
- package/cpp/llama-vocab.cpp +3247 -1982
- package/cpp/llama-vocab.h +125 -182
- package/cpp/llama.cpp +416 -2886
- package/cpp/llama.h +1323 -1285
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.cpp +822 -0
- package/cpp/rn-llama.h +123 -0
- package/cpp/rn-llama.hpp +18 -12
- package/cpp/sampling.cpp +505 -500
- package/cpp/sgemm.cpp +2597 -2597
- package/cpp/speculative.cpp +277 -274
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +2 -3
- package/ios/CMakeLists.txt +99 -0
- package/ios/RNLlama.h +5 -1
- package/ios/RNLlama.mm +2 -2
- package/ios/RNLlamaContext.h +8 -1
- package/ios/RNLlamaContext.mm +15 -11
- package/ios/rnllama.xcframework/Info.plist +74 -0
- package/jest/mock.js +3 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +4 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +4 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +5 -1
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +8 -2
- package/package.json +5 -2
- package/src/NativeRNLlama.ts +5 -1
- package/src/index.ts +9 -2
package/cpp/llama-batch.h
CHANGED
@@ -1,88 +1,88 @@
Both sides of this hunk are textually identical: all 88 lines are removed and re-added unchanged, so the difference is most likely whitespace or line endings only. The file contents, old and new:

```cpp
#pragma once

#include "llama.h"

#include <array>
#include <vector>

// very similar to llama_batch,
// but has more metadata about sequences
struct llama_ubatch {
    bool equal_seqs;
    // TODO: whole_seqs for embeddings?

    uint32_t n_tokens;     // total tokens (n_seq_tokens * n_seqs)
    uint32_t n_seq_tokens; // tokens per sequence
    uint32_t n_seqs;

    llama_token  *  token;    // [n_tokens]
    float        *  embd;     // [n_embd, n_tokens]
    llama_pos    *  pos;      // [n_tokens]
    int32_t      *  n_seq_id; // [n_seqs]
    llama_seq_id ** seq_id;   // [n_seqs]
    int8_t       *  output;   // [n_tokens]
};

struct llama_sbatch_seq {
    int32_t n_seq_id;

    llama_seq_id * seq_id;

    size_t offset;
    size_t length;
};

// sequence-length-aware batch splitting
struct llama_sbatch {
    // tokens left in this batch
    size_t n_tokens;

    size_t n_embd;

    bool logits_all; // TODO: remove once lctx.logits_all is removed too

    // sorted indices into the batch
    std::vector<size_t> ids;
    // batch indices of the output
    std::vector<size_t> out_ids;
    std::vector<llama_sbatch_seq> seq;

    const llama_batch * batch = nullptr;

    // buffers for the ubatch
    std::vector<llama_token>    ubatch_token;
    std::vector<float>          ubatch_embd;
    std::vector<llama_pos>      ubatch_pos;
    std::vector<int32_t>        ubatch_n_seq_id;
    std::vector<llama_seq_id *> ubatch_seq_id;
    std::vector<int8_t>         ubatch_output;

    llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);

    void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);

    // simple split, unknown number of sequences of unequal lengths
    llama_ubatch split_simple(size_t n_ubatch);

    // make batches of equal-length sequences
    llama_ubatch split_equal(size_t n_ubatch);

    // sequence-wise split
    llama_ubatch split_seq(size_t n_ubatch);

    void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
};

// temporary allocate memory for the input batch if needed
struct llama_batch_allocr {
    struct llama_batch batch;

    std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
    std::vector<llama_pos> pos;
    std::vector<int32_t> n_seq_id;
    std::vector<llama_seq_id *> seq_id;
    std::vector<int8_t> logits;

    // optionally fulfill the batch returned by llama_batch_get_one
    llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
};
```
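For orientation, here is a minimal sketch of how this splitting interface is driven, based only on the declarations above. The loop shape mirrors how llama.cpp's decode path consumes ubatches; the function name, the micro-batch size, and the processing stub are illustrative assumptions, not code from this package:

```cpp
// Sketch: draining a llama_batch through llama_sbatch, one ubatch at a time.
#include "llama-batch.h"

void consume_batch(const llama_batch & batch, size_t n_embd) {
    llama_sbatch sbatch;
    // simple_split = true: treat the batch as an unknown number of
    // sequences of unequal lengths (see split_simple above)
    sbatch.from_batch(batch, n_embd, /*simple_split=*/true, /*logits_all=*/false);

    const size_t n_ubatch = 512; // illustrative micro-batch size

    // sbatch.n_tokens counts the tokens still left in the batch;
    // each call to split_simple carves off up to n_ubatch of them
    while (sbatch.n_tokens > 0) {
        llama_ubatch ubatch = sbatch.split_simple(n_ubatch);
        // ... run the model on ubatch.token / ubatch.pos / ubatch.seq_id,
        //     producing logits where ubatch.output is set ...
    }
}
```

`llama_batch_allocr` plays a complementary role on the way in: per its comment above, it is constructed from a raw `llama_batch` and fills in any arrays the caller omitted (positions, sequence ids, logit flags), as with batches returned by `llama_batch_get_one`, before the batch reaches the sbatch.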