cui-llama.rn 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +2 -2
- package/android/src/main/java/com/rnllama/LlamaContext.java +32 -7
- package/cpp/common.cpp +36 -1
- package/cpp/common.h +5 -1
- package/cpp/ggml-aarch64.c +2 -11
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +151 -78
- package/cpp/{ggml-backend.c → ggml-backend.cpp} +565 -269
- package/cpp/ggml-backend.h +147 -62
- package/cpp/ggml-impl.h +15 -0
- package/cpp/ggml-metal.h +8 -9
- package/cpp/ggml-metal.m +2428 -2111
- package/cpp/ggml-quants.c +2 -2
- package/cpp/ggml-quants.h +0 -4
- package/cpp/ggml.c +799 -1121
- package/cpp/ggml.h +79 -72
- package/cpp/llama-vocab.cpp +189 -106
- package/cpp/llama-vocab.h +18 -9
- package/cpp/llama.cpp +736 -341
- package/cpp/llama.h +9 -4
- package/cpp/unicode-data.cpp +6 -4
- package/cpp/unicode-data.h +4 -4
- package/cpp/unicode.cpp +14 -7
- package/package.json +1 -1
package/cpp/llama.h
CHANGED
@@ -103,6 +103,7 @@ extern "C" {
|
|
103
103
|
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
|
104
104
|
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
|
105
105
|
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
|
106
|
+
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
|
106
107
|
};
|
107
108
|
|
108
109
|
enum llama_rope_type {
|
@@ -193,6 +194,7 @@ extern "C" {
|
|
193
194
|
LLAMA_POOLING_TYPE_MEAN = 1,
|
194
195
|
LLAMA_POOLING_TYPE_CLS = 2,
|
195
196
|
LLAMA_POOLING_TYPE_LAST = 3,
|
197
|
+
LLAMA_POOLING_TYPE_RANK = 4, // used by reranking models to attach the classification head to the graph
|
196
198
|
};
|
197
199
|
|
198
200
|
enum llama_attention_type {
|
@@ -202,9 +204,9 @@ extern "C" {
|
|
202
204
|
};
|
203
205
|
|
204
206
|
enum llama_split_mode {
|
205
|
-
LLAMA_SPLIT_MODE_NONE = 0, // single GPU
|
206
|
-
LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
|
207
|
-
LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
|
207
|
+
LLAMA_SPLIT_MODE_NONE = 0, // single GPU
|
208
|
+
LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
|
209
|
+
LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
|
208
210
|
};
|
209
211
|
|
210
212
|
// TODO: simplify (https://github.com/ggerganov/llama.cpp/pull/9294#pullrequestreview-2286561979)
|
@@ -872,7 +874,8 @@ extern "C" {
|
|
872
874
|
|
873
875
|
// Get the embeddings for a sequence id
|
874
876
|
// Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
|
875
|
-
//
|
877
|
+
// when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
|
878
|
+
// otherwise: float[n_embd] (1-dimensional)
|
876
879
|
LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
|
877
880
|
|
878
881
|
//
|
@@ -911,6 +914,8 @@ extern "C" {
|
|
911
914
|
//
|
912
915
|
// Tokenization
|
913
916
|
//
|
917
|
+
// The API is thread-safe.
|
918
|
+
//
|
914
919
|
|
915
920
|
/// @details Convert the provided text into tokens.
|
916
921
|
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
package/cpp/unicode-data.cpp
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
#include <unordered_map>
|
8
8
|
#include <unordered_set>
|
9
9
|
|
10
|
-
const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
|
10
|
+
const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
|
11
11
|
{0x000000, 0x0080},
|
12
12
|
{0x000020, 0x0008},
|
13
13
|
{0x000021, 0x0020},
|
@@ -2311,7 +2311,8 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
|
|
2311
2311
|
0x003000,
|
2312
2312
|
};
|
2313
2313
|
|
2314
|
-
const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
2314
|
+
// list is always in ascending order, to enable binary search
|
2315
|
+
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
|
2315
2316
|
{0x000041, 0x000061},
|
2316
2317
|
{0x000042, 0x000062},
|
2317
2318
|
{0x000043, 0x000063},
|
@@ -3747,7 +3748,8 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
|
|
3747
3748
|
{0x01E921, 0x01E943},
|
3748
3749
|
};
|
3749
3750
|
|
3750
|
-
const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
3751
|
+
// list is always in ascending order, to enable binary search
|
3752
|
+
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
|
3751
3753
|
{0x000061, 0x000041},
|
3752
3754
|
{0x000062, 0x000042},
|
3753
3755
|
{0x000063, 0x000043},
|
@@ -5200,7 +5202,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
|
|
5200
5202
|
{0x01E943, 0x01E921},
|
5201
5203
|
};
|
5202
5204
|
|
5203
|
-
const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
|
5205
|
+
const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, nfd
|
5204
5206
|
{0x000000, 0x000000, 0x000000},
|
5205
5207
|
{0x0000C0, 0x0000C5, 0x000041},
|
5206
5208
|
{0x0000C7, 0x0000C7, 0x000043},
|
package/cpp/unicode-data.h
CHANGED
@@ -13,8 +13,8 @@ struct range_nfd {
|
|
13
13
|
|
14
14
|
static const uint32_t MAX_CODEPOINTS = 0x110000;
|
15
15
|
|
16
|
-
extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
|
16
|
+
extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
|
17
17
|
extern const std::unordered_set<uint32_t> unicode_set_whitespace;
|
18
|
-
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
|
19
|
-
extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
|
20
|
-
extern const std::vector<range_nfd> unicode_ranges_nfd;
|
18
|
+
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
|
19
|
+
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
|
20
|
+
extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
|
package/cpp/unicode.cpp
CHANGED
@@ -123,11 +123,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
|
123
123
|
static std::vector<codepoint_flags> unicode_cpt_flags_array() {
|
124
124
|
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
|
125
125
|
|
126
|
-
assert (unicode_ranges_flags.front().first == 0);
|
127
|
-
assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
|
126
|
+
assert (unicode_ranges_flags.begin()[0].first == 0);
|
127
|
+
assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
|
128
128
|
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
|
129
|
-
const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
|
130
|
-
const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
|
129
|
+
const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
|
130
|
+
const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
|
131
131
|
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
|
132
132
|
cpt_flags[cpt] = range_ini.second;
|
133
133
|
}
|
@@ -597,7 +597,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|
597
597
|
std::vector<uint32_t> result(cpts.size());
|
598
598
|
for (size_t i = 0; i < cpts.size(); ++i) {
|
599
599
|
const uint32_t cpt = cpts[i];
|
600
|
-
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
|
600
|
+
auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
|
601
601
|
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
|
602
602
|
}
|
603
603
|
return result;
|
@@ -639,8 +639,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|
639
639
|
}
|
640
640
|
|
641
641
|
uint32_t unicode_tolower(uint32_t cp) {
|
642
|
-
auto it = unicode_map_lowercase.find(cp);
643
|
-
return it == unicode_map_lowercase.end() ? cp : it->second;
642
|
+
// binary search
|
643
|
+
auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
|
644
|
+
[](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
|
645
|
+
return pair.first < value;
|
646
|
+
});
|
647
|
+
if (it != unicode_map_lowercase.end() && it->first == cp) {
|
648
|
+
return it->second;
|
649
|
+
}
|
650
|
+
return cp; // Return the original code point if no lowercase mapping is found
|
644
651
|
}
|
645
652
|
|
646
653
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|