cui-llama.rn 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/llama.h CHANGED
@@ -103,6 +103,7 @@ extern "C" {
103
103
  LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
104
104
  LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
105
105
  LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
106
+ LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
106
107
  };
107
108
 
108
109
  enum llama_rope_type {
@@ -193,6 +194,7 @@ extern "C" {
193
194
  LLAMA_POOLING_TYPE_MEAN = 1,
194
195
  LLAMA_POOLING_TYPE_CLS = 2,
195
196
  LLAMA_POOLING_TYPE_LAST = 3,
197
+ LLAMA_POOLING_TYPE_RANK = 4, // used by reranking models to attach the classification head to the graph
196
198
  };
197
199
 
198
200
  enum llama_attention_type {
@@ -202,9 +204,9 @@ extern "C" {
202
204
  };
203
205
 
204
206
  enum llama_split_mode {
205
- LLAMA_SPLIT_MODE_NONE = 0, // single GPU
206
- LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
207
- LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
207
+ LLAMA_SPLIT_MODE_NONE = 0, // single GPU
208
+ LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
209
+ LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
208
210
  };
209
211
 
210
212
  // TODO: simplify (https://github.com/ggerganov/llama.cpp/pull/9294#pullrequestreview-2286561979)
@@ -872,7 +874,8 @@ extern "C" {
872
874
 
873
875
  // Get the embeddings for a sequence id
874
876
  // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
875
- // shape: [n_embd] (1-dimensional)
877
+ // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
878
+ // otherwise: float[n_embd] (1-dimensional)
876
879
  LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
877
880
 
878
881
  //
@@ -911,6 +914,8 @@ extern "C" {
911
914
  //
912
915
  // Tokenization
913
916
  //
917
+ // The API is thread-safe.
918
+ //
914
919
 
915
920
  /// @details Convert the provided text into tokens.
916
921
  /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
package/cpp/unicode-data.cpp CHANGED
@@ -7,7 +7,7 @@
7
7
  #include <unordered_map>
8
8
  #include <unordered_set>
9
9
 
10
- const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
10
+ const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
11
11
  {0x000000, 0x0080},
12
12
  {0x000020, 0x0008},
13
13
  {0x000021, 0x0020},
@@ -2311,7 +2311,8 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
2311
2311
  0x003000,
2312
2312
  };
2313
2313
 
2314
- const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
2314
+ // list is always in ascending order, to enable binary search
2315
+ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
2315
2316
  {0x000041, 0x000061},
2316
2317
  {0x000042, 0x000062},
2317
2318
  {0x000043, 0x000063},
@@ -3747,7 +3748,8 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
3747
3748
  {0x01E921, 0x01E943},
3748
3749
  };
3749
3750
 
3750
- const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
3751
+ // list is always in ascending order, to enable binary search
3752
+ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
3751
3753
  {0x000061, 0x000041},
3752
3754
  {0x000062, 0x000042},
3753
3755
  {0x000063, 0x000043},
@@ -5200,7 +5202,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
5200
5202
  {0x01E943, 0x01E921},
5201
5203
  };
5202
5204
 
5203
- const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
5205
+ const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, nfd
5204
5206
  {0x000000, 0x000000, 0x000000},
5205
5207
  {0x0000C0, 0x0000C5, 0x000041},
5206
5208
  {0x0000C7, 0x0000C7, 0x000043},
package/cpp/unicode-data.h CHANGED
@@ -13,8 +13,8 @@ struct range_nfd {
13
13
 
14
14
  static const uint32_t MAX_CODEPOINTS = 0x110000;
15
15
 
16
- extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
16
+ extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
17
17
  extern const std::unordered_set<uint32_t> unicode_set_whitespace;
18
- extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
19
- extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
20
- extern const std::vector<range_nfd> unicode_ranges_nfd;
18
+ extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
19
+ extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
20
+ extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
package/cpp/unicode.cpp CHANGED
@@ -123,11 +123,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
123
123
  static std::vector<codepoint_flags> unicode_cpt_flags_array() {
124
124
  std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
125
125
 
126
- assert (unicode_ranges_flags.front().first == 0);
127
- assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
126
+ assert (unicode_ranges_flags.begin()[0].first == 0);
127
+ assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
128
128
  for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
129
- const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
130
- const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
129
+ const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
130
+ const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
131
131
  for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
132
132
  cpt_flags[cpt] = range_ini.second;
133
133
  }
@@ -597,7 +597,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
597
597
  std::vector<uint32_t> result(cpts.size());
598
598
  for (size_t i = 0; i < cpts.size(); ++i) {
599
599
  const uint32_t cpt = cpts[i];
600
- auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
600
+ auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
601
601
  result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
602
602
  }
603
603
  return result;
@@ -639,8 +639,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
639
639
  }
640
640
 
641
641
  uint32_t unicode_tolower(uint32_t cp) {
642
- auto it = unicode_map_lowercase.find(cp);
643
- return it == unicode_map_lowercase.end() ? cp : it->second;
642
+ // binary search
643
+ auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
644
+ [](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
645
+ return pair.first < value;
646
+ });
647
+ if (it != unicode_map_lowercase.end() && it->first == cp) {
648
+ return it->second;
649
+ }
650
+ return cp; // Return the original code point if no lowercase mapping is found
644
651
  }
645
652
 
646
653
  std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.2.1",
3
+ "version": "1.2.3",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",