llama_cpp 0.16.1 → 0.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
@@ -226,8 +226,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
226
226
|
assert(offset_end <= cpts.size());
|
227
227
|
start = offset_end;
|
228
228
|
|
229
|
-
|
230
|
-
|
229
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
230
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
231
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
231
232
|
};
|
232
233
|
|
233
234
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
@@ -253,18 +254,18 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
253
254
|
};
|
254
255
|
|
255
256
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
256
|
-
const
|
257
|
+
const uint32_t cpt = _get_cpt(pos);
|
257
258
|
const auto flags = _get_flags(pos);
|
258
259
|
|
259
260
|
// regex: 's|'t|'re|'ve|'m|'ll|'d
|
260
261
|
if (cpt == '\'' && pos+1 < offset_end) {
|
261
|
-
|
262
|
+
uint32_t cpt_next = _get_cpt(pos+1);
|
262
263
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
263
264
|
pos += _add_token(pos+2);
|
264
265
|
continue;
|
265
266
|
}
|
266
267
|
if (pos+2 < offset_end) {
|
267
|
-
|
268
|
+
uint32_t cpt_next_next = _get_cpt(pos+2);
|
268
269
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
269
270
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
270
271
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
@@ -309,7 +310,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
309
310
|
}
|
310
311
|
|
311
312
|
// regex: \s+(?!\S)
|
312
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
313
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
313
314
|
pos += num_whitespaces - 1;
|
314
315
|
_add_token(pos);
|
315
316
|
continue;
|
@@ -344,8 +345,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
344
345
|
assert(offset_end <= cpts.size());
|
345
346
|
start = offset_end;
|
346
347
|
|
347
|
-
|
348
|
-
|
348
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
349
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
350
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
349
351
|
};
|
350
352
|
|
351
353
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
@@ -371,18 +373,18 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
371
373
|
};
|
372
374
|
|
373
375
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
374
|
-
const
|
376
|
+
const uint32_t cpt = _get_cpt(pos);
|
375
377
|
const auto flags = _get_flags(pos);
|
376
378
|
|
377
379
|
// regex: (?i:'s|'t|'re|'ve|'m|'ll|'d) // case insensitive
|
378
380
|
if (cpt == '\'' && pos+1 < offset_end) {
|
379
|
-
|
381
|
+
uint32_t cpt_next = unicode_tolower(_get_cpt(pos+1));
|
380
382
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
381
383
|
pos += _add_token(pos+2);
|
382
384
|
continue;
|
383
385
|
}
|
384
386
|
if (pos+2 < offset_end) {
|
385
|
-
|
387
|
+
uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos+2));
|
386
388
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
387
389
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
388
390
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
@@ -424,7 +426,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
424
426
|
while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
425
427
|
flags2 = _get_flags(++pos);
|
426
428
|
}
|
427
|
-
|
429
|
+
uint32_t cpt2 = _get_cpt(pos);
|
428
430
|
while (cpt2 == '\r' || cpt2 == '\n') {
|
429
431
|
cpt2 = _get_cpt(++pos);
|
430
432
|
}
|
@@ -435,7 +437,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
435
437
|
size_t num_whitespaces = 0;
|
436
438
|
size_t last_end_r_or_n = 0;
|
437
439
|
while (_get_flags(pos+num_whitespaces).is_whitespace) {
|
438
|
-
|
440
|
+
uint32_t cpt2 = _get_cpt(pos+num_whitespaces);
|
439
441
|
if (cpt2 == '\r' || cpt2 == '\n') {
|
440
442
|
last_end_r_or_n = pos + num_whitespaces + 1;
|
441
443
|
}
|
@@ -450,7 +452,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
450
452
|
}
|
451
453
|
|
452
454
|
// regex: \s+(?!\S)
|
453
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
455
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
454
456
|
pos += num_whitespaces - 1;
|
455
457
|
_add_token(pos);
|
456
458
|
continue;
|
@@ -594,6 +596,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|
594
596
|
|
595
597
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
596
598
|
std::vector<uint32_t> result;
|
599
|
+
result.reserve(utf8.size());
|
597
600
|
size_t offset = 0;
|
598
601
|
while (offset < utf8.size()) {
|
599
602
|
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
@@ -626,7 +629,7 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|
626
629
|
return map.at(utf8);
|
627
630
|
}
|
628
631
|
|
629
|
-
|
632
|
+
uint32_t unicode_tolower(uint32_t cp) {
|
630
633
|
auto it = unicode_map_lowercase.find(cp);
|
631
634
|
return it == unicode_map_lowercase.end() ? cp : it->second;
|
632
635
|
}
|
@@ -679,10 +682,14 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
679
682
|
continue;
|
680
683
|
}
|
681
684
|
|
682
|
-
const
|
685
|
+
const auto flags = unicode_cpt_flags(cpts[i]);
|
683
686
|
|
684
|
-
if (
|
685
|
-
|
687
|
+
if (flags.is_whitespace) {
|
688
|
+
//NOTE: C++ std::regex \s does not match 0x85; Rust and Python regex do.
|
689
|
+
//text_collapsed[i] = (char) 0x85; // <Next Line> as whitespace fallback
|
690
|
+
text_collapsed[i] = (char) 0x0B; // <vertical tab> as whitespace fallback
|
691
|
+
} else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
|
692
|
+
text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
|
686
693
|
} else {
|
687
694
|
text_collapsed[i] = (char) 0xD0; // fallback
|
688
695
|
}
|
@@ -766,9 +773,16 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
766
773
|
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
767
774
|
} else {
|
768
775
|
// no unicode category used, we can use std::wregex directly
|
769
|
-
const std::wstring wtext = unicode_wstring_from_utf8(text);
|
770
776
|
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
771
777
|
|
778
|
+
// std::wregex \s does not match non-ASCII whitespace, using 0x0B as fallback
|
779
|
+
std::wstring wtext(cpts.begin(), cpts.end());
|
780
|
+
for (size_t i = 0; i < wtext.size(); ++i) {
|
781
|
+
if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
|
782
|
+
wtext[i] = 0x0B;
|
783
|
+
}
|
784
|
+
}
|
785
|
+
|
772
786
|
//printf("text: %s\n", text.c_str());
|
773
787
|
//printf("regex_expr: %s\n", regex_expr.c_str());
|
774
788
|
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
|
@@ -58,6 +58,6 @@ codepoint_flags unicode_cpt_flags(const std::string & utf8);
|
|
58
58
|
std::string unicode_byte_to_utf8(uint8_t byte);
|
59
59
|
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
60
60
|
|
61
|
-
|
61
|
+
uint32_t unicode_tolower(uint32_t cp);
|
62
62
|
|
63
63
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.
|
4
|
+
version: 0.16.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|