llama_cpp 0.16.1 → 0.16.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +10 -2
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +10 -10
- data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +28 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +6 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +8 -3
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2124 -13202
- data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +27564 -23876
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +278 -366
- data/vendor/tmp/llama.cpp/ggml.c +67 -150
- data/vendor/tmp/llama.cpp/ggml.h +6 -0
- data/vendor/tmp/llama.cpp/llama.cpp +530 -237
- data/vendor/tmp/llama.cpp/llama.h +5 -1
- data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
- data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
- data/vendor/tmp/llama.cpp/unicode.h +1 -1
- metadata +2 -2
@@ -226,8 +226,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
226
226
|
assert(offset_end <= cpts.size());
|
227
227
|
start = offset_end;
|
228
228
|
|
229
|
-
|
230
|
-
|
229
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
230
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
231
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
231
232
|
};
|
232
233
|
|
233
234
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
@@ -253,18 +254,18 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
253
254
|
};
|
254
255
|
|
255
256
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
256
|
-
const
|
257
|
+
const uint32_t cpt = _get_cpt(pos);
|
257
258
|
const auto flags = _get_flags(pos);
|
258
259
|
|
259
260
|
// regex: 's|'t|'re|'ve|'m|'ll|'d
|
260
261
|
if (cpt == '\'' && pos+1 < offset_end) {
|
261
|
-
|
262
|
+
uint32_t cpt_next = _get_cpt(pos+1);
|
262
263
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
263
264
|
pos += _add_token(pos+2);
|
264
265
|
continue;
|
265
266
|
}
|
266
267
|
if (pos+2 < offset_end) {
|
267
|
-
|
268
|
+
uint32_t cpt_next_next = _get_cpt(pos+2);
|
268
269
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
269
270
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
270
271
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
@@ -309,7 +310,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|
309
310
|
}
|
310
311
|
|
311
312
|
// regex: \s+(?!\S)
|
312
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
313
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
313
314
|
pos += num_whitespaces - 1;
|
314
315
|
_add_token(pos);
|
315
316
|
continue;
|
@@ -344,8 +345,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
344
345
|
assert(offset_end <= cpts.size());
|
345
346
|
start = offset_end;
|
346
347
|
|
347
|
-
|
348
|
-
|
348
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
349
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
350
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
349
351
|
};
|
350
352
|
|
351
353
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
@@ -371,18 +373,18 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
371
373
|
};
|
372
374
|
|
373
375
|
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
374
|
-
const
|
376
|
+
const uint32_t cpt = _get_cpt(pos);
|
375
377
|
const auto flags = _get_flags(pos);
|
376
378
|
|
377
379
|
// regex: (?i:'s|'t|'re|'ve|'m|'ll|'d) // case insensitive
|
378
380
|
if (cpt == '\'' && pos+1 < offset_end) {
|
379
|
-
|
381
|
+
uint32_t cpt_next = unicode_tolower(_get_cpt(pos+1));
|
380
382
|
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
381
383
|
pos += _add_token(pos+2);
|
382
384
|
continue;
|
383
385
|
}
|
384
386
|
if (pos+2 < offset_end) {
|
385
|
-
|
387
|
+
uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos+2));
|
386
388
|
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
387
389
|
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
388
390
|
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
@@ -424,7 +426,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
424
426
|
while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
|
425
427
|
flags2 = _get_flags(++pos);
|
426
428
|
}
|
427
|
-
|
429
|
+
uint32_t cpt2 = _get_cpt(pos);
|
428
430
|
while (cpt2 == '\r' || cpt2 == '\n') {
|
429
431
|
cpt2 = _get_cpt(++pos);
|
430
432
|
}
|
@@ -435,7 +437,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
435
437
|
size_t num_whitespaces = 0;
|
436
438
|
size_t last_end_r_or_n = 0;
|
437
439
|
while (_get_flags(pos+num_whitespaces).is_whitespace) {
|
438
|
-
|
440
|
+
uint32_t cpt2 = _get_cpt(pos+num_whitespaces);
|
439
441
|
if (cpt2 == '\r' || cpt2 == '\n') {
|
440
442
|
last_end_r_or_n = pos + num_whitespaces + 1;
|
441
443
|
}
|
@@ -450,7 +452,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|
450
452
|
}
|
451
453
|
|
452
454
|
// regex: \s+(?!\S)
|
453
|
-
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) !=
|
455
|
+
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
454
456
|
pos += num_whitespaces - 1;
|
455
457
|
_add_token(pos);
|
456
458
|
continue;
|
@@ -594,6 +596,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|
594
596
|
|
595
597
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
596
598
|
std::vector<uint32_t> result;
|
599
|
+
result.reserve(utf8.size());
|
597
600
|
size_t offset = 0;
|
598
601
|
while (offset < utf8.size()) {
|
599
602
|
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
@@ -626,7 +629,7 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
|
626
629
|
return map.at(utf8);
|
627
630
|
}
|
628
631
|
|
629
|
-
|
632
|
+
uint32_t unicode_tolower(uint32_t cp) {
|
630
633
|
auto it = unicode_map_lowercase.find(cp);
|
631
634
|
return it == unicode_map_lowercase.end() ? cp : it->second;
|
632
635
|
}
|
@@ -679,10 +682,14 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
679
682
|
continue;
|
680
683
|
}
|
681
684
|
|
682
|
-
const
|
685
|
+
const auto flags = unicode_cpt_flags(cpts[i]);
|
683
686
|
|
684
|
-
if (
|
685
|
-
|
687
|
+
if (flags.is_whitespace) {
|
688
|
+
//NOTE: C++ std::regex \s does not match 0x85; Rust and Python regex do.
|
689
|
+
//text_collapsed[i] = (char) 0x85; // <Next Line> as whitespace fallback
|
690
|
+
text_collapsed[i] = (char) 0x0B; // <vertical tab> as whitespace fallback
|
691
|
+
} else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
|
692
|
+
text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
|
686
693
|
} else {
|
687
694
|
text_collapsed[i] = (char) 0xD0; // fallback
|
688
695
|
}
|
@@ -766,9 +773,16 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
766
773
|
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
767
774
|
} else {
|
768
775
|
// no unicode category used, we can use std::wregex directly
|
769
|
-
const std::wstring wtext = unicode_wstring_from_utf8(text);
|
770
776
|
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
771
777
|
|
778
|
+
// std::wregex \s does not match non-ASCII whitespace, so 0x0B is used as a fallback
|
779
|
+
std::wstring wtext(cpts.begin(), cpts.end());
|
780
|
+
for (size_t i = 0; i < wtext.size(); ++i) {
|
781
|
+
if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
|
782
|
+
wtext[i] = 0x0B;
|
783
|
+
}
|
784
|
+
}
|
785
|
+
|
772
786
|
//printf("text: %s\n", text.c_str());
|
773
787
|
//printf("regex_expr: %s\n", regex_expr.c_str());
|
774
788
|
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
|
@@ -58,6 +58,6 @@ codepoint_flags unicode_cpt_flags(const std::string & utf8);
|
|
58
58
|
std::string unicode_byte_to_utf8(uint8_t byte);
|
59
59
|
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
60
60
|
|
61
|
-
|
61
|
+
uint32_t unicode_tolower(uint32_t cp);
|
62
62
|
|
63
63
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.1
|
4
|
+
version: 0.16.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|