llama_cpp 0.16.0 → 0.16.2

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/ext/llama_cpp/extconf.rb +3 -0
  4. data/ext/llama_cpp/llama_cpp.cpp +14 -0
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +4 -0
  7. data/vendor/tmp/llama.cpp/Makefile +119 -54
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
  17. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
  18. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
  19. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
  20. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
  21. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
  23. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
  24. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
  25. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
  26. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
  27. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
  28. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
  29. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
  30. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
  31. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
  32. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
  33. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
  34. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
  35. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
  36. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
  37. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
  38. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
  39. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
  125. data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
  126. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
  127. data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
  128. data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
  129. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
  130. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
  131. data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
  132. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
  133. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
  134. data/vendor/tmp/llama.cpp/ggml.c +158 -414
  135. data/vendor/tmp/llama.cpp/ggml.h +6 -0
  136. data/vendor/tmp/llama.cpp/llama.cpp +628 -279
  137. data/vendor/tmp/llama.cpp/llama.h +9 -1
  138. data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
  139. data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
  140. data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
  141. data/vendor/tmp/llama.cpp/unicode.h +1 -1
  142. metadata +15 -3
data/vendor/tmp/llama.cpp/unicode.cpp CHANGED
@@ -226,8 +226,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
     assert(offset_end <= cpts.size());
     start = offset_end;
 
-    auto _get_cpt = [&] (const size_t pos) -> char32_t {
-        return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
+    static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+    auto _get_cpt = [&] (const size_t pos) -> uint32_t {
+        return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
     };
 
     auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
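Both tokenizer splitters previously returned 0 for "no code point here", which collides with a legitimate U+0000 in the input; the new uint32_t OUT_OF_RANGE sentinel removes that ambiguity, since 0xFFFFFFFF can never be a valid Unicode code point (the maximum is U+10FFFF). A minimal standalone sketch of the pattern follows; get_cpt and the surrounding scaffolding are illustrative, not the library code.

// Minimal sketch of the sentinel change (standalone demo, not the library code).
// 0xFFFFFFFF is never a valid Unicode code point, so it is a safe "out of range"
// marker, unlike 0, which is the real code point U+0000.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;

// Returns the code point at pos, or OUT_OF_RANGE when pos falls outside [ini, end).
static uint32_t get_cpt(const std::vector<uint32_t> & cpts, size_t ini, size_t end, size_t pos) {
    return (ini <= pos && pos < end) ? cpts[pos] : OUT_OF_RANGE;
}

int main() {
    std::vector<uint32_t> cpts = { 'a', 0x0000, 'b' };  // contains a genuine U+0000
    // With the old 0 sentinel, cpts[1] was indistinguishable from "past the end".
    std::printf("%u %u\n", get_cpt(cpts, 0, cpts.size(), 1), get_cpt(cpts, 0, cpts.size(), 3));
    return 0;
}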
@@ -253,18 +254,18 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
     };
 
     for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
-        const char32_t cpt = _get_cpt(pos);
+        const uint32_t cpt = _get_cpt(pos);
         const auto flags = _get_flags(pos);
 
         // regex: 's|'t|'re|'ve|'m|'ll|'d
         if (cpt == '\'' && pos+1 < offset_end) {
-            char32_t cpt_next = _get_cpt(pos+1);
+            uint32_t cpt_next = _get_cpt(pos+1);
             if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
                 pos += _add_token(pos+2);
                 continue;
             }
             if (pos+2 < offset_end) {
-                char32_t cpt_next_next = _get_cpt(pos+2);
+                uint32_t cpt_next_next = _get_cpt(pos+2);
                 if ((cpt_next == 'r' && cpt_next_next == 'e') ||
                     (cpt_next == 'v' && cpt_next_next == 'e') ||
                     (cpt_next == 'l' && cpt_next_next == 'l')) {
@@ -309,7 +310,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
         }
 
         // regex: \s+(?!\S)
-        if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
+        if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
            pos += num_whitespaces - 1;
            _add_token(pos);
            continue;
@@ -344,8 +345,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
     assert(offset_end <= cpts.size());
     start = offset_end;
 
-    auto _get_cpt = [&] (const size_t pos) -> char32_t {
-        return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
+    static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+    auto _get_cpt = [&] (const size_t pos) -> uint32_t {
+        return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
     };
 
     auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
@@ -371,18 +373,18 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
     };
 
     for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
-        const char32_t cpt = _get_cpt(pos);
+        const uint32_t cpt = _get_cpt(pos);
         const auto flags = _get_flags(pos);
 
         // regex: (?i:'s|'t|'re|'ve|'m|'ll|'d) // case insensitive
         if (cpt == '\'' && pos+1 < offset_end) {
-            char32_t cpt_next = unicode_tolower(_get_cpt(pos+1));
+            uint32_t cpt_next = unicode_tolower(_get_cpt(pos+1));
             if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
                 pos += _add_token(pos+2);
                 continue;
             }
             if (pos+2 < offset_end) {
-                char32_t cpt_next_next = unicode_tolower(_get_cpt(pos+2));
+                uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos+2));
                 if ((cpt_next == 'r' && cpt_next_next == 'e') ||
                     (cpt_next == 'v' && cpt_next_next == 'e') ||
                     (cpt_next == 'l' && cpt_next_next == 'l')) {
@@ -424,7 +426,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
             while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number || flags2.is_undefined)) {
                 flags2 = _get_flags(++pos);
             }
-            char32_t cpt2 = _get_cpt(pos);
+            uint32_t cpt2 = _get_cpt(pos);
             while (cpt2 == '\r' || cpt2 == '\n') {
                 cpt2 = _get_cpt(++pos);
             }
@@ -435,7 +437,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
         size_t num_whitespaces = 0;
         size_t last_end_r_or_n = 0;
         while (_get_flags(pos+num_whitespaces).is_whitespace) {
-            char32_t cpt2 = _get_cpt(pos+num_whitespaces);
+            uint32_t cpt2 = _get_cpt(pos+num_whitespaces);
             if (cpt2 == '\r' || cpt2 == '\n') {
                 last_end_r_or_n = pos + num_whitespaces + 1;
             }
@@ -450,7 +452,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
         }
 
         // regex: \s+(?!\S)
-        if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
+        if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
            pos += num_whitespaces - 1;
            _add_token(pos);
            continue;
@@ -594,6 +596,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
 
 std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
     std::vector<uint32_t> result;
+    result.reserve(utf8.size());
     size_t offset = 0;
     while (offset < utf8.size()) {
         result.push_back(unicode_cpt_from_utf8(utf8, offset));
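The reserve() added to unicode_cpts_from_utf8 relies on the fact that every code point occupies at least one UTF-8 byte, so utf8.size() is an upper bound on the number of decoded code points and a single allocation covers the worst case. A rough sketch of the same idea (decode_ascii_only is a hypothetical helper; the real decoding is done by unicode_cpt_from_utf8):

// Sketch of the reserve() idea, not the library decoder.
#include <cstdint>
#include <string>
#include <vector>

std::vector<uint32_t> decode_ascii_only(const std::string & utf8) {
    std::vector<uint32_t> result;
    result.reserve(utf8.size());  // at most one code point per byte, so one allocation suffices
    for (unsigned char c : utf8) {
        if (c < 0x80) {
            result.push_back(c);  // ASCII fast path; multi-byte sequences omitted for brevity
        }
    }
    return result;
}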
@@ -626,7 +629,7 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
     return map.at(utf8);
 }
 
-char32_t unicode_tolower(char32_t cp) {
+uint32_t unicode_tolower(uint32_t cp) {
     auto it = unicode_map_lowercase.find(cp);
     return it == unicode_map_lowercase.end() ? cp : it->second;
 }
@@ -679,10 +682,14 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
                 continue;
             }
 
-            const int cpt_flag = unicode_cpt_flags(cpts[i]).category_flag();
+            const auto flags = unicode_cpt_flags(cpts[i]);
 
-            if (k_ucat_cpt.find(cpt_flag) != k_ucat_cpt.end()) {
-                text_collapsed[i] = k_ucat_cpt.at(cpt_flag);
+            if (flags.is_whitespace) {
+                //NOTE: C++ std::regex \s does not mach 0x85, Rust and Python regex does.
+                //text_collapsed[i] = (char) 0x85;  // <Next Line> as whitespace fallback
+                text_collapsed[i] = (char) 0x0B;    // <vertical tab> as whitespace fallback
+            } else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
+                text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
             } else {
                 text_collapsed[i] = (char) 0xD0; // fallback
             }
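The collapsed-text path now maps every whitespace-class code point to 0x0B (vertical tab) before matching, because std::regex's \s misses some Unicode whitespace such as U+0085. A small standalone demo of why the 0x0B fallback works (not library code):

// std::regex "\s" matches the ASCII vertical tab, so collapsing any whitespace-class
// code point to 0x0B makes it visible to a plain std::regex pattern.
#include <cstdio>
#include <regex>
#include <string>

int main() {
    const std::regex ws("\\s");
    std::string collapsed = "a";
    collapsed += (char) 0x0B;  // stand-in for a collapsed non-ASCII whitespace code point
    collapsed += 'b';
    std::printf("matches: %d\n", (int) std::regex_search(collapsed, ws));  // prints: matches: 1
    return 0;
}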
@@ -766,9 +773,16 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
             bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
         } else {
             // no unicode category used, we can use std::wregex directly
-            const std::wstring wtext = unicode_wstring_from_utf8(text);
             const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
 
+            // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback
+            std::wstring wtext(cpts.begin(), cpts.end());
+            for (size_t i = 0; i < wtext.size(); ++i) {
+                if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
+                    wtext[i] = 0x0B;
+                }
+            }
+
             //printf("text: %s\n", text.c_str());
             //printf("regex_expr: %s\n", regex_expr.c_str());
             bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
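The std::wregex path gets the same treatment: wtext is now built straight from the decoded code points and any non-ASCII whitespace is rewritten to 0x0B before matching. A brief sketch of that pre-pass, where is_unicode_whitespace stands in for the library's unicode_cpt_flags(...).is_whitespace check:

// Illustrative pre-pass over a wide string before std::wregex matching.
#include <cstddef>
#include <string>

static bool is_unicode_whitespace(wchar_t c) {
    return c == 0x85 || c == 0xA0 || c == 0x2028 || c == 0x2029;  // tiny illustrative subset
}

static void collapse_non_ascii_whitespace(std::wstring & wtext) {
    for (size_t i = 0; i < wtext.size(); ++i) {
        if (wtext[i] > 0x7F && is_unicode_whitespace(wtext[i])) {
            wtext[i] = 0x0B;  // ASCII vertical tab, matched by std::wregex "\s"
        }
    }
}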
data/vendor/tmp/llama.cpp/unicode.h CHANGED
@@ -58,6 +58,6 @@ codepoint_flags unicode_cpt_flags(const std::string & utf8);
 std::string unicode_byte_to_utf8(uint8_t byte);
 uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
-char32_t unicode_tolower(char32_t cp);
+uint32_t unicode_tolower(uint32_t cp);
 
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.16.0
+  version: 0.16.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-06-08 00:00:00.000000000 Z
+date: 2024-06-22 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -42,6 +42,8 @@ files:
 - vendor/tmp/llama.cpp/ggml-backend-impl.h
 - vendor/tmp/llama.cpp/ggml-backend.c
 - vendor/tmp/llama.cpp/ggml-backend.h
+- vendor/tmp/llama.cpp/ggml-blas.cpp
+- vendor/tmp/llama.cpp/ggml-blas.h
 - vendor/tmp/llama.cpp/ggml-common.h
 - vendor/tmp/llama.cpp/ggml-cuda.cu
 - vendor/tmp/llama.cpp/ggml-cuda.h
@@ -161,6 +163,16 @@ files:
 - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
 - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
 - vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu
+- vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu
 - vendor/tmp/llama.cpp/ggml-cuda/tsembd.cu
 - vendor/tmp/llama.cpp/ggml-cuda/unary.cu
 - vendor/tmp/llama.cpp/ggml-cuda/upscale.cu
@@ -214,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.10
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.