cui-llama.rn 1.4.2 → 1.4.4

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (186)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/build-arm64/CMakeCache.txt +429 -0
  4. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +81 -0
  5. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
  6. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  7. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  8. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +15 -0
  9. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +904 -0
  10. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  11. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +919 -0
  12. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  13. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +431 -0
  14. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  15. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
  16. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
  17. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
  18. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
  19. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
  20. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
  21. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
  22. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
  23. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
  24. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
  25. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
  26. package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
  27. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  28. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
  29. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  30. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
  31. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  32. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
  33. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  34. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
  35. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  36. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
  37. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  38. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
  39. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  40. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
  41. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  42. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
  43. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  44. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
  45. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  46. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
  47. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  48. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
  49. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  50. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
  51. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  52. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
  53. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  54. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
  55. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
  56. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
  57. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
  58. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
  59. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
  60. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
  61. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
  62. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
  63. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
  64. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
  65. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
  66. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
  67. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
  68. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
  69. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
  70. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
  71. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
  72. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
  73. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
  74. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
  75. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
  76. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
  77. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
  78. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
  79. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
  80. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
  81. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
  82. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
  83. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
  84. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
  85. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
  86. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
  87. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
  88. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
  89. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
  90. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
  91. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
  92. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
  93. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
  94. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
  95. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
  96. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
  97. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
  98. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
  99. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
  100. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
  101. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
  102. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
  103. package/android/src/main/build-arm64/Makefile +1862 -0
  104. package/android/src/main/build-arm64/cmake_install.cmake +66 -0
  105. package/android/src/main/java/com/rnllama/LlamaContext.java +92 -18
  106. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  107. package/android/src/main/jni-utils.h +6 -0
  108. package/android/src/main/jni.cpp +287 -31
  109. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  110. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  111. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  112. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  113. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  114. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  115. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  116. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  117. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  118. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  119. package/cpp/chat-template.hpp +529 -0
  120. package/cpp/chat.cpp +1085 -0
  121. package/cpp/chat.hpp +55 -0
  122. package/cpp/common.cpp +159 -36
  123. package/cpp/common.h +64 -19
  124. package/cpp/ggml-alloc.c +1 -13
  125. package/cpp/ggml-common.h +0 -2
  126. package/cpp/ggml-cpu-impl.h +6 -12
  127. package/cpp/ggml-cpu-quants.c +937 -340
  128. package/cpp/ggml-cpu.c +207 -113
  129. package/cpp/ggml-cpu.cpp +4 -6
  130. package/cpp/ggml-cpu.h +1 -1
  131. package/cpp/ggml-metal.h +66 -66
  132. package/cpp/ggml-metal.m +141 -23
  133. package/cpp/ggml.c +24 -14
  134. package/cpp/ggml.h +2 -2
  135. package/cpp/json-schema-to-grammar.cpp +46 -66
  136. package/cpp/json-schema-to-grammar.h +15 -1
  137. package/cpp/llama-arch.cpp +7 -2
  138. package/cpp/llama-arch.h +3 -1
  139. package/cpp/llama-chat.cpp +10 -1
  140. package/cpp/llama-chat.h +1 -0
  141. package/cpp/llama-grammar.cpp +86 -6
  142. package/cpp/llama-grammar.h +22 -1
  143. package/cpp/llama-impl.h +6 -6
  144. package/cpp/llama-kv-cache.h +1 -1
  145. package/cpp/llama-mmap.h +1 -0
  146. package/cpp/llama-model-loader.cpp +1 -1
  147. package/cpp/llama-model.cpp +32 -6
  148. package/cpp/llama-sampling.cpp +178 -61
  149. package/cpp/llama-vocab.cpp +8 -3
  150. package/cpp/llama.cpp +188 -128
  151. package/cpp/llama.h +27 -10
  152. package/cpp/log.cpp +32 -10
  153. package/cpp/log.h +12 -1
  154. package/cpp/minja.hpp +2883 -0
  155. package/cpp/rn-llama.cpp +82 -5
  156. package/cpp/rn-llama.h +16 -1
  157. package/cpp/sampling.cpp +68 -41
  158. package/cpp/sampling.h +3 -0
  159. package/cpp/sgemm.cpp +9 -8
  160. package/cpp/unicode.cpp +9 -2
  161. package/ios/CMakeLists.txt +6 -0
  162. package/ios/RNLlama.h +0 -8
  163. package/ios/RNLlama.mm +27 -3
  164. package/ios/RNLlamaContext.h +10 -1
  165. package/ios/RNLlamaContext.mm +269 -57
  166. package/jest/mock.js +21 -2
  167. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  168. package/lib/commonjs/grammar.js +3 -0
  169. package/lib/commonjs/grammar.js.map +1 -1
  170. package/lib/commonjs/index.js +87 -13
  171. package/lib/commonjs/index.js.map +1 -1
  172. package/lib/module/NativeRNLlama.js.map +1 -1
  173. package/lib/module/grammar.js +3 -0
  174. package/lib/module/grammar.js.map +1 -1
  175. package/lib/module/index.js +86 -13
  176. package/lib/module/index.js.map +1 -1
  177. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  178. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  179. package/lib/typescript/grammar.d.ts.map +1 -1
  180. package/lib/typescript/index.d.ts +32 -7
  181. package/lib/typescript/index.d.ts.map +1 -1
  182. package/llama-rn.podspec +1 -1
  183. package/package.json +2 -2
  184. package/src/NativeRNLlama.ts +115 -3
  185. package/src/grammar.ts +3 -0
  186. package/src/index.ts +138 -21
@@ -316,6 +316,13 @@ static uint32_t get_rng_seed(uint32_t seed) {
 
 // llama_sampler API
 
+struct llama_sampler * llama_sampler_init(const struct llama_sampler_i * iface, llama_sampler_context_t ctx) {
+    return new llama_sampler {
+        /* .iface = */ iface,
+        /* .ctx = */ ctx,
+    };
+}
+
 const char * llama_sampler_name(const struct llama_sampler * smpl) {
     if (!smpl->iface) {
         return "(null)";
@@ -347,10 +354,10 @@ struct llama_sampler * llama_sampler_clone(const struct llama_sampler * smpl) {
     }
 
     if (smpl->ctx == nullptr) {
-        return new llama_sampler {
+        return llama_sampler_init(
             /* .iface = */ smpl->iface,
-            /* .ctx = */ nullptr,
-        };
+            /* .ctx = */ nullptr
+        );
     }
 
     LM_GGML_ABORT("the sampler does not support cloning");
@@ -472,15 +479,15 @@ static struct llama_sampler_i llama_sampler_chain_i = {
 };
 
 struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_params params) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_chain_i,
         /* .ctx = */ new llama_sampler_chain {
             /* .params = */ params,
             /* .samplers = */ {},
             /* .t_sample_us = */ 0,
             /* .n_sample = */ 0,
-        },
-    };
+        }
+    );
 }
 
 void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) {
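
A chain created through this constructor is normally populated with llama_sampler_chain_add, as in the short sketch below; the parameter values are illustrative, not defaults taken from this package.

// Illustrative only: assembling a small sampler chain with the public API.
#include "llama.h"

struct llama_sampler * make_example_chain() {
    struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));                // keep the 40 best candidates
    llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8f));               // scale logits by 1/0.8
    llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); // final random draw
    return chain;   // release later with llama_sampler_free(chain)
}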
@@ -547,10 +554,10 @@ static struct llama_sampler_i llama_sampler_greedy_i = {
 };
 
 struct llama_sampler * llama_sampler_init_greedy() {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_greedy_i,
-        /* .ctx = */ nullptr,
-    };
+        /* .ctx = */ nullptr
+    );
 }
 
 // dist
@@ -609,14 +616,14 @@ static struct llama_sampler_i llama_sampler_dist_i = {
 
 struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
     auto seed_cur = get_rng_seed(seed);
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_dist_i,
         /* .ctx = */ new llama_sampler_dist {
             /* .seed = */ seed,
             /* .seed_cur = */ seed_cur,
             /* .rng = */ std::mt19937(seed_cur),
-        },
-    };
+        }
+    );
 }
 
 // softmax
@@ -639,10 +646,10 @@ static struct llama_sampler_i llama_sampler_softmax_i = {
 };
 
 struct llama_sampler * llama_sampler_init_softmax() {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_softmax_i,
-        /* .ctx = */ nullptr,
-    };
+        /* .ctx = */ nullptr
+    );
 }
 
 // top-k
@@ -679,12 +686,12 @@ static struct llama_sampler_i llama_sampler_top_k_i = {
 };
 
 struct llama_sampler * llama_sampler_init_top_k(int32_t k) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_top_k_i,
         /* .ctx = */ new llama_sampler_top_k {
             /* .k = */ k,
-        },
-    };
+        }
+    );
 }
 
 // top-p
@@ -745,13 +752,13 @@ static struct llama_sampler_i llama_sampler_top_p_i = {
 };
 
 struct llama_sampler * llama_sampler_init_top_p(float p, size_t min_keep) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_top_p_i,
         /* .ctx = */ new llama_sampler_top_p {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        },
-    };
+        }
+    );
 }
 
 // min-p
@@ -841,13 +848,13 @@ static struct llama_sampler_i llama_sampler_min_p_i = {
 };
 
 struct llama_sampler * llama_sampler_init_min_p(float p, size_t min_keep) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_min_p_i,
         /* .ctx = */ new llama_sampler_min_p {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        },
-    };
+        }
+    );
 }
 
 // typical
@@ -940,13 +947,13 @@ static struct llama_sampler_i llama_sampler_typical_i = {
 };
 
 struct llama_sampler * llama_sampler_init_typical(float p, size_t min_keep) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_typical_i,
         /* .ctx = */ new llama_sampler_typical {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        },
-    };
+        }
+    );
 }
 
 // temp
@@ -984,12 +991,12 @@ static struct llama_sampler_i llama_sampler_temp_i = {
 };
 
 struct llama_sampler * llama_sampler_init_temp(float temp) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_temp_i,
         /* .ctx = */ new llama_sampler_temp {
             /*.temp = */ temp,
-        },
-    };
+        }
+    );
 }
 
 // temp-ext
@@ -1094,14 +1101,14 @@ static struct llama_sampler_i llama_sampler_temp_ext_i = {
 };
 
 struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_temp_ext_i,
         /* .ctx = */ new llama_sampler_temp_ext {
             /* .temp = */ temp,
             /* .delta = */ delta,
             /* .exponent = */ exponent,
-        },
-    };
+        }
+    );
 }
 
 // xtc
@@ -1186,7 +1193,7 @@ static struct llama_sampler_i llama_sampler_xtc_i = {
 
 struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep, uint32_t seed) {
     auto seed_cur = get_rng_seed(seed);
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_xtc_i,
         /* .ctx = */ new llama_sampler_xtc {
             /* .probability = */ p,
@@ -1195,8 +1202,8 @@ struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep,
             /* .seed = */ seed,
             /* .seed_cur = */ seed_cur,
             /* .rng = */ std::mt19937(seed_cur),
-        },
-    };
+        }
+    );
 }
 
 // mirostat
@@ -1293,7 +1300,7 @@ static struct llama_sampler_i llama_sampler_mirostat_i = {
 
 struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t seed, float tau, float eta, int32_t m) {
     auto seed_cur = get_rng_seed(seed);
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_mirostat_i,
         /* .ctx = */ new llama_sampler_mirostat {
             /* .n_vocab = */ n_vocab,
@@ -1304,8 +1311,8 @@ struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t see
             /* .m = */ m,
             /* .mu = */ 2.0f*tau,
             /* .rng = */ std::mt19937(seed_cur),
-        },
-    };
+        }
+    );
 }
 
 // mirostat v2
@@ -1392,7 +1399,7 @@ static struct llama_sampler_i llama_sampler_mirostat_v2_i = {
 
 struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau, float eta) {
     auto seed_cur = get_rng_seed(seed);
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_mirostat_v2_i,
         /* .ctx = */ new llama_sampler_mirostat_v2 {
             /* .seed = */ seed,
@@ -1401,8 +1408,8 @@ struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau,
             /* .eta = */ eta,
             /* .mu = */ 2.0f*tau,
             /* .rng = */ std::mt19937(seed_cur),
-        },
-    };
+        }
+    );
 }
 
 // grammar
@@ -1434,13 +1441,30 @@ static void llama_sampler_grammar_apply(struct llama_sampler * smpl, llama_token
     }
 }
 
+// Fwd declare to break reset --> init_impl --> llama_sampler_grammar_i --> reset cycle.
+static struct llama_sampler * llama_sampler_init_grammar_impl(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root,
+        bool lazy,
+        const char ** trigger_words,
+        size_t num_trigger_words,
+        const llama_token * trigger_tokens,
+        size_t num_trigger_tokens);
+
 static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
     auto * ctx = (llama_sampler_grammar *) smpl->ctx;
     if (!ctx->grammar) {
         return;
     }
 
-    auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str());
+    std::vector<const char *> trigger_words;
+    for (auto & word : ctx->grammar->trigger_words) {
+        trigger_words.push_back(word.c_str());
+    }
+    auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str(),
+                                                 ctx->grammar->lazy, trigger_words.data(), trigger_words.size(),
+                                                 ctx->grammar->trigger_tokens.data(), ctx->grammar->trigger_tokens.size());
 
     llama_grammar_free_impl(ctx->grammar);
     ctx->grammar = grammar_new;
@@ -1449,7 +1473,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
 static struct llama_sampler * llama_sampler_grammar_clone(const struct llama_sampler * smpl) {
     const auto * ctx = (const llama_sampler_grammar *) smpl->ctx;
 
-    auto * result = llama_sampler_init_grammar(ctx->vocab, nullptr, nullptr);
+    auto * result = llama_sampler_init_grammar_impl(ctx->vocab, nullptr, nullptr, false, nullptr, 0, nullptr, 0);
 
     // copy the state
     {
@@ -1485,7 +1509,15 @@ static struct llama_sampler_i llama_sampler_grammar_i = {
     /* .free = */ llama_sampler_grammar_free,
 };
 
-struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) {
+static struct llama_sampler * llama_sampler_init_grammar_impl(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root,
+        bool lazy,
+        const char ** trigger_words,
+        size_t num_trigger_words,
+        const llama_token * trigger_tokens,
+        size_t num_trigger_tokens) {
     auto * ctx = new llama_sampler_grammar;
 
     if (grammar_str != nullptr && grammar_str[0] != '\0') {
@@ -1493,7 +1525,7 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc
             /* .vocab = */ vocab,
             /* .grammar_str = */ grammar_str,
             /* .grammar_root = */ grammar_root,
-            /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root),
+            /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, lazy, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
         };
     } else {
         *ctx = {
@@ -1504,10 +1536,28 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc
         };
     }
 
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_grammar_i,
-        /* .ctx = */ ctx,
-    };
+        /* .ctx = */ ctx
+    );
+}
+
+struct llama_sampler * llama_sampler_init_grammar(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root) {
+    return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ false, nullptr, 0, nullptr, 0);
+}
+
+struct llama_sampler * llama_sampler_init_grammar_lazy(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root,
+        const char ** trigger_words,
+        size_t num_trigger_words,
+        const llama_token * trigger_tokens,
+        size_t num_trigger_tokens) {
+    return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens);
 }
 
 // penalties
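
The llama_sampler_init_grammar_lazy entry point added here only enforces the grammar once one of the trigger words (or trigger tokens) has appeared in the output, which is the usual mechanism for constraining tool-call style responses without constraining ordinary text. A usage sketch under that assumption; the grammar string and trigger word are placeholders, and vocab is assumed to come from an already loaded model.

// Illustrative only: placeholder GBNF grammar and trigger word.
#include "llama.h"

struct llama_sampler * make_lazy_grammar_sampler(const struct llama_vocab * vocab) {
    const char * grammar    = "root ::= \"{\" [^}]* \"}\"";   // placeholder grammar
    const char * triggers[] = { "<tool_call>" };               // grammar activates after this word

    return llama_sampler_init_grammar_lazy(
            vocab, grammar, "root",
            triggers, /* num_trigger_words = */ 1,
            /* trigger_tokens     = */ nullptr,
            /* num_trigger_tokens = */ 0);
}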
@@ -1636,7 +1686,7 @@ struct llama_sampler * llama_sampler_init_penalties(
         float penalty_present) {
     penalty_last_n = std::max(penalty_last_n, 0);
 
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_penalties_i,
         /* .ctx = */ new llama_sampler_penalties {
             /* .penalty_last_n = */ penalty_last_n,
@@ -1645,8 +1695,75 @@ struct llama_sampler * llama_sampler_init_penalties(
             /* .penalty_present = */ penalty_present,
             /* .prev = */ ring_buffer<llama_token>(penalty_last_n),
             /* .token_count = */ {},
-        },
-    };
+        }
+    );
+}
+
+// top-n-sigma
+
+struct llama_sampler_top_n_sigma {
+    const float n;
+};
+
+static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler * /*smpl*/) {
+    return "top-n-sigma";
+}
+
+static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
+    const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
+
+    // find max logit and calculate mean
+    float max = cur_p->data[0].logit;
+    float logits_sum = 0;
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        if (cur_p->data[i].logit > max) {
+            max = cur_p->data[i].logit;
+        }
+        logits_sum += cur_p->data[i].logit;
+    }
+    float mean = logits_sum/cur_p->size;
+
+    // calculate standard deviation
+    float acc = 0;
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        acc += pow(cur_p->data[i].logit - mean, 2);
+    }
+    float std = sqrt(acc/cur_p->size);
+
+    //apply mask
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        if (cur_p->data[i].logit < max - (ctx->n * std)) {
+            cur_p->data[i].logit = -INFINITY;
+        }
+    }
+    llama_sampler_softmax_impl(cur_p);
+}
+
+static struct llama_sampler * llama_sampler_top_n_sigma_clone(const struct llama_sampler * smpl) {
+    const auto * ctx = (const llama_sampler_top_n_sigma *) smpl->ctx;
+    return llama_sampler_init_top_n_sigma(ctx->n);
+}
+
+static void llama_sampler_top_n_sigma_free(struct llama_sampler * smpl) {
+    delete (llama_sampler_top_n_sigma *) smpl->ctx;
+}
+
+static struct llama_sampler_i llama_sampler_top_n_sigma_i = {
+    /* .name = */ llama_sampler_top_n_sigma_name,
+    /* .accept = */ nullptr,
+    /* .apply = */ llama_sampler_top_n_sigma_apply,
+    /* .reset = */ nullptr,
+    /* .clone = */ llama_sampler_top_n_sigma_clone,
+    /* .free = */ llama_sampler_top_n_sigma_free,
+};
+
+struct llama_sampler * llama_sampler_init_top_n_sigma(float n) {
+    return llama_sampler_init(
+        /* .iface = */ &llama_sampler_top_n_sigma_i,
+        /* .ctx = */ new llama_sampler_top_n_sigma {
+            /* .n = */ n,
+        }
+    );
 }
 
 // DRY
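
For reference, the top-n-sigma sampler added above keeps only candidates whose logit lies within n standard deviations of the maximum: anything below max - n*sigma is masked to -INFINITY before the final softmax. A tiny standalone illustration of that masking rule, independent of the llama.cpp types and with made-up logits:

// Standalone illustration of the top-n-sigma masking rule; not library code.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> logits = { 5.0f, 4.5f, 2.0f, -1.0f };   // made-up values
    const float n = 1.0f;

    float max = logits[0], sum = 0.0f;
    for (float l : logits) { max = std::max(max, l); sum += l; }
    const float mean = sum / logits.size();

    float acc = 0.0f;
    for (float l : logits) { acc += (l - mean) * (l - mean); }
    const float sigma = std::sqrt(acc / logits.size());

    for (float & l : logits) {
        if (l < max - n * sigma) {
            l = -INFINITY;              // masked out, as in the apply() above
        }
    }
    for (float l : logits) {
        std::printf("%.2f\n", l);       // prints 5.00, 4.50, -inf, -inf
    }
}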
@@ -1999,7 +2116,7 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
         }
     }
 
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_dry_i,
         /* .ctx = */ new llama_sampler_dry {
             /* .total_context_size = */ context_size,
@@ -2011,8 +2128,8 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
             /* .dry_repeat_count = */ dry_enabled ? std::vector<int>(effective_dry_penalty_last_n, 0) : std::vector<int>{},
             /* .dry_max_token_repeat = */ {},
             /* .last_tokens = */ dry_enabled ? ring_buffer<llama_token>(effective_dry_penalty_last_n) : ring_buffer<llama_token>(0),
-        },
-    };
+        }
+    );
 }
 
 // wrapper for test-sampling.cpp
@@ -2113,14 +2230,14 @@ struct llama_sampler * llama_sampler_init_logit_bias(
         int32_t n_vocab,
         int32_t n_logit_bias,
         const llama_logit_bias * logit_bias) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_logit_bias_i,
         /* .ctx = */ new llama_sampler_logit_bias {
             /* .n_vocab = */ n_vocab,
             /* .logit_bias = */ std::vector<llama_logit_bias>(logit_bias, logit_bias + n_logit_bias),
             /* .to_search = */ {},
-        },
-    };
+        }
+    );
 }
 
 // infill
@@ -2335,14 +2452,14 @@ static struct llama_sampler_i llama_sampler_infill_i = {
 };
 
 struct llama_sampler * llama_sampler_init_infill(const struct llama_vocab * vocab) {
-    return new llama_sampler {
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_infill_i,
         /* .ctx = */ new llama_sampler_infill {
             /* .vocab = */ vocab,
             /* .buf0 = */ std::vector<char>(512),
             /* .buf1 = */ std::vector<char>(512),
-        },
-    };
+        }
+    );
 }
 
 // utils
@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
 
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
-
-    std::map<std::pair<std::string, std::string>, int> bpe_ranks;
+    struct pair_hash {
+        size_t operator()(const std::pair<std::string, std::string> & p) const {
+            return std::hash<std::string>{}(p.first) ^ //create some hash for pair
+                   (std::hash<std::string>{}(p.second) << 1);
+        }
+    };
+    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;
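
The hunk above replaces the ordered std::map keyed on string pairs with an std::unordered_map plus a hand-rolled pair hash (the hash of the first string XOR'ed with the shifted hash of the second), trading ordered iteration for average O(1) lookups of BPE merge ranks. The same pattern in isolation, with made-up merge data, looks roughly like this:

// Generic pair-of-strings hash map mirroring the bpe_ranks change; the sample
// merges and ranks are invented for illustration.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>

struct pair_hash {
    size_t operator()(const std::pair<std::string, std::string> & p) const {
        return std::hash<std::string>{}(p.first) ^ (std::hash<std::string>{}(p.second) << 1);
    }
};

int main() {
    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> ranks;
    ranks[{"h", "e"}]  = 0;   // hypothetical merge ranks
    ranks[{"l", "lo"}] = 1;

    auto it = ranks.find({"h", "e"});
    if (it != ranks.end()) {
        std::printf("rank = %d\n", it->second);   // prints: rank = 0
    }
}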
@@ -1687,7 +1692,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         LM_GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         linefeed_id = ids[0];
     } else {
-        const std::vector<int> ids = tokenize("\xC4\x8A", false); // U+010A
+        const std::vector<int> ids = tokenize("\n", false);
 
         //LM_GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         if (ids.empty()) {