cui-llama.rn 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/build-arm64/CMakeCache.txt +429 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +21 -21
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +376 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
- package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
- package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
- package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/Makefile +1862 -0
- package/android/src/main/build-arm64/cmake_install.cmake +66 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +287 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1085 -0
- package/cpp/chat.hpp +55 -0
- package/cpp/common.cpp +159 -36
- package/cpp/common.h +64 -19
- package/cpp/ggml-alloc.c +1 -13
- package/cpp/ggml-common.h +0 -2
- package/cpp/ggml-cpu-impl.h +6 -12
- package/cpp/ggml-cpu-quants.c +937 -340
- package/cpp/ggml-cpu.c +207 -113
- package/cpp/ggml-cpu.cpp +4 -6
- package/cpp/ggml-cpu.h +1 -1
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +141 -23
- package/cpp/ggml.c +24 -14
- package/cpp/ggml.h +2 -2
- package/cpp/json-schema-to-grammar.cpp +46 -66
- package/cpp/json-schema-to-grammar.h +15 -1
- package/cpp/llama-arch.cpp +7 -2
- package/cpp/llama-arch.h +3 -1
- package/cpp/llama-chat.cpp +10 -1
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-grammar.cpp +86 -6
- package/cpp/llama-grammar.h +22 -1
- package/cpp/llama-impl.h +6 -6
- package/cpp/llama-kv-cache.h +1 -1
- package/cpp/llama-mmap.h +1 -0
- package/cpp/llama-model-loader.cpp +1 -1
- package/cpp/llama-model.cpp +32 -6
- package/cpp/llama-sampling.cpp +178 -61
- package/cpp/llama-vocab.cpp +8 -3
- package/cpp/llama.cpp +188 -128
- package/cpp/llama.h +27 -10
- package/cpp/log.cpp +32 -10
- package/cpp/log.h +12 -1
- package/cpp/minja.hpp +2883 -0
- package/cpp/rn-llama.cpp +82 -5
- package/cpp/rn-llama.h +16 -1
- package/cpp/sampling.cpp +68 -41
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +9 -8
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +3 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
package/cpp/llama-sampling.cpp
CHANGED
@@ -316,6 +316,13 @@ static uint32_t get_rng_seed(uint32_t seed) {
|
|
316
316
|
|
317
317
|
// llama_sampler API
|
318
318
|
|
319
|
+
struct llama_sampler * llama_sampler_init(const struct llama_sampler_i * iface, llama_sampler_context_t ctx) {
|
320
|
+
return new llama_sampler {
|
321
|
+
/* .iface = */ iface,
|
322
|
+
/* .ctx = */ ctx,
|
323
|
+
};
|
324
|
+
}
|
325
|
+
|
319
326
|
const char * llama_sampler_name(const struct llama_sampler * smpl) {
|
320
327
|
if (!smpl->iface) {
|
321
328
|
return "(null)";
|
@@ -347,10 +354,10 @@ struct llama_sampler * llama_sampler_clone(const struct llama_sampler * smpl) {
|
|
347
354
|
}
|
348
355
|
|
349
356
|
if (smpl->ctx == nullptr) {
|
350
|
-
return
|
357
|
+
return llama_sampler_init(
|
351
358
|
/* .iface = */ smpl->iface,
|
352
|
-
/* .ctx = */ nullptr
|
353
|
-
|
359
|
+
/* .ctx = */ nullptr
|
360
|
+
);
|
354
361
|
}
|
355
362
|
|
356
363
|
LM_GGML_ABORT("the sampler does not support cloning");
|
@@ -472,15 +479,15 @@ static struct llama_sampler_i llama_sampler_chain_i = {
|
|
472
479
|
};
|
473
480
|
|
474
481
|
struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_params params) {
|
475
|
-
return
|
482
|
+
return llama_sampler_init(
|
476
483
|
/* .iface = */ &llama_sampler_chain_i,
|
477
484
|
/* .ctx = */ new llama_sampler_chain {
|
478
485
|
/* .params = */ params,
|
479
486
|
/* .samplers = */ {},
|
480
487
|
/* .t_sample_us = */ 0,
|
481
488
|
/* .n_sample = */ 0,
|
482
|
-
}
|
483
|
-
|
489
|
+
}
|
490
|
+
);
|
484
491
|
}
|
485
492
|
|
486
493
|
void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) {
|
@@ -547,10 +554,10 @@ static struct llama_sampler_i llama_sampler_greedy_i = {
|
|
547
554
|
};
|
548
555
|
|
549
556
|
struct llama_sampler * llama_sampler_init_greedy() {
|
550
|
-
return
|
557
|
+
return llama_sampler_init(
|
551
558
|
/* .iface = */ &llama_sampler_greedy_i,
|
552
|
-
/* .ctx = */ nullptr
|
553
|
-
|
559
|
+
/* .ctx = */ nullptr
|
560
|
+
);
|
554
561
|
}
|
555
562
|
|
556
563
|
// dist
|
@@ -609,14 +616,14 @@ static struct llama_sampler_i llama_sampler_dist_i = {
|
|
609
616
|
|
610
617
|
struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
|
611
618
|
auto seed_cur = get_rng_seed(seed);
|
612
|
-
return
|
619
|
+
return llama_sampler_init(
|
613
620
|
/* .iface = */ &llama_sampler_dist_i,
|
614
621
|
/* .ctx = */ new llama_sampler_dist {
|
615
622
|
/* .seed = */ seed,
|
616
623
|
/* .seed_cur = */ seed_cur,
|
617
624
|
/* .rng = */ std::mt19937(seed_cur),
|
618
|
-
}
|
619
|
-
|
625
|
+
}
|
626
|
+
);
|
620
627
|
}
|
621
628
|
|
622
629
|
// softmax
|
@@ -639,10 +646,10 @@ static struct llama_sampler_i llama_sampler_softmax_i = {
|
|
639
646
|
};
|
640
647
|
|
641
648
|
struct llama_sampler * llama_sampler_init_softmax() {
|
642
|
-
return
|
649
|
+
return llama_sampler_init(
|
643
650
|
/* .iface = */ &llama_sampler_softmax_i,
|
644
|
-
/* .ctx = */ nullptr
|
645
|
-
|
651
|
+
/* .ctx = */ nullptr
|
652
|
+
);
|
646
653
|
}
|
647
654
|
|
648
655
|
// top-k
|
@@ -679,12 +686,12 @@ static struct llama_sampler_i llama_sampler_top_k_i = {
|
|
679
686
|
};
|
680
687
|
|
681
688
|
struct llama_sampler * llama_sampler_init_top_k(int32_t k) {
|
682
|
-
return
|
689
|
+
return llama_sampler_init(
|
683
690
|
/* .iface = */ &llama_sampler_top_k_i,
|
684
691
|
/* .ctx = */ new llama_sampler_top_k {
|
685
692
|
/* .k = */ k,
|
686
|
-
}
|
687
|
-
|
693
|
+
}
|
694
|
+
);
|
688
695
|
}
|
689
696
|
|
690
697
|
// top-p
|
@@ -745,13 +752,13 @@ static struct llama_sampler_i llama_sampler_top_p_i = {
|
|
745
752
|
};
|
746
753
|
|
747
754
|
struct llama_sampler * llama_sampler_init_top_p(float p, size_t min_keep) {
|
748
|
-
return
|
755
|
+
return llama_sampler_init(
|
749
756
|
/* .iface = */ &llama_sampler_top_p_i,
|
750
757
|
/* .ctx = */ new llama_sampler_top_p {
|
751
758
|
/* .p = */ p,
|
752
759
|
/* .min_keep = */ min_keep,
|
753
|
-
}
|
754
|
-
|
760
|
+
}
|
761
|
+
);
|
755
762
|
}
|
756
763
|
|
757
764
|
// min-p
|
@@ -841,13 +848,13 @@ static struct llama_sampler_i llama_sampler_min_p_i = {
|
|
841
848
|
};
|
842
849
|
|
843
850
|
struct llama_sampler * llama_sampler_init_min_p(float p, size_t min_keep) {
|
844
|
-
return
|
851
|
+
return llama_sampler_init(
|
845
852
|
/* .iface = */ &llama_sampler_min_p_i,
|
846
853
|
/* .ctx = */ new llama_sampler_min_p {
|
847
854
|
/* .p = */ p,
|
848
855
|
/* .min_keep = */ min_keep,
|
849
|
-
}
|
850
|
-
|
856
|
+
}
|
857
|
+
);
|
851
858
|
}
|
852
859
|
|
853
860
|
// typical
|
@@ -940,13 +947,13 @@ static struct llama_sampler_i llama_sampler_typical_i = {
|
|
940
947
|
};
|
941
948
|
|
942
949
|
struct llama_sampler * llama_sampler_init_typical(float p, size_t min_keep) {
|
943
|
-
return
|
950
|
+
return llama_sampler_init(
|
944
951
|
/* .iface = */ &llama_sampler_typical_i,
|
945
952
|
/* .ctx = */ new llama_sampler_typical {
|
946
953
|
/* .p = */ p,
|
947
954
|
/* .min_keep = */ min_keep,
|
948
|
-
}
|
949
|
-
|
955
|
+
}
|
956
|
+
);
|
950
957
|
}
|
951
958
|
|
952
959
|
// temp
|
@@ -984,12 +991,12 @@ static struct llama_sampler_i llama_sampler_temp_i = {
|
|
984
991
|
};
|
985
992
|
|
986
993
|
struct llama_sampler * llama_sampler_init_temp(float temp) {
|
987
|
-
return
|
994
|
+
return llama_sampler_init(
|
988
995
|
/* .iface = */ &llama_sampler_temp_i,
|
989
996
|
/* .ctx = */ new llama_sampler_temp {
|
990
997
|
/*.temp = */ temp,
|
991
|
-
}
|
992
|
-
|
998
|
+
}
|
999
|
+
);
|
993
1000
|
}
|
994
1001
|
|
995
1002
|
// temp-ext
|
@@ -1094,14 +1101,14 @@ static struct llama_sampler_i llama_sampler_temp_ext_i = {
|
|
1094
1101
|
};
|
1095
1102
|
|
1096
1103
|
struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent) {
|
1097
|
-
return
|
1104
|
+
return llama_sampler_init(
|
1098
1105
|
/* .iface = */ &llama_sampler_temp_ext_i,
|
1099
1106
|
/* .ctx = */ new llama_sampler_temp_ext {
|
1100
1107
|
/* .temp = */ temp,
|
1101
1108
|
/* .delta = */ delta,
|
1102
1109
|
/* .exponent = */ exponent,
|
1103
|
-
}
|
1104
|
-
|
1110
|
+
}
|
1111
|
+
);
|
1105
1112
|
}
|
1106
1113
|
|
1107
1114
|
// xtc
|
@@ -1186,7 +1193,7 @@ static struct llama_sampler_i llama_sampler_xtc_i = {
|
|
1186
1193
|
|
1187
1194
|
struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep, uint32_t seed) {
|
1188
1195
|
auto seed_cur = get_rng_seed(seed);
|
1189
|
-
return
|
1196
|
+
return llama_sampler_init(
|
1190
1197
|
/* .iface = */ &llama_sampler_xtc_i,
|
1191
1198
|
/* .ctx = */ new llama_sampler_xtc {
|
1192
1199
|
/* .probability = */ p,
|
@@ -1195,8 +1202,8 @@ struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep,
|
|
1195
1202
|
/* .seed = */ seed,
|
1196
1203
|
/* .seed_cur = */ seed_cur,
|
1197
1204
|
/* .rng = */ std::mt19937(seed_cur),
|
1198
|
-
}
|
1199
|
-
|
1205
|
+
}
|
1206
|
+
);
|
1200
1207
|
}
|
1201
1208
|
|
1202
1209
|
// mirostat
|
@@ -1293,7 +1300,7 @@ static struct llama_sampler_i llama_sampler_mirostat_i = {
|
|
1293
1300
|
|
1294
1301
|
struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t seed, float tau, float eta, int32_t m) {
|
1295
1302
|
auto seed_cur = get_rng_seed(seed);
|
1296
|
-
return
|
1303
|
+
return llama_sampler_init(
|
1297
1304
|
/* .iface = */ &llama_sampler_mirostat_i,
|
1298
1305
|
/* .ctx = */ new llama_sampler_mirostat {
|
1299
1306
|
/* .n_vocab = */ n_vocab,
|
@@ -1304,8 +1311,8 @@ struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t see
|
|
1304
1311
|
/* .m = */ m,
|
1305
1312
|
/* .mu = */ 2.0f*tau,
|
1306
1313
|
/* .rng = */ std::mt19937(seed_cur),
|
1307
|
-
}
|
1308
|
-
|
1314
|
+
}
|
1315
|
+
);
|
1309
1316
|
}
|
1310
1317
|
|
1311
1318
|
// mirostat v2
|
@@ -1392,7 +1399,7 @@ static struct llama_sampler_i llama_sampler_mirostat_v2_i = {
|
|
1392
1399
|
|
1393
1400
|
struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau, float eta) {
|
1394
1401
|
auto seed_cur = get_rng_seed(seed);
|
1395
|
-
return
|
1402
|
+
return llama_sampler_init(
|
1396
1403
|
/* .iface = */ &llama_sampler_mirostat_v2_i,
|
1397
1404
|
/* .ctx = */ new llama_sampler_mirostat_v2 {
|
1398
1405
|
/* .seed = */ seed,
|
@@ -1401,8 +1408,8 @@ struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau,
|
|
1401
1408
|
/* .eta = */ eta,
|
1402
1409
|
/* .mu = */ 2.0f*tau,
|
1403
1410
|
/* .rng = */ std::mt19937(seed_cur),
|
1404
|
-
}
|
1405
|
-
|
1411
|
+
}
|
1412
|
+
);
|
1406
1413
|
}
|
1407
1414
|
|
1408
1415
|
// grammar
|
@@ -1434,13 +1441,30 @@ static void llama_sampler_grammar_apply(struct llama_sampler * smpl, llama_token
|
|
1434
1441
|
}
|
1435
1442
|
}
|
1436
1443
|
|
1444
|
+
// Fwd declare to break reset --> init_impl --> llama_sampler_grammar_i --> reset cycle.
|
1445
|
+
static struct llama_sampler * llama_sampler_init_grammar_impl(
|
1446
|
+
const struct llama_vocab * vocab,
|
1447
|
+
const char * grammar_str,
|
1448
|
+
const char * grammar_root,
|
1449
|
+
bool lazy,
|
1450
|
+
const char ** trigger_words,
|
1451
|
+
size_t num_trigger_words,
|
1452
|
+
const llama_token * trigger_tokens,
|
1453
|
+
size_t num_trigger_tokens);
|
1454
|
+
|
1437
1455
|
static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
|
1438
1456
|
auto * ctx = (llama_sampler_grammar *) smpl->ctx;
|
1439
1457
|
if (!ctx->grammar) {
|
1440
1458
|
return;
|
1441
1459
|
}
|
1442
1460
|
|
1443
|
-
|
1461
|
+
std::vector<const char *> trigger_words;
|
1462
|
+
for (auto & word : ctx->grammar->trigger_words) {
|
1463
|
+
trigger_words.push_back(word.c_str());
|
1464
|
+
}
|
1465
|
+
auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str(),
|
1466
|
+
ctx->grammar->lazy, trigger_words.data(), trigger_words.size(),
|
1467
|
+
ctx->grammar->trigger_tokens.data(), ctx->grammar->trigger_tokens.size());
|
1444
1468
|
|
1445
1469
|
llama_grammar_free_impl(ctx->grammar);
|
1446
1470
|
ctx->grammar = grammar_new;
|
@@ -1449,7 +1473,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
|
|
1449
1473
|
static struct llama_sampler * llama_sampler_grammar_clone(const struct llama_sampler * smpl) {
|
1450
1474
|
const auto * ctx = (const llama_sampler_grammar *) smpl->ctx;
|
1451
1475
|
|
1452
|
-
auto * result =
|
1476
|
+
auto * result = llama_sampler_init_grammar_impl(ctx->vocab, nullptr, nullptr, false, nullptr, 0, nullptr, 0);
|
1453
1477
|
|
1454
1478
|
// copy the state
|
1455
1479
|
{
|
@@ -1485,7 +1509,15 @@ static struct llama_sampler_i llama_sampler_grammar_i = {
|
|
1485
1509
|
/* .free = */ llama_sampler_grammar_free,
|
1486
1510
|
};
|
1487
1511
|
|
1488
|
-
struct llama_sampler *
|
1512
|
+
static struct llama_sampler * llama_sampler_init_grammar_impl(
|
1513
|
+
const struct llama_vocab * vocab,
|
1514
|
+
const char * grammar_str,
|
1515
|
+
const char * grammar_root,
|
1516
|
+
bool lazy,
|
1517
|
+
const char ** trigger_words,
|
1518
|
+
size_t num_trigger_words,
|
1519
|
+
const llama_token * trigger_tokens,
|
1520
|
+
size_t num_trigger_tokens) {
|
1489
1521
|
auto * ctx = new llama_sampler_grammar;
|
1490
1522
|
|
1491
1523
|
if (grammar_str != nullptr && grammar_str[0] != '\0') {
|
@@ -1493,7 +1525,7 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc
|
|
1493
1525
|
/* .vocab = */ vocab,
|
1494
1526
|
/* .grammar_str = */ grammar_str,
|
1495
1527
|
/* .grammar_root = */ grammar_root,
|
1496
|
-
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root),
|
1528
|
+
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, lazy, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
|
1497
1529
|
};
|
1498
1530
|
} else {
|
1499
1531
|
*ctx = {
|
@@ -1504,10 +1536,28 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc
|
|
1504
1536
|
};
|
1505
1537
|
}
|
1506
1538
|
|
1507
|
-
return
|
1539
|
+
return llama_sampler_init(
|
1508
1540
|
/* .iface = */ &llama_sampler_grammar_i,
|
1509
|
-
/* .ctx = */ ctx
|
1510
|
-
|
1541
|
+
/* .ctx = */ ctx
|
1542
|
+
);
|
1543
|
+
}
|
1544
|
+
|
1545
|
+
struct llama_sampler * llama_sampler_init_grammar(
|
1546
|
+
const struct llama_vocab * vocab,
|
1547
|
+
const char * grammar_str,
|
1548
|
+
const char * grammar_root) {
|
1549
|
+
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ false, nullptr, 0, nullptr, 0);
|
1550
|
+
}
|
1551
|
+
|
1552
|
+
struct llama_sampler * llama_sampler_init_grammar_lazy(
|
1553
|
+
const struct llama_vocab * vocab,
|
1554
|
+
const char * grammar_str,
|
1555
|
+
const char * grammar_root,
|
1556
|
+
const char ** trigger_words,
|
1557
|
+
size_t num_trigger_words,
|
1558
|
+
const llama_token * trigger_tokens,
|
1559
|
+
size_t num_trigger_tokens) {
|
1560
|
+
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens);
|
1511
1561
|
}
|
1512
1562
|
|
1513
1563
|
// penalties
|
@@ -1636,7 +1686,7 @@ struct llama_sampler * llama_sampler_init_penalties(
|
|
1636
1686
|
float penalty_present) {
|
1637
1687
|
penalty_last_n = std::max(penalty_last_n, 0);
|
1638
1688
|
|
1639
|
-
return
|
1689
|
+
return llama_sampler_init(
|
1640
1690
|
/* .iface = */ &llama_sampler_penalties_i,
|
1641
1691
|
/* .ctx = */ new llama_sampler_penalties {
|
1642
1692
|
/* .penalty_last_n = */ penalty_last_n,
|
@@ -1645,8 +1695,75 @@ struct llama_sampler * llama_sampler_init_penalties(
|
|
1645
1695
|
/* .penalty_present = */ penalty_present,
|
1646
1696
|
/* .prev = */ ring_buffer<llama_token>(penalty_last_n),
|
1647
1697
|
/* .token_count = */ {},
|
1648
|
-
}
|
1649
|
-
|
1698
|
+
}
|
1699
|
+
);
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
// top-n-sigma
|
1703
|
+
|
1704
|
+
struct llama_sampler_top_n_sigma {
|
1705
|
+
const float n;
|
1706
|
+
};
|
1707
|
+
|
1708
|
+
static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler * /*smpl*/) {
|
1709
|
+
return "top-n-sigma";
|
1710
|
+
}
|
1711
|
+
|
1712
|
+
static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
|
1713
|
+
const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
|
1714
|
+
|
1715
|
+
// find max logit and calculate mean
|
1716
|
+
float max = cur_p->data[0].logit;
|
1717
|
+
float logits_sum = 0;
|
1718
|
+
for (size_t i = 0; i < cur_p->size; ++i) {
|
1719
|
+
if (cur_p->data[i].logit > max) {
|
1720
|
+
max = cur_p->data[i].logit;
|
1721
|
+
}
|
1722
|
+
logits_sum += cur_p->data[i].logit;
|
1723
|
+
}
|
1724
|
+
float mean = logits_sum/cur_p->size;
|
1725
|
+
|
1726
|
+
// calculate standard deviation
|
1727
|
+
float acc = 0;
|
1728
|
+
for (size_t i = 0; i < cur_p->size; ++i) {
|
1729
|
+
acc += pow(cur_p->data[i].logit - mean, 2);
|
1730
|
+
}
|
1731
|
+
float std = sqrt(acc/cur_p->size);
|
1732
|
+
|
1733
|
+
//apply mask
|
1734
|
+
for (size_t i = 0; i < cur_p->size; ++i) {
|
1735
|
+
if (cur_p->data[i].logit < max - (ctx->n * std)) {
|
1736
|
+
cur_p->data[i].logit = -INFINITY;
|
1737
|
+
}
|
1738
|
+
}
|
1739
|
+
llama_sampler_softmax_impl(cur_p);
|
1740
|
+
}
|
1741
|
+
|
1742
|
+
static struct llama_sampler * llama_sampler_top_n_sigma_clone(const struct llama_sampler * smpl) {
|
1743
|
+
const auto * ctx = (const llama_sampler_top_n_sigma *) smpl->ctx;
|
1744
|
+
return llama_sampler_init_top_n_sigma(ctx->n);
|
1745
|
+
}
|
1746
|
+
|
1747
|
+
static void llama_sampler_top_n_sigma_free(struct llama_sampler * smpl) {
|
1748
|
+
delete (llama_sampler_top_n_sigma *) smpl->ctx;
|
1749
|
+
}
|
1750
|
+
|
1751
|
+
static struct llama_sampler_i llama_sampler_top_n_sigma_i = {
|
1752
|
+
/* .name = */ llama_sampler_top_n_sigma_name,
|
1753
|
+
/* .accept = */ nullptr,
|
1754
|
+
/* .apply = */ llama_sampler_top_n_sigma_apply,
|
1755
|
+
/* .reset = */ nullptr,
|
1756
|
+
/* .clone = */ llama_sampler_top_n_sigma_clone,
|
1757
|
+
/* .free = */ llama_sampler_top_n_sigma_free,
|
1758
|
+
};
|
1759
|
+
|
1760
|
+
struct llama_sampler * llama_sampler_init_top_n_sigma(float n) {
|
1761
|
+
return llama_sampler_init(
|
1762
|
+
/* .iface = */ &llama_sampler_top_n_sigma_i,
|
1763
|
+
/* .ctx = */ new llama_sampler_top_n_sigma {
|
1764
|
+
/* .n = */ n,
|
1765
|
+
}
|
1766
|
+
);
|
1650
1767
|
}
|
1651
1768
|
|
1652
1769
|
// DRY
|
@@ -1999,7 +2116,7 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
|
|
1999
2116
|
}
|
2000
2117
|
}
|
2001
2118
|
|
2002
|
-
return
|
2119
|
+
return llama_sampler_init(
|
2003
2120
|
/* .iface = */ &llama_sampler_dry_i,
|
2004
2121
|
/* .ctx = */ new llama_sampler_dry {
|
2005
2122
|
/* .total_context_size = */ context_size,
|
@@ -2011,8 +2128,8 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
|
|
2011
2128
|
/* .dry_repeat_count = */ dry_enabled ? std::vector<int>(effective_dry_penalty_last_n, 0) : std::vector<int>{},
|
2012
2129
|
/* .dry_max_token_repeat = */ {},
|
2013
2130
|
/* .last_tokens = */ dry_enabled ? ring_buffer<llama_token>(effective_dry_penalty_last_n) : ring_buffer<llama_token>(0),
|
2014
|
-
}
|
2015
|
-
|
2131
|
+
}
|
2132
|
+
);
|
2016
2133
|
}
|
2017
2134
|
|
2018
2135
|
// wrapper for test-sampling.cpp
|
@@ -2113,14 +2230,14 @@ struct llama_sampler * llama_sampler_init_logit_bias(
|
|
2113
2230
|
int32_t n_vocab,
|
2114
2231
|
int32_t n_logit_bias,
|
2115
2232
|
const llama_logit_bias * logit_bias) {
|
2116
|
-
return
|
2233
|
+
return llama_sampler_init(
|
2117
2234
|
/* .iface = */ &llama_sampler_logit_bias_i,
|
2118
2235
|
/* .ctx = */ new llama_sampler_logit_bias {
|
2119
2236
|
/* .n_vocab = */ n_vocab,
|
2120
2237
|
/* .logit_bias = */ std::vector<llama_logit_bias>(logit_bias, logit_bias + n_logit_bias),
|
2121
2238
|
/* .to_search = */ {},
|
2122
|
-
}
|
2123
|
-
|
2239
|
+
}
|
2240
|
+
);
|
2124
2241
|
}
|
2125
2242
|
|
2126
2243
|
// infill
|
@@ -2335,14 +2452,14 @@ static struct llama_sampler_i llama_sampler_infill_i = {
|
|
2335
2452
|
};
|
2336
2453
|
|
2337
2454
|
struct llama_sampler * llama_sampler_init_infill(const struct llama_vocab * vocab) {
|
2338
|
-
return
|
2455
|
+
return llama_sampler_init(
|
2339
2456
|
/* .iface = */ &llama_sampler_infill_i,
|
2340
2457
|
/* .ctx = */ new llama_sampler_infill {
|
2341
2458
|
/* .vocab = */ vocab,
|
2342
2459
|
/* .buf0 = */ std::vector<char>(512),
|
2343
2460
|
/* .buf1 = */ std::vector<char>(512),
|
2344
|
-
}
|
2345
|
-
|
2461
|
+
}
|
2462
|
+
);
|
2346
2463
|
}
|
2347
2464
|
|
2348
2465
|
// utils
|
package/cpp/llama-vocab.cpp
CHANGED
@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
|
|
1245
1245
|
|
1246
1246
|
std::vector<llama_token> cache_special_tokens;
|
1247
1247
|
std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
|
1248
|
-
|
1249
|
-
|
1248
|
+
struct pair_hash {
|
1249
|
+
size_t operator()(const std::pair<std::string, std::string> & p) const {
|
1250
|
+
return std::hash<std::string>{}(p.first) ^ //create some hash for pair
|
1251
|
+
(std::hash<std::string>{}(p.second) << 1);
|
1252
|
+
}
|
1253
|
+
};
|
1254
|
+
std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
|
1250
1255
|
|
1251
1256
|
// set of all tokens that cause "end of generation"
|
1252
1257
|
std::set<llama_token> special_eog_ids;
|
@@ -1687,7 +1692,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1687
1692
|
LM_GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
1688
1693
|
linefeed_id = ids[0];
|
1689
1694
|
} else {
|
1690
|
-
const std::vector<int> ids = tokenize("\
|
1695
|
+
const std::vector<int> ids = tokenize("\n", false);
|
1691
1696
|
|
1692
1697
|
//LM_GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
1693
1698
|
if (ids.empty()) {
|