cui-llama.rn 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +6 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +38 -5
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +153 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/chat.cpp +128 -106
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +41 -76
- package/cpp/common.h +23 -19
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/cpp/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/cpp/ggml-cpu/ggml-cpu.c +5 -13
- package/cpp/ggml-cpu/ggml-cpu.cpp +29 -16
- package/cpp/ggml-cpu/ops.cpp +107 -13
- package/cpp/ggml-cpu/vec.cpp +0 -6
- package/cpp/ggml-cpu/vec.h +16 -0
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +321 -132
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +93 -38
- package/cpp/ggml.h +21 -7
- package/cpp/gguf.cpp +33 -33
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +3 -0
- package/cpp/llama-batch.cpp +3 -1
- package/cpp/llama-chat.cpp +8 -6
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +349 -135
- package/cpp/llama-context.h +30 -3
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +150 -234
- package/cpp/llama-graph.h +52 -7
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +34 -5
- package/cpp/llama-kv-cache.cpp +662 -321
- package/cpp/llama-kv-cache.h +203 -93
- package/cpp/llama-memory.h +3 -2
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +536 -132
- package/cpp/llama-model.h +7 -1
- package/cpp/llama-sampling.cpp +18 -6
- package/cpp/llama-vocab.cpp +46 -8
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +72 -131
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +611 -47
- package/cpp/rn-llama.h +33 -3
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +7 -0
- package/ios/RNLlama.mm +77 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +105 -10
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +23 -19
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +21 -7
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +30 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +7 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +72 -131
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +23 -19
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +21 -7
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +30 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +7 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +72 -131
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +50 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +67 -4
- package/src/index.ts +212 -38
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
package/cpp/rn-llama.h
CHANGED
@@ -41,6 +41,16 @@ struct completion_token_output
|
|
41
41
|
llama_token tok;
|
42
42
|
};
|
43
43
|
|
44
|
+
struct llama_rn_context_mtmd;
|
45
|
+
|
46
|
+
struct llama_rn_tokenize_result {
|
47
|
+
std::vector<llama_token> tokens;
|
48
|
+
bool has_media = false;
|
49
|
+
std::vector<std::string> bitmap_hashes;
|
50
|
+
std::vector<size_t> chunk_pos; // both text and media
|
51
|
+
std::vector<size_t> chunk_pos_media; // media only
|
52
|
+
};
|
53
|
+
|
44
54
|
// Main context class
|
45
55
|
struct llama_rn_context {
|
46
56
|
bool is_predicting = false;
|
@@ -51,8 +61,9 @@ struct llama_rn_context {
|
|
51
61
|
|
52
62
|
size_t num_prompt_tokens = 0;
|
53
63
|
size_t num_tokens_predicted = 0;
|
54
|
-
|
64
|
+
llama_pos n_past = 0;
|
55
65
|
size_t n_remain = 0;
|
66
|
+
std::vector<std::string> mtmd_bitmap_past_hashes;
|
56
67
|
|
57
68
|
std::vector<llama_token> embd;
|
58
69
|
common_params params;
|
@@ -78,6 +89,9 @@ struct llama_rn_context {
|
|
78
89
|
|
79
90
|
std::vector<common_adapter_lora_info> lora;
|
80
91
|
|
92
|
+
llama_rn_context_mtmd *mtmd_wrapper = nullptr;
|
93
|
+
bool has_multimodal = false;
|
94
|
+
|
81
95
|
~llama_rn_context();
|
82
96
|
|
83
97
|
void rewind();
|
@@ -97,8 +111,9 @@ struct llama_rn_context {
|
|
97
111
|
const std::string &chat_template
|
98
112
|
) const;
|
99
113
|
void truncatePrompt(std::vector<llama_token> &prompt_tokens);
|
100
|
-
void loadPrompt();
|
114
|
+
void loadPrompt(const std::vector<std::string> &media_paths);
|
101
115
|
void beginCompletion();
|
116
|
+
void endCompletion();
|
102
117
|
completion_token_output nextToken();
|
103
118
|
size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
|
104
119
|
completion_token_output doCompletion();
|
@@ -107,7 +122,22 @@ struct llama_rn_context {
|
|
107
122
|
int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
|
108
123
|
void removeLoraAdapters();
|
109
124
|
std::vector<common_adapter_lora_info> getLoadedLoraAdapters();
|
110
|
-
|
125
|
+
|
126
|
+
// Multimodal methods
|
127
|
+
bool initMultimodal(const std::string &mmproj_path, bool use_gpu);
|
128
|
+
bool isMultimodalEnabled() const;
|
129
|
+
bool isMultimodalSupportVision() const;
|
130
|
+
bool isMultimodalSupportAudio() const;
|
131
|
+
void releaseMultimodal();
|
132
|
+
|
133
|
+
// Process multiple media and add them to the context
|
134
|
+
void processMedia(
|
135
|
+
const std::string &prompt,
|
136
|
+
const std::vector<std::string> &media_paths
|
137
|
+
);
|
138
|
+
|
139
|
+
llama_rn_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
|
140
|
+
};
|
111
141
|
|
112
142
|
// Logging macros
|
113
143
|
extern bool rnllama_verbose;
|
package/cpp/sampling.cpp
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "sampling.h"
|
2
2
|
|
3
3
|
#include "common.h"
|
4
|
+
#include "log.h"
|
4
5
|
|
5
6
|
#include <cmath>
|
6
7
|
#include <unordered_map>
|
@@ -229,51 +230,48 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|
229
230
|
params.logit_bias.data()));
|
230
231
|
|
231
232
|
if (params.mirostat == 0) {
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
{
|
241
|
-
std::vector<const char *> c_breakers;
|
242
|
-
c_breakers.reserve(params.dry_sequence_breakers.size());
|
243
|
-
for (const auto & str : params.dry_sequence_breakers) {
|
244
|
-
c_breakers.push_back(str.c_str());
|
245
|
-
}
|
246
|
-
|
247
|
-
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
|
233
|
+
for (const auto & cnstr : params.samplers) {
|
234
|
+
switch (cnstr) {
|
235
|
+
case COMMON_SAMPLER_TYPE_DRY:
|
236
|
+
{
|
237
|
+
std::vector<const char *> c_breakers;
|
238
|
+
c_breakers.reserve(params.dry_sequence_breakers.size());
|
239
|
+
for (const auto & str : params.dry_sequence_breakers) {
|
240
|
+
c_breakers.push_back(str.c_str());
|
248
241
|
}
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
242
|
+
|
243
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
|
244
|
+
}
|
245
|
+
break;
|
246
|
+
case COMMON_SAMPLER_TYPE_TOP_K:
|
247
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
|
248
|
+
break;
|
249
|
+
case COMMON_SAMPLER_TYPE_TOP_P:
|
250
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep));
|
251
|
+
break;
|
252
|
+
case COMMON_SAMPLER_TYPE_TOP_N_SIGMA:
|
253
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma));
|
254
|
+
break;
|
255
|
+
case COMMON_SAMPLER_TYPE_MIN_P:
|
256
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
|
257
|
+
break;
|
258
|
+
case COMMON_SAMPLER_TYPE_XTC:
|
259
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
|
260
|
+
break;
|
261
|
+
case COMMON_SAMPLER_TYPE_TYPICAL_P:
|
262
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
|
263
|
+
break;
|
264
|
+
case COMMON_SAMPLER_TYPE_TEMPERATURE:
|
265
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
|
266
|
+
break;
|
267
|
+
case COMMON_SAMPLER_TYPE_INFILL:
|
268
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab));
|
269
|
+
break;
|
270
|
+
case COMMON_SAMPLER_TYPE_PENALTIES:
|
271
|
+
llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
|
272
|
+
break;
|
273
|
+
default:
|
274
|
+
LM_GGML_ASSERT(false && "unknown sampler type");
|
277
275
|
}
|
278
276
|
}
|
279
277
|
llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
|
@@ -475,6 +473,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
|
|
475
473
|
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
|
476
474
|
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
|
477
475
|
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
|
476
|
+
case COMMON_SAMPLER_TYPE_TOP_N_SIGMA: return 's';
|
478
477
|
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
|
479
478
|
case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
|
480
479
|
case COMMON_SAMPLER_TYPE_XTC: return 'x';
|
@@ -490,6 +489,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
|
|
490
489
|
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
|
491
490
|
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
|
492
491
|
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
|
492
|
+
case COMMON_SAMPLER_TYPE_TOP_N_SIGMA: return "top_n_sigma";
|
493
493
|
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
|
494
494
|
case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
|
495
495
|
case COMMON_SAMPLER_TYPE_XTC: return "xtc";
|
@@ -504,6 +504,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
|
|
504
504
|
{ "dry", COMMON_SAMPLER_TYPE_DRY },
|
505
505
|
{ "top_k", COMMON_SAMPLER_TYPE_TOP_K },
|
506
506
|
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
|
507
|
+
{ "top_n_sigma", COMMON_SAMPLER_TYPE_TOP_N_SIGMA },
|
507
508
|
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
508
509
|
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
|
509
510
|
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
|
@@ -517,6 +518,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
|
|
517
518
|
std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
|
518
519
|
{ "top-k", COMMON_SAMPLER_TYPE_TOP_K },
|
519
520
|
{ "top-p", COMMON_SAMPLER_TYPE_TOP_P },
|
521
|
+
{ "top-n-sigma", COMMON_SAMPLER_TYPE_TOP_N_SIGMA },
|
520
522
|
{ "nucleus", COMMON_SAMPLER_TYPE_TOP_P },
|
521
523
|
{ "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
522
524
|
{ "typical", COMMON_SAMPLER_TYPE_TYPICAL_P },
|
@@ -533,14 +535,16 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
|
|
533
535
|
auto sampler = sampler_canonical_name_map.find(name);
|
534
536
|
if (sampler != sampler_canonical_name_map.end()) {
|
535
537
|
samplers.push_back(sampler->second);
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
538
|
+
continue;
|
539
|
+
}
|
540
|
+
if (allow_alt_names) {
|
541
|
+
sampler = sampler_alt_name_map.find(name);
|
542
|
+
if (sampler != sampler_alt_name_map.end()) {
|
543
|
+
samplers.push_back(sampler->second);
|
544
|
+
continue;
|
542
545
|
}
|
543
546
|
}
|
547
|
+
LOG_WRN("%s: unable to match sampler by name '%s'\n", __func__, name.c_str());
|
544
548
|
}
|
545
549
|
|
546
550
|
return samplers;
|
@@ -552,6 +556,7 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
|
|
552
556
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
|
553
557
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
|
554
558
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
|
559
|
+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_N_SIGMA), COMMON_SAMPLER_TYPE_TOP_N_SIGMA },
|
555
560
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
|
556
561
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
|
557
562
|
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
|
@@ -566,6 +571,8 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
|
|
566
571
|
const auto sampler = sampler_name_map.find(c);
|
567
572
|
if (sampler != sampler_name_map.end()) {
|
568
573
|
samplers.push_back(sampler->second);
|
574
|
+
} else {
|
575
|
+
LOG_WRN("%s: unable to match sampler by char '%c'\n", __func__, c);
|
569
576
|
}
|
570
577
|
}
|
571
578
|
|