cui-llama.rn 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/build-arm64/CMakeCache.txt +429 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +81 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +15 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +904 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +919 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +431 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
- package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
- package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
- package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/Makefile +1862 -0
- package/android/src/main/build-arm64/cmake_install.cmake +66 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +92 -18
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +287 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1085 -0
- package/cpp/chat.hpp +55 -0
- package/cpp/common.cpp +159 -36
- package/cpp/common.h +64 -19
- package/cpp/ggml-alloc.c +1 -13
- package/cpp/ggml-common.h +0 -2
- package/cpp/ggml-cpu-impl.h +6 -12
- package/cpp/ggml-cpu-quants.c +937 -340
- package/cpp/ggml-cpu.c +207 -113
- package/cpp/ggml-cpu.cpp +4 -6
- package/cpp/ggml-cpu.h +1 -1
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +141 -23
- package/cpp/ggml.c +24 -14
- package/cpp/ggml.h +2 -2
- package/cpp/json-schema-to-grammar.cpp +46 -66
- package/cpp/json-schema-to-grammar.h +15 -1
- package/cpp/llama-arch.cpp +7 -2
- package/cpp/llama-arch.h +3 -1
- package/cpp/llama-chat.cpp +10 -1
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-grammar.cpp +86 -6
- package/cpp/llama-grammar.h +22 -1
- package/cpp/llama-impl.h +6 -6
- package/cpp/llama-kv-cache.h +1 -1
- package/cpp/llama-mmap.h +1 -0
- package/cpp/llama-model-loader.cpp +1 -1
- package/cpp/llama-model.cpp +32 -6
- package/cpp/llama-sampling.cpp +178 -61
- package/cpp/llama-vocab.cpp +8 -3
- package/cpp/llama.cpp +188 -128
- package/cpp/llama.h +27 -10
- package/cpp/log.cpp +32 -10
- package/cpp/log.h +12 -1
- package/cpp/minja.hpp +2883 -0
- package/cpp/rn-llama.cpp +82 -5
- package/cpp/rn-llama.h +16 -1
- package/cpp/sampling.cpp +68 -41
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +9 -8
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +2 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
package/cpp/rn-llama.cpp
CHANGED
@@ -194,6 +194,7 @@ bool llama_rn_context::loadModel(common_params &params_)
         LOG_ERROR("unable to load model: %s", params_.model.c_str());
         return false;
     }
+    templates = common_chat_templates_from_model(model, params.chat_template);
     n_ctx = llama_n_ctx(ctx);

     // We can uncomment for debugging or after this fix: https://github.com/ggerganov/llama.cpp/pull/11101
@@ -202,11 +203,87 @@ bool llama_rn_context::loadModel(common_params &params_)
     return true;
 }

-bool llama_rn_context::validateModelChatTemplate() const {
-    const char * tmpl = llama_model_chat_template(model);
-
-
-
+bool llama_rn_context::validateModelChatTemplate(bool use_jinja, const char *name) const {
+    const char * tmpl = llama_model_chat_template(model, name);
+    if (tmpl == nullptr) {
+      return false;
+    }
+    return common_chat_verify_template(tmpl, use_jinja);
+}
+
+common_chat_params llama_rn_context::getFormattedChatWithJinja(
+  const std::string &messages,
+  const std::string &chat_template,
+  const std::string &json_schema,
+  const std::string &tools,
+  const bool &parallel_tool_calls,
+  const std::string &tool_choice
+) const {
+    common_chat_inputs inputs;
+    inputs.messages = json::parse(messages);
+    auto useTools = !tools.empty();
+    if (useTools) {
+        inputs.tools = json::parse(tools);
+    }
+    inputs.parallel_tool_calls = parallel_tool_calls;
+    if (!tool_choice.empty()) {
+        inputs.tool_choice = tool_choice;
+    }
+    if (!json_schema.empty()) {
+        inputs.json_schema = json::parse(json_schema);
+    }
+    inputs.extract_reasoning = params.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    inputs.stream = true;
+
+    // If chat_template is provided, create new one and use it (probably slow)
+    if (!chat_template.empty()) {
+        auto tmp = common_chat_templates_from_model(model, chat_template);
+        const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
+        if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+            inputs.parallel_tool_calls = false;
+        }
+        return common_chat_params_init(*template_ptr, inputs);
+    } else {
+        const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
+        if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+            inputs.parallel_tool_calls = false;
+        }
+        return common_chat_params_init(*template_ptr, inputs);
+    }
+}
+
+std::string llama_rn_context::getFormattedChat(
+  const std::string &messages,
+  const std::string &chat_template
+) const {
+    auto chat_json = json::parse(messages);
+
+    // Handle regular chat without tools
+    std::vector<common_chat_msg> chat_msgs;
+    for (const auto &msg : chat_json) {
+        chat_msgs.push_back({
+            msg["role"].get<std::string>(),
+            msg["content"].get<std::string>()
+        });
+    }
+
+    // If chat_template is provided, create new one and use it (probably slow)
+    if (!chat_template.empty()) {
+        auto tmp = common_chat_templates_from_model(model, chat_template);
+        return common_chat_apply_template(
+            *tmp.template_default,
+            chat_msgs,
+            true,
+            false
+        );
+    } else {
+        return common_chat_apply_template(
+            *templates.template_default,
+            chat_msgs,
+            true,
+            false
+        );
+    }
 }

 void llama_rn_context::truncatePrompt(std::vector<llama_token> &prompt_tokens) {
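The Jinja-based formatter above takes every structured argument pre-serialized as a JSON string and returns a `common_chat_params` describing the rendered prompt. A minimal usage sketch, assuming a loaded `rnllama::llama_rn_context` and a hypothetical tool payload (only the member functions declared in this diff are real; everything else here is illustrative):

```cpp
#include <string>
#include "json.hpp"      // nlohmann::json, bundled as cpp/json.hpp
#include "rn-llama.h"

using json = nlohmann::json;

// Hypothetical caller: render an OpenAI-style chat with one tool through the new Jinja path.
std::string render_prompt(rnllama::llama_rn_context & ctx) {
    json messages = json::array({
        { {"role", "system"}, {"content", "You are a helpful assistant."} },
        { {"role", "user"},   {"content", "What's the weather in Tokyo?"} },
    });
    json tools = json::array({
        { {"type", "function"}, {"function", {
            {"name", "get_weather"},
            {"parameters", { {"type", "object"},
                             {"properties", { {"city", { {"type", "string"} }} }} }}
        }} },
    });

    // Empty chat_template / json_schema fall back to the templates cached in loadModel().
    common_chat_params cp = ctx.getFormattedChatWithJinja(
        messages.dump(), /* chat_template */ "", /* json_schema */ "",
        tools.dump(), /* parallel_tool_calls */ false, /* tool_choice */ "auto");
    return cp.prompt;   // grammar and stop strings also travel in common_chat_params (per cpp/chat.hpp)
}
```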
package/cpp/rn-llama.h
CHANGED
@@ -3,6 +3,8 @@

 #include <sstream>
 #include <iostream>
+#include "chat.hpp"
+#include "chat-template.hpp"
 #include "common.h"
 #include "ggml.h"
 #include "gguf.h"
@@ -63,6 +65,7 @@ struct llama_rn_context {

     llama_context *ctx = nullptr;
     common_sampler *ctx_sampling = nullptr;
+    common_chat_templates templates;

     int n_ctx;

@@ -80,7 +83,19 @@ struct llama_rn_context {
     void rewind();
     bool initSampling();
     bool loadModel(common_params &params_);
-    bool validateModelChatTemplate() const;
+    bool validateModelChatTemplate(bool use_jinja, const char *name) const;
+    common_chat_params getFormattedChatWithJinja(
+      const std::string &messages,
+      const std::string &chat_template,
+      const std::string &json_schema,
+      const std::string &tools,
+      const bool &parallel_tool_calls,
+      const std::string &tool_choice
+    ) const;
+    std::string getFormattedChat(
+      const std::string &messages,
+      const std::string &chat_template
+    ) const;
     void truncatePrompt(std::vector<llama_token> &prompt_tokens);
     void loadPrompt();
     void beginCompletion();
package/cpp/sampling.cpp
CHANGED
@@ -134,11 +134,11 @@ std::string common_params_sampling::print() const {
     snprintf(result, sizeof(result),
             "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
             "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
-            "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
+            "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
             "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
             penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
             dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
-            top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
+            top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
             mirostat, mirostat_eta, mirostat_tau);

     return std::string(result);
@@ -151,9 +151,30 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co

     lparams.no_perf = params.no_perf;

+    struct llama_sampler * grmr;
+    if (params.grammar.compare(0, 11, "%llguidance") == 0) {
+#ifdef LLAMA_USE_LLGUIDANCE
+        grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
+#else
+        LM_GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
+#endif // LLAMA_USE_LLGUIDANCE
+    } else {
+        std::vector<const char *> trigger_words;
+        trigger_words.reserve(params.grammar_trigger_words.size());
+        for (const auto & str : params.grammar_trigger_words) {
+            trigger_words.push_back(str.word.c_str());
+        }
+
+        grmr = params.grammar_lazy
+            ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
+                  trigger_words.data(), trigger_words.size(),
+                  params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size())
+            : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
+    }
+
     auto * result = new common_sampler {
         /* .params = */ params,
-        /* .grmr = */
+        /* .grmr = */ grmr,
         /* .chain = */ llama_sampler_chain_init(lparams),
         /* .prev = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
         /* .cur = */ {},
@@ -167,45 +188,51 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
             params.logit_bias.data()));

     if (params.mirostat == 0) {
-
-
-
-
-
-
-
-
+        if (params.top_n_sigma >= 0) {
+            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k       (params.top_k));
+            llama_sampler_chain_add(result->chain, llama_sampler_init_temp        (params.temp));
+            llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma));
+        } else {
+            for (const auto & cnstr : params.samplers) {
+                switch (cnstr) {
+                    case COMMON_SAMPLER_TYPE_DRY:
+                        {
+                            std::vector<const char *> c_breakers;
+                            c_breakers.reserve(params.dry_sequence_breakers.size());
+                            for (const auto & str : params.dry_sequence_breakers) {
+                                c_breakers.push_back(str.c_str());
+                            }
+
+                            llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    default:
-                        LM_GGML_ASSERT(false && "unknown sampler type");
+                        break;
+                    case COMMON_SAMPLER_TYPE_TOP_K:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k));
+                        break;
+                    case COMMON_SAMPLER_TYPE_TOP_P:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep));
+                        break;
+                    case COMMON_SAMPLER_TYPE_MIN_P:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep));
+                        break;
+                    case COMMON_SAMPLER_TYPE_XTC:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
+                        break;
+                    case COMMON_SAMPLER_TYPE_TYPICAL_P:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep));
+                        break;
+                    case COMMON_SAMPLER_TYPE_TEMPERATURE:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
+                        break;
+                    case COMMON_SAMPLER_TYPE_INFILL:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (vocab));
+                        break;
+                    case COMMON_SAMPLER_TYPE_PENALTIES:
+                        llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
+                        break;
+                    default:
+                        LM_GGML_ASSERT(false && "unknown sampler type");
+                }
             }
         }
         llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
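Two behavioural notes fall out of this hunk: the grammar sampler can now be built lazily from trigger words/tokens (used for tool-call grammars), and a non-negative `top_n_sigma` replaces the usual sampler list with a fixed top-k → temperature → top-n-sigma chain. A rough standalone sketch of that reduced chain, built directly against the bundled llama.h sampler API (parameter values are illustrative, not defaults from this package):

```cpp
#include "llama.h"

// Illustrative: the chain common_sampler_init builds when params.top_n_sigma >= 0.
static llama_sampler * make_top_n_sigma_chain(int32_t top_k, float temp, float top_n_sigma, uint32_t seed) {
    llama_sampler_chain_params cparams = llama_sampler_chain_default_params();
    llama_sampler * chain = llama_sampler_chain_init(cparams);

    llama_sampler_chain_add(chain, llama_sampler_init_top_k(top_k));
    llama_sampler_chain_add(chain, llama_sampler_init_temp(temp));
    llama_sampler_chain_add(chain, llama_sampler_init_top_n_sigma(top_n_sigma));
    llama_sampler_chain_add(chain, llama_sampler_init_dist(seed));   // final draw, as in the full chain

    return chain;
}
```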
package/cpp/sampling.h
CHANGED
@@ -102,3 +102,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

 std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
 std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
+
+llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab,
+    const char * grammar_kind, const char * grammar_data);
package/cpp/sgemm.cpp
CHANGED
@@ -280,14 +280,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif

-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION

@@ -614,6 +606,14 @@ class tinyBLAS_Q0_AVX {
               TC *C, int64_t ldc,
               int ith, int nth)
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49, -35, -22, -10,
+            1, 13, 25, 38,
+            53, 69, 89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
     }

     void matmul(int64_t m, int64_t n) {
@@ -1038,6 +1038,7 @@ class tinyBLAS_Q0_AVX {
     const int64_t ldc;
     const int ith;
     const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__

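The sgemm change drops a namespace-scope `static __m128i` (whose `_mm_loadu_si128` initializer must run as a dynamic initializer at load time) in favour of a per-instance member filled in the constructor. The same pattern in isolation, with a hypothetical class name, looks roughly like this on x86:

```cpp
#include <cstdint>
#if defined(__SSE2__) || defined(__AVX__)
#include <immintrin.h>

// Hypothetical illustration: keep the lookup table as a plain array and load it into
// a SIMD register in the constructor, instead of via a global static initializer.
class iq4nl_lut {
  public:
    iq4nl_lut() {
        const int8_t kvalues[16] = { -127, -104, -83, -65, -49, -35, -22, -10,
                                        1,   13,  25,  38,  53,  69,  89, 113 };
        lut = _mm_loadu_si128((const __m128i *) kvalues);
    }
    __m128i values() const { return lut; }
  private:
    __m128i lut;
};
#endif
```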
package/cpp/unicode.cpp
CHANGED
@@ -618,7 +618,14 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
     result.reserve(utf8.size());
     size_t offset = 0;
     while (offset < utf8.size()) {
-
+        try {
+            result.push_back(unicode_cpt_from_utf8(utf8, offset));
+        }
+        catch (const std::invalid_argument & /*ex*/) {
+            // Silently ignore invalid UTF-8 input to avoid leaking the exception beyond llama_tokenize
+            ++offset;
+            result.emplace_back(0xFFFD); // replacement character
+        }
     }
     return result;
 }
@@ -701,7 +708,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
     const auto cpts = unicode_cpts_from_utf8(text);

     // generate a "collapsed" representation of the text, where all codepoints are replaced by a single byte
-    // ref: https://github.com/
+    // ref: https://github.com/ggml-org/llama.cpp/pull/6920#issuecomment-2081479935
     std::string text_collapsed;
     if (need_collapse) {
         // collapse all unicode categories
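With this change, invalid UTF-8 no longer throws out of `unicode_cpts_from_utf8`; each undecodable byte is consumed and mapped to U+FFFD. A small self-contained sketch of the same lossy-decoding policy (a hand-rolled decoder for illustration, not the one in unicode.cpp):

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Decode UTF-8, emitting U+FFFD for any byte that does not start a valid sequence.
// Simplified: truncated or malformed sequences also fall back to U+FFFD one byte at a time.
std::vector<uint32_t> cpts_lossy(const std::string & s) {
    std::vector<uint32_t> out;
    size_t i = 0;
    while (i < s.size()) {
        const uint8_t b = static_cast<uint8_t>(s[i]);
        const size_t len = b < 0x80 ? 1 : (b >> 5) == 0x6 ? 2 : (b >> 4) == 0xE ? 3 : (b >> 3) == 0x1E ? 4 : 0;
        if (len == 0 || i + len > s.size()) { out.push_back(0xFFFD); ++i; continue; }
        uint32_t cp = len == 1 ? b : b & (0x7F >> len);
        bool ok = true;
        for (size_t k = 1; k < len; ++k) {
            const uint8_t c = static_cast<uint8_t>(s[i + k]);
            if ((c & 0xC0) != 0x80) { ok = false; break; }   // not a continuation byte
            cp = (cp << 6) | (c & 0x3F);
        }
        if (!ok) { out.push_back(0xFFFD); ++i; continue; }   // bad continuation: consume one byte
        out.push_back(cp);
        i += len;
    }
    return out;
}
```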
package/ios/CMakeLists.txt
CHANGED
@@ -15,6 +15,7 @@ add_definitions(
     -DLM_GGML_USE_CPU
     -DLM_GGML_USE_ACCELERATE
     -DLM_GGML_USE_METAL
+    -DLM_GGML_METAL_USE_BF16
 )

 set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)
@@ -66,6 +67,11 @@ add_library(rnllama SHARED
     ${SOURCE_DIR}/unicode.cpp
     ${SOURCE_DIR}/sgemm.cpp
     ${SOURCE_DIR}/common.cpp
+    ${SOURCE_DIR}/chat.cpp
+    ${SOURCE_DIR}/chat-template.hpp
+    ${SOURCE_DIR}/json-schema-to-grammar.cpp
+    ${SOURCE_DIR}/minja.hpp
+    ${SOURCE_DIR}/json.hpp
     ${SOURCE_DIR}/amx/amx.cpp
     ${SOURCE_DIR}/amx/mmq.cpp
     ${SOURCE_DIR}/rn-llama.cpp
package/ios/RNLlama.h
CHANGED
package/ios/RNLlama.mm
CHANGED
@@ -13,6 +13,16 @@ dispatch_queue_t llamaDQueue;

 RCT_EXPORT_MODULE()

+RCT_EXPORT_METHOD(toggleNativeLog:(BOOL)enabled) {
+  void (^onEmitLog)(NSString *level, NSString *text) = nil;
+  if (enabled) {
+    onEmitLog = ^(NSString *level, NSString *text) {
+      [self sendEventWithName:@"@RNLlama_onNativeLog" body:@{ @"level": level, @"text": text }];
+    };
+  }
+  [RNLlamaContext toggleNativeLog:enabled onEmitLog:onEmitLog];
+}
+
 RCT_EXPORT_METHOD(setContextLimit:(double)limit
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
@@ -41,7 +51,7 @@ RCT_EXPORT_METHOD(initContext:(double)contextId
   }

   if (llamaDQueue == nil) {
-
+    llamaDQueue = dispatch_queue_create("com.rnllama", DISPATCH_QUEUE_SERIAL);
   }

   if (llamaContexts == nil) {
@@ -77,8 +87,9 @@ RCT_EXPORT_METHOD(initContext:(double)contextId
 }

 RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
-                  withMessages:(
+                  withMessages:(NSString *)messages
                   withTemplate:(NSString *)chatTemplate
+                  withParams:(NSDictionary *)params
                   withResolver:(RCTPromiseResolveBlock)resolve
                   withRejecter:(RCTPromiseRejectBlock)reject)
 {
@@ -87,7 +98,19 @@ RCT_EXPORT_METHOD(getFormattedChat:(double)contextId
     reject(@"llama_error", @"Context not found", nil);
     return;
   }
-
+  try {
+    if ([params[@"jinja"] boolValue]) {
+      NSString *jsonSchema = params[@"json_schema"];
+      NSString *tools = params[@"tools"];
+      NSString *parallelToolCalls = params[@"parallel_tool_calls"];
+      NSString *toolChoice = params[@"tool_choice"];\
+      resolve([context getFormattedChatWithJinja:messages withChatTemplate:chatTemplate withJsonSchema:jsonSchema withTools:tools withParallelToolCalls:parallelToolCalls withToolChoice:toolChoice]);
+    } else {
+      resolve([context getFormattedChat:messages withChatTemplate:chatTemplate]);
+    }
+  } catch (const std::exception& e) { // catch cpp exceptions
+    reject(@"llama_error", [NSString stringWithUTF8String:e.what()], nil);
+  }
 }

 RCT_EXPORT_METHOD(loadSession:(double)contextId
@@ -146,6 +169,7 @@ RCT_EXPORT_METHOD(saveSession:(double)contextId
   return @[
     @"@RNLlama_onInitContextProgress",
     @"@RNLlama_onToken",
+    @"@RNLlama_onNativeLog",
   ];
 }

package/ios/RNLlamaContext.h
CHANGED
@@ -4,11 +4,13 @@
 #import "llama-impl.h"
 #import "ggml.h"
 #import "rn-llama.h"
+#import "json-schema-to-grammar.h"
 #else
 #import <rnllama/llama.h>
 #import <rnllama/llama-impl.h>
 #import <rnllama/ggml.h>
 #import <rnllama/rn-llama.h>
+#import <rnllama/json-schema-to-grammar.h>
 #endif
 #endif

@@ -23,6 +25,7 @@
     rnllama::llama_rn_context * llama;
 }

++ (void)toggleNativeLog:(BOOL)enabled onEmitLog:(void (^)(NSString *level, NSString *text))onEmitLog;
 + (NSDictionary *)modelInfo:(NSString *)path skip:(NSArray *)skip;
 + (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsigned int progress))onProgress;
 - (void)interruptLoad;
@@ -36,7 +39,13 @@
 - (NSArray *)tokenize:(NSString *)text;
 - (NSString *)detokenize:(NSArray *)tokens;
 - (NSDictionary *)embedding:(NSString *)text params:(NSDictionary *)params;
-- (
+- (NSDictionary *)getFormattedChatWithJinja:(NSString *)messages
+                           withChatTemplate:(NSString *)chatTemplate
+                             withJsonSchema:(NSString *)jsonSchema
+                                  withTools:(NSString *)tools
+                      withParallelToolCalls:(BOOL)parallelToolCalls
+                             withToolChoice:(NSString *)toolChoice;
+- (NSString *)getFormattedChat:(NSString *)messages withChatTemplate:(NSString *)chatTemplate;
 - (NSDictionary *)loadSession:(NSString *)path;
 - (int)saveSession:(NSString *)path size:(int)size;
 - (NSString *)bench:(int)pp tg:(int)tg pl:(int)pl nr:(int)nr;