cui-llama.rn 1.4.3 → 1.4.4
This diff compares the publicly available package versions as released to a supported registry. It is provided for informational purposes only.
- package/README.md +93 -114
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/build-arm64/CMakeCache.txt +429 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +21 -21
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +376 -0
- package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
- package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
- package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
- package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
- package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
- package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
- package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
- package/android/src/main/build-arm64/Makefile +1862 -0
- package/android/src/main/build-arm64/cmake_install.cmake +66 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
- package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
- package/android/src/main/jni-utils.h +6 -0
- package/android/src/main/jni.cpp +287 -31
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
- package/cpp/chat-template.hpp +529 -0
- package/cpp/chat.cpp +1085 -0
- package/cpp/chat.hpp +55 -0
- package/cpp/common.cpp +159 -36
- package/cpp/common.h +64 -19
- package/cpp/ggml-alloc.c +1 -13
- package/cpp/ggml-common.h +0 -2
- package/cpp/ggml-cpu-impl.h +6 -12
- package/cpp/ggml-cpu-quants.c +937 -340
- package/cpp/ggml-cpu.c +207 -113
- package/cpp/ggml-cpu.cpp +4 -6
- package/cpp/ggml-cpu.h +1 -1
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +141 -23
- package/cpp/ggml.c +24 -14
- package/cpp/ggml.h +2 -2
- package/cpp/json-schema-to-grammar.cpp +46 -66
- package/cpp/json-schema-to-grammar.h +15 -1
- package/cpp/llama-arch.cpp +7 -2
- package/cpp/llama-arch.h +3 -1
- package/cpp/llama-chat.cpp +10 -1
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-grammar.cpp +86 -6
- package/cpp/llama-grammar.h +22 -1
- package/cpp/llama-impl.h +6 -6
- package/cpp/llama-kv-cache.h +1 -1
- package/cpp/llama-mmap.h +1 -0
- package/cpp/llama-model-loader.cpp +1 -1
- package/cpp/llama-model.cpp +32 -6
- package/cpp/llama-sampling.cpp +178 -61
- package/cpp/llama-vocab.cpp +8 -3
- package/cpp/llama.cpp +188 -128
- package/cpp/llama.h +27 -10
- package/cpp/log.cpp +32 -10
- package/cpp/log.h +12 -1
- package/cpp/minja.hpp +2883 -0
- package/cpp/rn-llama.cpp +82 -5
- package/cpp/rn-llama.h +16 -1
- package/cpp/sampling.cpp +68 -41
- package/cpp/sampling.h +3 -0
- package/cpp/sgemm.cpp +9 -8
- package/cpp/unicode.cpp +9 -2
- package/ios/CMakeLists.txt +6 -0
- package/ios/RNLlama.h +0 -8
- package/ios/RNLlama.mm +27 -3
- package/ios/RNLlamaContext.h +10 -1
- package/ios/RNLlamaContext.mm +269 -57
- package/jest/mock.js +21 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +3 -0
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +87 -13
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +3 -0
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +86 -13
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +107 -2
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +32 -7
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +1 -1
- package/package.json +3 -2
- package/src/NativeRNLlama.ts +115 -3
- package/src/grammar.ts +3 -0
- package/src/index.ts +138 -21
package/android/src/main/jni.cpp
CHANGED
@@ -9,12 +9,12 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
+#include "json-schema-to-grammar.h"
 #include "llama.h"
 #include "llama-impl.h"
 #include "ggml.h"
 #include "rn-llama.h"
 #include "jni-utils.h"
-
 #define UNUSED(x) (void)(x)
 #define TAG "RNLLAMA_ANDROID_JNI"

@@ -25,7 +25,7 @@ static inline int min(int a, int b) {
     return (a < b) ? a : b;
 }

-static void
+static void rnllama_log_callback_default(lm_ggml_log_level level, const char * fmt, void * data) {
     if (level == LM_GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
     else if (level == LM_GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
     else if (level == LM_GGML_LOG_LEVEL_WARN) __android_log_print(ANDROID_LOG_WARN, TAG, fmt, data);
@@ -230,6 +230,8 @@ Java_com_rnllama_LlamaContext_initContext(
     JNIEnv *env,
     jobject thiz,
     jstring model_path_str,
+    jstring chat_template,
+    jstring reasoning_format,
     jboolean embedding,
     jint embd_normalize,
     jint n_ctx,
@@ -262,7 +264,17 @@

     const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
     defaultParams.model = model_path_chars;
-
+
+    const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
+    defaultParams.chat_template = chat_template_chars;
+
+    const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
+    if (strcmp(reasoning_format_chars, "deepseek") == 0) {
+        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    } else {
+        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+    }
+
     defaultParams.n_ctx = n_ctx;
     defaultParams.n_batch = n_batch;
     defaultParams.n_ubatch = n_ubatch;
@@ -329,6 +341,8 @@ Java_com_rnllama_LlamaContext_initContext(
     bool is_model_loaded = llama->loadModel(defaultParams);

     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+    env->ReleaseStringUTFChars(chat_template, chat_template_chars);
+    env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
     env->ReleaseStringUTFChars(cache_type_k, cache_type_k_chars);
     env->ReleaseStringUTFChars(cache_type_v, cache_type_v_chars);

@@ -418,52 +432,136 @@ Java_com_rnllama_LlamaContext_loadModelDetails(

     char desc[1024];
     llama_model_desc(llama->model, desc, sizeof(desc));
+
     putString(env, result, "desc", desc);
     putDouble(env, result, "size", llama_model_size(llama->model));
     putDouble(env, result, "nEmbd", llama_model_n_embd(llama->model));
     putDouble(env, result, "nParams", llama_model_n_params(llama->model));
-
+    auto chat_templates = createWriteableMap(env);
+    putBoolean(env, chat_templates, "llamaChat", llama->validateModelChatTemplate(false, nullptr));
+
+    auto minja = createWriteableMap(env);
+    putBoolean(env, minja, "default", llama->validateModelChatTemplate(true, nullptr));
+
+    auto default_caps = createWriteableMap(env);
+
+    auto default_tmpl = llama->templates.template_default.get();
+    auto default_tmpl_caps = default_tmpl->original_caps();
+    putBoolean(env, default_caps, "tools", default_tmpl_caps.supports_tools);
+    putBoolean(env, default_caps, "toolCalls", default_tmpl_caps.supports_tool_calls);
+    putBoolean(env, default_caps, "parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
+    putBoolean(env, default_caps, "toolResponses", default_tmpl_caps.supports_tool_responses);
+    putBoolean(env, default_caps, "systemRole", default_tmpl_caps.supports_system_role);
+    putBoolean(env, default_caps, "toolCallId", default_tmpl_caps.supports_tool_call_id);
+    putMap(env, minja, "defaultCaps", default_caps);
+
+    putBoolean(env, minja, "toolUse", llama->validateModelChatTemplate(true, "tool_use"));
+    auto tool_use_tmpl = llama->templates.template_tool_use.get();
+    if (tool_use_tmpl != nullptr) {
+        auto tool_use_caps = createWriteableMap(env);
+        auto tool_use_tmpl_caps = tool_use_tmpl->original_caps();
+        putBoolean(env, tool_use_caps, "tools", tool_use_tmpl_caps.supports_tools);
+        putBoolean(env, tool_use_caps, "toolCalls", tool_use_tmpl_caps.supports_tool_calls);
+        putBoolean(env, tool_use_caps, "parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
+        putBoolean(env, tool_use_caps, "systemRole", tool_use_tmpl_caps.supports_system_role);
+        putBoolean(env, tool_use_caps, "toolResponses", tool_use_tmpl_caps.supports_tool_responses);
+        putBoolean(env, tool_use_caps, "toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
+        putMap(env, minja, "toolUseCaps", tool_use_caps);
+    }
+
+    putMap(env, chat_templates, "minja", minja);
     putMap(env, result, "metadata", meta);
+    putMap(env, result, "chatTemplates", chat_templates);
+
+    // deprecated
+    putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate(false, nullptr));

     return reinterpret_cast<jobject>(result);
 }

 JNIEXPORT jobject JNICALL
-
+Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
     JNIEnv *env,
     jobject thiz,
     jlong context_ptr,
-
-    jstring chat_template
+    jstring messages,
+    jstring chat_template,
+    jstring json_schema,
+    jstring tools,
+    jboolean parallel_tool_calls,
+    jstring tool_choice
 ) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];

-
-
-
-
-
-    jclass msgClass = env->GetObjectClass(msg);
-
-    jmethodID getRoleMethod = env->GetMethodID(msgClass, "getString", "(Ljava/lang/String;)Ljava/lang/String;");
-    jstring roleKey = env->NewStringUTF("role");
-    jstring contentKey = env->NewStringUTF("content");
+    const char *messages_chars = env->GetStringUTFChars(messages, nullptr);
+    const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr);
+    const char *json_schema_chars = env->GetStringUTFChars(json_schema, nullptr);
+    const char *tools_chars = env->GetStringUTFChars(tools, nullptr);
+    const char *tool_choice_chars = env->GetStringUTFChars(tool_choice, nullptr);

-
-
+    auto result = createWriteableMap(env);
+    try {
+        auto formatted = llama->getFormattedChatWithJinja(
+            messages_chars,
+            tmpl_chars,
+            json_schema_chars,
+            tools_chars,
+            parallel_tool_calls,
+            tool_choice_chars
+        );
+        putString(env, result, "prompt", formatted.prompt.get<std::string>().c_str());
+        putInt(env, result, "chat_format", static_cast<int>(formatted.format));
+        putString(env, result, "grammar", formatted.grammar.c_str());
+        putBoolean(env, result, "grammar_lazy", formatted.grammar_lazy);
+        auto grammar_triggers = createWritableArray(env);
+        for (const auto &trigger : formatted.grammar_triggers) {
+            auto trigger_map = createWriteableMap(env);
+            putString(env, trigger_map, "word", trigger.word.c_str());
+            putBoolean(env, trigger_map, "at_start", trigger.at_start);
+            pushMap(env, grammar_triggers, trigger_map);
+        }
+        putArray(env, result, "grammar_triggers", grammar_triggers);
+        auto preserved_tokens = createWritableArray(env);
+        for (const auto &token : formatted.preserved_tokens) {
+            pushString(env, preserved_tokens, token.c_str());
+        }
+        putArray(env, result, "preserved_tokens", preserved_tokens);
+        auto additional_stops = createWritableArray(env);
+        for (const auto &stop : formatted.additional_stops) {
+            pushString(env, additional_stops, stop.c_str());
+        }
+        putArray(env, result, "additional_stops", additional_stops);
+    } catch (const std::runtime_error &e) {
+        LOGI("[RNLlama] Error: %s", e.what());
+        putString(env, result, "_error", e.what());
+    }
+    env->ReleaseStringUTFChars(tools, tools_chars);
+    env->ReleaseStringUTFChars(messages, messages_chars);
+    env->ReleaseStringUTFChars(chat_template, tmpl_chars);
+    env->ReleaseStringUTFChars(json_schema, json_schema_chars);
+    env->ReleaseStringUTFChars(tool_choice, tool_choice_chars);
+    return reinterpret_cast<jobject>(result);
+}

-
-
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getFormattedChat(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring messages,
+    jstring chat_template
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];

-
+    const char *messages_chars = env->GetStringUTFChars(messages, nullptr);
+    const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr);

-
-        env->ReleaseStringUTFChars(content_str, content);
-    }
+    std::string formatted_chat = llama->getFormattedChat(messages_chars, tmpl_chars);

-
-
+    env->ReleaseStringUTFChars(messages, messages_chars);
+    env->ReleaseStringUTFChars(chat_template, tmpl_chars);

     return env->NewStringUTF(formatted_chat.c_str());
 }
@@ -552,7 +650,12 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jobject thiz,
     jlong context_ptr,
     jstring prompt,
+    jint chat_format,
     jstring grammar,
+    jstring json_schema,
+    jboolean grammar_lazy,
+    jobject grammar_triggers,
+    jobject preserved_tokens,
     jfloat temperature,
     jint n_threads,
     jint n_predict,
@@ -578,6 +681,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jfloat dry_base,
     jint dry_allowed_length,
     jint dry_penalty_last_n,
+    jfloat top_n_sigma,
     jobjectArray dry_sequence_breakers,
     jobject partial_completion_callback
 ) {
@@ -588,7 +692,8 @@ Java_com_rnllama_LlamaContext_doCompletion(

     //llama_reset_timings(llama->ctx);

-
+    auto prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+    llama->params.prompt = prompt_chars;
     llama->params.sampling.seed = (seed == -1) ? time(NULL) : seed;

     int max_threads = std::thread::hardware_concurrency();
@@ -613,13 +718,59 @@
     sparams.min_p = min_p;
     sparams.typ_p = typical_p;
     sparams.n_probs = n_probs;
-    sparams.grammar = env->GetStringUTFChars(grammar, nullptr);
     sparams.xtc_threshold = xtc_threshold;
     sparams.xtc_probability = xtc_probability;
     sparams.dry_multiplier = dry_multiplier;
     sparams.dry_base = dry_base;
     sparams.dry_allowed_length = dry_allowed_length;
     sparams.dry_penalty_last_n = dry_penalty_last_n;
+    sparams.top_n_sigma = top_n_sigma;
+
+    // grammar
+    auto grammar_chars = env->GetStringUTFChars(grammar, nullptr);
+    if (grammar_chars && grammar_chars[0] != '\0') {
+        sparams.grammar = grammar_chars;
+    }
+    sparams.grammar_lazy = grammar_lazy;
+    if (grammar_triggers != nullptr) {
+        int grammar_triggers_size = readablearray::size(env, grammar_triggers);
+        for (int i = 0; i < grammar_triggers_size; i++) {
+            common_grammar_trigger trigger;
+            auto trigger_map = readablearray::getMap(env, grammar_triggers, i);
+            jstring trigger_word = readablemap::getString(env, trigger_map, "word", nullptr);
+            jboolean trigger_at_start = readablemap::getBool(env, trigger_map, "at_start", false);
+            trigger.word = env->GetStringUTFChars(trigger_word, nullptr);
+            trigger.at_start = trigger_at_start;
+
+            auto ids = common_tokenize(llama->ctx, trigger.word, /* add_special= */ false, /* parse_special= */ true);
+            if (ids.size() == 1) {
+                sparams.grammar_trigger_tokens.push_back(ids[0]);
+                sparams.preserved_tokens.insert(ids[0]);
+                continue;
+            }
+            sparams.grammar_trigger_words.push_back(trigger);
+        }
+    }
+
+    auto json_schema_chars = env->GetStringUTFChars(json_schema, nullptr);
+    if ((!grammar_chars || grammar_chars[0] == '\0') && json_schema_chars && json_schema_chars[0] != '\0') {
+        auto schema = json::parse(json_schema_chars);
+        sparams.grammar = json_schema_to_grammar(schema);
+    }
+    env->ReleaseStringUTFChars(json_schema, json_schema_chars);
+
+    if (preserved_tokens != nullptr) {
+        int preserved_tokens_size = readablearray::size(env, preserved_tokens);
+        for (int i = 0; i < preserved_tokens_size; i++) {
+            jstring preserved_token = readablearray::getString(env, preserved_tokens, i);
+            auto ids = common_tokenize(llama->ctx, env->GetStringUTFChars(preserved_token, nullptr), /* add_special= */ false, /* parse_special= */ true);
+            if (ids.size() == 1) {
+                sparams.preserved_tokens.insert(ids[0]);
+            } else {
+                LOGI("[RNLlama] Not preserved because more than 1 token (wrong chat template override?): %s", env->GetStringUTFChars(preserved_token, nullptr));
+            }
+        }
+    }

     const llama_model * model = llama_get_model(llama->ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);
@@ -744,11 +895,51 @@ Java_com_rnllama_LlamaContext_doCompletion(
         }
     }

+    env->ReleaseStringUTFChars(grammar, grammar_chars);
+    env->ReleaseStringUTFChars(prompt, prompt_chars);
     llama_perf_context_print(llama->ctx);
     llama->is_predicting = false;

+    auto toolCalls = createWritableArray(env);
+    std::string reasoningContent = "";
+    std::string *content = nullptr;
+    auto toolCallsSize = 0;
+    if (!llama->is_interrupted) {
+        try {
+            common_chat_msg message = common_chat_parse(llama->generated_text, static_cast<common_chat_format>(chat_format));
+            if (!message.reasoning_content.empty()) {
+                reasoningContent = message.reasoning_content;
+            }
+            content = &message.content;
+            for (const auto &tc : message.tool_calls) {
+                auto toolCall = createWriteableMap(env);
+                putString(env, toolCall, "type", "function");
+                auto functionMap = createWriteableMap(env);
+                putString(env, functionMap, "name", tc.name.c_str());
+                putString(env, functionMap, "arguments", tc.arguments.c_str());
+                putMap(env, toolCall, "function", functionMap);
+                if (!tc.id.empty()) {
+                    putString(env, toolCall, "id", tc.id.c_str());
+                }
+                pushMap(env, toolCalls, toolCall);
+                toolCallsSize++;
+            }
+        } catch (const std::exception &e) {
+            // LOGI("Error parsing tool calls: %s", e.what());
+        }
+    }
+
     auto result = createWriteableMap(env);
     putString(env, result, "text", llama->generated_text.c_str());
+    if (content) {
+        putString(env, result, "content", content->c_str());
+    }
+    if (!reasoningContent.empty()) {
+        putString(env, result, "reasoning_content", reasoningContent.c_str());
+    }
+    if (toolCallsSize > 0) {
+        putArray(env, result, "tool_calls", toolCalls);
+    }
     putArray(env, result, "completion_probabilities", tokenProbsToMap(env, llama, llama->generated_token_probs));
     putInt(env, result, "tokens_predicted", llama->num_tokens_predicted);
     putInt(env, result, "tokens_evaluated", llama->num_prompt_tokens);
@@ -977,11 +1168,76 @@ Java_com_rnllama_LlamaContext_freeContext(
     delete llama;
 }

+struct log_callback_context {
+    JavaVM *jvm;
+    jobject callback;
+};
+
+static void rnllama_log_callback_to_j(lm_ggml_log_level level, const char * text, void * data) {
+    auto level_c = "";
+    if (level == LM_GGML_LOG_LEVEL_ERROR) {
+        __android_log_print(ANDROID_LOG_ERROR, TAG, text, nullptr);
+        level_c = "error";
+    } else if (level == LM_GGML_LOG_LEVEL_INFO) {
+        __android_log_print(ANDROID_LOG_INFO, TAG, text, nullptr);
+        level_c = "info";
+    } else if (level == LM_GGML_LOG_LEVEL_WARN) {
+        __android_log_print(ANDROID_LOG_WARN, TAG, text, nullptr);
+        level_c = "warn";
+    } else {
+        __android_log_print(ANDROID_LOG_DEFAULT, TAG, text, nullptr);
+    }
+
+    log_callback_context *cb_ctx = (log_callback_context *) data;
+
+    JNIEnv *env;
+    bool need_detach = false;
+    int getEnvResult = cb_ctx->jvm->GetEnv((void**)&env, JNI_VERSION_1_6);
+
+    if (getEnvResult == JNI_EDETACHED) {
+        if (cb_ctx->jvm->AttachCurrentThread(&env, nullptr) == JNI_OK) {
+            need_detach = true;
+        } else {
+            return;
+        }
+    } else if (getEnvResult != JNI_OK) {
+        return;
+    }
+
+    jobject callback = cb_ctx->callback;
+    jclass cb_class = env->GetObjectClass(callback);
+    jmethodID emitNativeLog = env->GetMethodID(cb_class, "emitNativeLog", "(Ljava/lang/String;Ljava/lang/String;)V");
+
+    jstring level_str = env->NewStringUTF(level_c);
+    jstring text_str = env->NewStringUTF(text);
+    env->CallVoidMethod(callback, emitNativeLog, level_str, text_str);
+    env->DeleteLocalRef(level_str);
+    env->DeleteLocalRef(text_str);
+
+    if (need_detach) {
+        cb_ctx->jvm->DetachCurrentThread();
+    }
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_setupLog(JNIEnv *env, jobject thiz, jobject logCallback) {
+    UNUSED(thiz);
+
+    log_callback_context *cb_ctx = new log_callback_context;
+
+    JavaVM *jvm;
+    env->GetJavaVM(&jvm);
+    cb_ctx->jvm = jvm;
+    cb_ctx->callback = env->NewGlobalRef(logCallback);
+
+    llama_log_set(rnllama_log_callback_to_j, cb_ctx);
+}
+
 JNIEXPORT void JNICALL
-
+Java_com_rnllama_LlamaContext_unsetLog(JNIEnv *env, jobject thiz) {
     UNUSED(env);
     UNUSED(thiz);
-    llama_log_set(
+    llama_log_set(rnllama_log_callback_default, NULL);
 }

 } // extern "C"
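Note on the new logging hooks: setupLog() looks up emitNativeLog with the JNI signature (Ljava/lang/String;Ljava/lang/String;)V on whatever callback object it is given, so the Java side must expose a method with exactly that name and shape. Below is a minimal sketch of such a receiver; only the emitNativeLog signature comes from the diff, while the class name, constructor, and event name are illustrative guesses.

import com.facebook.react.bridge.Arguments;
import com.facebook.react.bridge.ReactApplicationContext;
import com.facebook.react.bridge.WritableMap;
import com.facebook.react.modules.core.DeviceEventManagerModule;

// Hypothetical receiver for rnllama_log_callback_to_j; the JNI code above
// resolves emitNativeLog(String, String) on this object via GetMethodID.
class NativeLogSink {
  private final ReactApplicationContext reactContext;

  NativeLogSink(ReactApplicationContext reactContext) {
    this.reactContext = reactContext;
  }

  // Invoked from the native logging thread (attached to the JVM on demand).
  public void emitNativeLog(String level, String text) {
    WritableMap event = Arguments.createMap();
    event.putString("level", level);
    event.putString("text", text);
    reactContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class)
        .emit("@RNLlama_onNativeLog", event); // event name is a guess
  }
}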
package/android/src/main/jniLibs/ (8 prebuilt *.so libraries, listed in the manifest above)
CHANGED (binary contents not shown)
package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
CHANGED
@@ -33,6 +33,11 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     return NAME;
   }

+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnllama.toggleNativeLog(enabled, promise);
+  }
+
   @ReactMethod
   public void setContextLimit(double limit, Promise promise) {
     rnllama.setContextLimit(limit, promise);
@@ -49,8 +54,8 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
   }

   @ReactMethod
-  public void getFormattedChat(double id,
-    rnllama.getFormattedChat(id, messages, chatTemplate, promise);
+  public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
+    rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
   }

   @ReactMethod
package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java
CHANGED
@@ -34,6 +34,11 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
     return NAME;
   }

+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnllama.toggleNativeLog(enabled, promise);
+  }
+
   @ReactMethod
   public void setContextLimit(double limit, Promise promise) {
     rnllama.setContextLimit(limit, promise);
@@ -50,8 +55,8 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
   }

   @ReactMethod
-  public void getFormattedChat(double id,
-    rnllama.getFormattedChat(id, messages, chatTemplate, promise);
+  public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
+    rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
   }

   @ReactMethod
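In both module variants, getFormattedChat now takes the messages as a JSON string plus a ReadableMap of options in place of the old argument list. A minimal caller sketch follows, assuming the map keys mirror the getFormattedChatWithJinja JNI parameters; "jsonSchema" and "parallelToolCalls" are guessed key names, since the authoritative mapping lives in RNLlama.java, which this diff section does not show.

import com.facebook.react.bridge.Arguments;
import com.facebook.react.bridge.Promise;
import com.facebook.react.bridge.WritableMap;

class FormattedChatCaller {
  // `module` is an RNLlamaModule instance and `id` a context handle
  // returned by initContext; both are placeholders for illustration.
  static void format(RNLlamaModule module, double id, Promise promise) {
    WritableMap params = Arguments.createMap();
    params.putString("jsonSchema", "{\"type\":\"object\"}"); // key name is a guess
    params.putBoolean("parallelToolCalls", false);           // key name is a guess
    module.getFormattedChat(
        id,
        "[{\"role\":\"user\",\"content\":\"Hello\"}]", // messages as a JSON string
        "",                                            // empty = use the model's own template
        params,
        promise);
  }
}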