@novastera-oss/llamarn 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +3 -2
- package/android/CMakeLists.txt +6 -3
- package/android/src/main/cpp/include/llama.h +12 -8
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +46 -65
- package/cpp/LlamaCppModel.h +5 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +5 -8
- package/cpp/llama.cpp/common/arg.cpp +8 -6
- package/cpp/llama.cpp/common/chat-parser.cpp +4 -3
- package/cpp/llama.cpp/common/chat-parser.h +2 -1
- package/cpp/llama.cpp/common/chat.cpp +4 -4
- package/cpp/llama.cpp/common/common.cpp +2 -0
- package/cpp/llama.cpp/common/json-partial.cpp +5 -4
- package/cpp/llama.cpp/common/json-partial.h +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +31 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +23 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +19 -8
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +9 -2
- package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
- package/cpp/llama.cpp/include/llama.h +12 -8
- package/cpp/llama.cpp/src/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +19 -12
- package/cpp/llama.cpp/src/llama-batch.h +15 -10
- package/cpp/llama.cpp/src/llama-context.cpp +226 -151
- package/cpp/llama.cpp/src/llama-context.h +25 -8
- package/cpp/llama.cpp/src/llama-graph.cpp +50 -47
- package/cpp/llama.cpp/src/llama-graph.h +25 -24
- package/cpp/llama.cpp/src/llama-kv-cache-recurrent.cpp +1132 -0
- package/cpp/llama.cpp/src/llama-kv-cache-recurrent.h +191 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +249 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +136 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1717 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +278 -0
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2746
- package/cpp/llama.cpp/src/llama-kv-cache.h +14 -472
- package/cpp/llama.cpp/src/llama-kv-cells.h +37 -6
- package/cpp/llama.cpp/src/llama-memory.h +44 -0
- package/cpp/llama.cpp/src/llama-model.cpp +23 -16
- package/cpp/llama.cpp/src/llama-vocab.cpp +7 -2
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
- package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
- package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
- package/cpp/rn-completion.cpp +101 -52
- package/cpp/rn-utils.hpp +8 -1
- package/ios/include/common/minja/chat-template.hpp +1 -1
- package/ios/include/common/minja/minja.hpp +1 -1
- package/ios/include/json-schema-to-grammar.h +4 -4
- package/ios/include/llama.h +12 -8
- package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
- package/ios/libs/llama.xcframework/Info.plist +22 -22
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4689 -4617
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4710 -4638
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3622 -3557
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4710 -4638
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3624 -3559
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4689 -4616
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4710 -4637
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3622 -3556
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4725 -4653
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4746 -4674
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3652 -3587
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +12 -8
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
package/RNLlamaCpp.podspec
CHANGED
|
@@ -39,7 +39,8 @@ Pod::Spec.new do |s|
|
|
|
39
39
|
"cpp/llama.cpp/common/speculative.{h,cpp}",
|
|
40
40
|
"cpp/llama.cpp/common/llguidance.{h,cpp}",
|
|
41
41
|
"cpp/llama.cpp/common/*.hpp",
|
|
42
|
-
"cpp/llama.cpp/
|
|
42
|
+
"cpp/llama.cpp/vendor/minja/*.hpp"
|
|
43
|
+
"cpp/llama.cpp/vendor/nlohmann/*.hpp"
|
|
43
44
|
|
|
44
45
|
# Include all necessary headers for compilation
|
|
45
46
|
s.preserve_paths = "ios/include/**/*.h",
|
|
@@ -51,7 +52,7 @@ Pod::Spec.new do |s|
|
|
|
51
52
|
|
|
52
53
|
# Compiler settings
|
|
53
54
|
s.pod_target_xcconfig = {
|
|
54
|
-
"HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
|
|
55
|
+
"HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
|
|
55
56
|
"OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
|
|
56
57
|
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
57
58
|
"GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
|
package/android/CMakeLists.txt
CHANGED
|
@@ -141,7 +141,8 @@ target_include_directories(common PRIVATE
|
|
|
141
141
|
${LLAMA_CPP_DIR}/ggml/include
|
|
142
142
|
${LLAMA_CPP_DIR}/include
|
|
143
143
|
${LLAMA_CPP_DIR}/common
|
|
144
|
-
${LLAMA_CPP_DIR}/
|
|
144
|
+
${LLAMA_CPP_DIR}/vendor/minja
|
|
145
|
+
${LLAMA_CPP_DIR}/vendor
|
|
145
146
|
${LLAMA_CPP_DIR}/src
|
|
146
147
|
)
|
|
147
148
|
|
|
@@ -150,7 +151,8 @@ target_include_directories(RNLlamaCpp PRIVATE
|
|
|
150
151
|
${LLAMA_CPP_DIR}/ggml/include
|
|
151
152
|
${LLAMA_CPP_DIR}/include
|
|
152
153
|
${LLAMA_CPP_DIR}/common
|
|
153
|
-
${LLAMA_CPP_DIR}/
|
|
154
|
+
${LLAMA_CPP_DIR}/vendor/minja # Add this for chat-template.hpp
|
|
155
|
+
${LLAMA_CPP_DIR}/vendor
|
|
154
156
|
${LLAMA_CPP_DIR}/src
|
|
155
157
|
# Add the generated headers path
|
|
156
158
|
${MODULE_ROOT}/android/generated/jni
|
|
@@ -244,6 +246,7 @@ target_include_directories(RNLlamaCpp INTERFACE
|
|
|
244
246
|
${LLAMA_CPP_DIR}/ggml/include
|
|
245
247
|
${LLAMA_CPP_DIR}/include
|
|
246
248
|
${LLAMA_CPP_DIR}/common
|
|
247
|
-
${LLAMA_CPP_DIR}/
|
|
249
|
+
${LLAMA_CPP_DIR}/vendor/minja
|
|
250
|
+
${LLAMA_CPP_DIR}/vendor
|
|
248
251
|
${LLAMA_CPP_DIR}/src
|
|
249
252
|
)
|
|
@@ -259,9 +259,9 @@ extern "C" {
|
|
|
259
259
|
llama_token * token;
|
|
260
260
|
float * embd;
|
|
261
261
|
llama_pos * pos;
|
|
262
|
-
int32_t * n_seq_id;
|
|
263
|
-
llama_seq_id ** seq_id;
|
|
264
|
-
int8_t * logits;
|
|
262
|
+
int32_t * n_seq_id; // TODO: remove, should belong to only 1 sequence
|
|
263
|
+
llama_seq_id ** seq_id; // TODO: become llama_seq_id * seq_id;
|
|
264
|
+
int8_t * logits; // TODO: rename this to "output"
|
|
265
265
|
} llama_batch;
|
|
266
266
|
|
|
267
267
|
enum llama_model_kv_override_type {
|
|
@@ -366,6 +366,8 @@ extern "C" {
|
|
|
366
366
|
bool no_perf; // measure performance timings
|
|
367
367
|
bool op_offload; // offload host tensor operations to device
|
|
368
368
|
bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
|
369
|
+
// NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
|
|
370
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
|
|
369
371
|
};
|
|
370
372
|
|
|
371
373
|
// model quantization parameters
|
|
@@ -502,6 +504,7 @@ extern "C" {
|
|
|
502
504
|
LLAMA_API int32_t llama_model_n_layer (const struct llama_model * model);
|
|
503
505
|
LLAMA_API int32_t llama_model_n_head (const struct llama_model * model);
|
|
504
506
|
LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
|
|
507
|
+
LLAMA_API int32_t llama_model_n_swa (const struct llama_model * model);
|
|
505
508
|
|
|
506
509
|
// Get the model's RoPE frequency scaling factor
|
|
507
510
|
LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
|
|
@@ -652,7 +655,6 @@ extern "C" {
|
|
|
652
655
|
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
|
653
656
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
|
654
657
|
// - lazily on next llama_decode()
|
|
655
|
-
// - explicitly with llama_kv_self_update()
|
|
656
658
|
// p0 < 0 : [0, p1]
|
|
657
659
|
// p1 < 0 : [p0, inf)
|
|
658
660
|
LLAMA_API void llama_kv_self_seq_add(
|
|
@@ -665,7 +667,6 @@ extern "C" {
|
|
|
665
667
|
// Integer division of the positions by factor of `d > 1`
|
|
666
668
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
|
667
669
|
// - lazily on next llama_decode()
|
|
668
|
-
// - explicitly with llama_kv_self_update()
|
|
669
670
|
// p0 < 0 : [0, p1]
|
|
670
671
|
// p1 < 0 : [p0, inf)
|
|
671
672
|
LLAMA_API void llama_kv_self_seq_div(
|
|
@@ -677,12 +678,14 @@ extern "C" {
|
|
|
677
678
|
|
|
678
679
|
// Returns the smallest position present in the KV cache for the specified sequence
|
|
679
680
|
// This is typically non-zero only for SWA caches
|
|
681
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
|
|
680
682
|
// Return -1 if the sequence is empty
|
|
681
683
|
LLAMA_API llama_pos llama_kv_self_seq_pos_min(
|
|
682
684
|
struct llama_context * ctx,
|
|
683
685
|
llama_seq_id seq_id);
|
|
684
686
|
|
|
685
687
|
// Returns the largest position present in the KV cache for the specified sequence
|
|
688
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
|
|
686
689
|
// Return -1 if the sequence is empty
|
|
687
690
|
LLAMA_API llama_pos llama_kv_self_seq_pos_max(
|
|
688
691
|
struct llama_context * ctx,
|
|
@@ -691,14 +694,15 @@ extern "C" {
|
|
|
691
694
|
// Defragment the KV cache
|
|
692
695
|
// This will be applied:
|
|
693
696
|
// - lazily on next llama_decode()
|
|
694
|
-
|
|
695
|
-
|
|
697
|
+
LLAMA_API DEPRECATED(void llama_kv_self_defrag(struct llama_context * ctx),
|
|
698
|
+
"simply remove this call, the context will automatically decide when to do a defragmentation based on 'defrag_thold'");
|
|
696
699
|
|
|
697
700
|
// Check if the context supports KV cache shifting
|
|
698
701
|
LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
|
|
699
702
|
|
|
700
703
|
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
|
701
|
-
LLAMA_API void llama_kv_self_update(struct llama_context * ctx)
|
|
704
|
+
LLAMA_API DEPRECATED(void llama_kv_self_update(struct llama_context * ctx),
|
|
705
|
+
"simply remove this call, updates are applied lazily on the next llama_decode()");
|
|
702
706
|
|
|
703
707
|
//
|
|
704
708
|
// State / sessions
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/cpp/LlamaCppModel.cpp
CHANGED
|
@@ -242,38 +242,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
|
|
|
242
242
|
auto paramsVal = fnObj.getProperty(rt, "parameters");
|
|
243
243
|
if (paramsVal.isObject()) {
|
|
244
244
|
try {
|
|
245
|
-
// Convert the JSI object directly to nlohmann::json
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
size_t propCount = propNames.size(rt);
|
|
252
|
-
for (size_t i = 0; i < propCount; i++) {
|
|
253
|
-
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
254
|
-
std::string key = propName.utf8(rt);
|
|
255
|
-
auto value = paramsObj.getProperty(rt, propName);
|
|
256
|
-
|
|
257
|
-
if (value.isString()) {
|
|
258
|
-
fnParams[key] = value.asString(rt).utf8(rt);
|
|
259
|
-
} else if (value.isNumber()) {
|
|
260
|
-
fnParams[key] = value.asNumber();
|
|
261
|
-
} else if (value.isBool()) {
|
|
262
|
-
fnParams[key] = value.getBool();
|
|
263
|
-
} else if (value.isNull()) {
|
|
264
|
-
fnParams[key] = nullptr;
|
|
265
|
-
} else if (value.isObject()) {
|
|
266
|
-
if (value.getObject(rt).isArray(rt)) {
|
|
267
|
-
fnParams[key] = json::array();
|
|
268
|
-
} else {
|
|
269
|
-
fnParams[key] = json::object();
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
fnJson["parameters"] = fnParams;
|
|
275
|
-
} catch (const std::exception&) {
|
|
276
|
-
fnJson["parameters"] = json::object();
|
|
245
|
+
// Convert the JSI object directly to nlohmann::json using the new helper
|
|
246
|
+
fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
|
|
247
|
+
} catch (const std::exception& e) {
|
|
248
|
+
// Log error or handle as appropriate
|
|
249
|
+
fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
|
|
250
|
+
fnJson["parameters"] = json::object(); // Fallback to empty object
|
|
277
251
|
}
|
|
278
252
|
}
|
|
279
253
|
}
|
|
@@ -336,39 +310,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
|
|
|
336
310
|
auto paramsVal = fnObj.getProperty(rt, "parameters");
|
|
337
311
|
if (paramsVal.isObject()) {
|
|
338
312
|
try {
|
|
339
|
-
// Convert the JSI object directly to nlohmann::json
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
size_t propCount = propNames.size(rt);
|
|
346
|
-
for (size_t i = 0; i < propCount; i++) {
|
|
347
|
-
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
348
|
-
std::string key = propName.utf8(rt);
|
|
349
|
-
auto value = paramsObj.getProperty(rt, propName);
|
|
350
|
-
|
|
351
|
-
if (value.isString()) {
|
|
352
|
-
fnParams[key] = value.asString(rt).utf8(rt);
|
|
353
|
-
} else if (value.isNumber()) {
|
|
354
|
-
fnParams[key] = value.asNumber();
|
|
355
|
-
} else if (value.isBool()) {
|
|
356
|
-
fnParams[key] = value.getBool();
|
|
357
|
-
} else if (value.isNull()) {
|
|
358
|
-
fnParams[key] = nullptr;
|
|
359
|
-
} else if (value.isObject()) {
|
|
360
|
-
// For nested objects, we use a simplified approach
|
|
361
|
-
if (value.getObject(rt).isArray(rt)) {
|
|
362
|
-
fnParams[key] = json::array();
|
|
363
|
-
} else {
|
|
364
|
-
fnParams[key] = json::object();
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
fnJson["parameters"] = fnParams;
|
|
370
|
-
} catch (const std::exception&) {
|
|
371
|
-
fnJson["parameters"] = json::object();
|
|
313
|
+
// Convert the JSI object directly to nlohmann::json using the new helper
|
|
314
|
+
fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
|
|
315
|
+
} catch (const std::exception& e) {
|
|
316
|
+
// Log error or handle as appropriate
|
|
317
|
+
fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
|
|
318
|
+
fnJson["parameters"] = json::object(); // Fallback to empty object
|
|
372
319
|
}
|
|
373
320
|
}
|
|
374
321
|
}
|
|
@@ -553,6 +500,40 @@ jsi::Value LlamaCppModel::jsonToJsi(jsi::Runtime& rt, const json& j) {
|
|
|
553
500
|
return jsi::Value::undefined();
|
|
554
501
|
}
|
|
555
502
|
|
|
503
|
+
// Helper to convert JSI Value to nlohmann::json
|
|
504
|
+
json LlamaCppModel::jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val) {
|
|
505
|
+
if (val.isUndefined() || val.isNull()) {
|
|
506
|
+
return nullptr;
|
|
507
|
+
} else if (val.isBool()) {
|
|
508
|
+
return val.getBool();
|
|
509
|
+
} else if (val.isNumber()) {
|
|
510
|
+
return val.getNumber();
|
|
511
|
+
} else if (val.isString()) {
|
|
512
|
+
return val.getString(rt).utf8(rt);
|
|
513
|
+
} else if (val.isObject()) {
|
|
514
|
+
jsi::Object jsiObj = val.getObject(rt);
|
|
515
|
+
if (jsiObj.isArray(rt)) {
|
|
516
|
+
jsi::Array jsiArr = jsiObj.getArray(rt);
|
|
517
|
+
json jsonArr = json::array();
|
|
518
|
+
for (size_t i = 0; i < jsiArr.size(rt); ++i) {
|
|
519
|
+
jsonArr.push_back(jsiValueToJson(rt, jsiArr.getValueAtIndex(rt, i)));
|
|
520
|
+
}
|
|
521
|
+
return jsonArr;
|
|
522
|
+
} else {
|
|
523
|
+
json jsonObj = json::object();
|
|
524
|
+
jsi::Array propNames = jsiObj.getPropertyNames(rt);
|
|
525
|
+
for (size_t i = 0; i < propNames.size(rt); ++i) {
|
|
526
|
+
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
527
|
+
std::string key = propName.utf8(rt);
|
|
528
|
+
jsonObj[key] = jsiValueToJson(rt, jsiObj.getProperty(rt, propName));
|
|
529
|
+
}
|
|
530
|
+
return jsonObj;
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
// Should not happen for valid JSON-like structures
|
|
534
|
+
return nullptr;
|
|
535
|
+
}
|
|
536
|
+
|
|
556
537
|
// JSI method for completions (synchronous - kept for compatibility)
|
|
557
538
|
jsi::Value LlamaCppModel::completionJsi(jsi::Runtime& rt, const jsi::Value* args, size_t count) {
|
|
558
539
|
if (count < 1 || !args[0].isObject()) {
|
package/cpp/LlamaCppModel.h
CHANGED
|
@@ -21,6 +21,9 @@
|
|
|
21
21
|
#include "rn-utils.hpp"
|
|
22
22
|
#include "rn-llama.hpp"
|
|
23
23
|
|
|
24
|
+
// Include json.hpp for json handling
|
|
25
|
+
#include "nlohmann/json.hpp"
|
|
26
|
+
|
|
24
27
|
namespace facebook::react {
|
|
25
28
|
|
|
26
29
|
// Chat message structure for representing messages in a conversation
|
|
@@ -166,6 +169,8 @@ private:
|
|
|
166
169
|
|
|
167
170
|
// Add CallInvoker for async operations
|
|
168
171
|
std::shared_ptr<CallInvoker> jsInvoker_;
|
|
172
|
+
|
|
173
|
+
static json jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val); // Declaration of new helper
|
|
169
174
|
};
|
|
170
175
|
|
|
171
176
|
} // namespace facebook::react
|
package/cpp/build-info.cpp
CHANGED
package/cpp/llama.cpp/README.md
CHANGED
|
@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
|
|
|
130
130
|
<details>
|
|
131
131
|
<summary>Bindings</summary>
|
|
132
132
|
|
|
133
|
+
- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
|
|
133
134
|
- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
|
|
134
135
|
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
|
135
136
|
- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
|
|
@@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
|
|
|
58
58
|
arg.cpp
|
|
59
59
|
arg.h
|
|
60
60
|
base64.hpp
|
|
61
|
-
chat.cpp
|
|
62
|
-
chat.h
|
|
63
61
|
chat-parser.cpp
|
|
64
62
|
chat-parser.h
|
|
63
|
+
chat.cpp
|
|
64
|
+
chat.h
|
|
65
65
|
common.cpp
|
|
66
66
|
common.h
|
|
67
67
|
console.cpp
|
|
68
68
|
console.h
|
|
69
|
-
json-schema-to-grammar.cpp
|
|
70
|
-
json.hpp
|
|
71
|
-
json-partial.h
|
|
72
69
|
json-partial.cpp
|
|
70
|
+
json-partial.h
|
|
71
|
+
json-schema-to-grammar.cpp
|
|
73
72
|
llguidance.cpp
|
|
74
73
|
log.cpp
|
|
75
74
|
log.h
|
|
76
|
-
minja/chat-template.hpp
|
|
77
|
-
minja/minja.hpp
|
|
78
75
|
ngram-cache.cpp
|
|
79
76
|
ngram-cache.h
|
|
80
77
|
regex-partial.cpp
|
|
@@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
|
|
|
147
144
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
|
|
148
145
|
endif ()
|
|
149
146
|
|
|
150
|
-
target_include_directories(${TARGET} PUBLIC .)
|
|
147
|
+
target_include_directories(${TARGET} PUBLIC . ../vendor)
|
|
151
148
|
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
|
152
149
|
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|
|
153
150
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
#include "gguf.h" // for reading GGUF splits
|
|
2
1
|
#include "arg.h"
|
|
3
2
|
|
|
3
|
+
#include "chat.h"
|
|
4
4
|
#include "common.h"
|
|
5
|
+
#include "gguf.h" // for reading GGUF splits
|
|
6
|
+
#include "json-schema-to-grammar.h"
|
|
5
7
|
#include "log.h"
|
|
6
8
|
#include "sampling.h"
|
|
7
|
-
#include "chat.h"
|
|
8
9
|
|
|
9
10
|
// fix problem with std::min and std::max
|
|
10
11
|
#if defined(_WIN32)
|
|
@@ -15,6 +16,9 @@
|
|
|
15
16
|
#include <windows.h>
|
|
16
17
|
#endif
|
|
17
18
|
|
|
19
|
+
#define JSON_ASSERT GGML_ASSERT
|
|
20
|
+
#include <nlohmann/json.hpp>
|
|
21
|
+
|
|
18
22
|
#include <algorithm>
|
|
19
23
|
#include <climits>
|
|
20
24
|
#include <cstdarg>
|
|
@@ -34,8 +38,6 @@
|
|
|
34
38
|
#include <future>
|
|
35
39
|
#endif
|
|
36
40
|
|
|
37
|
-
#include "json-schema-to-grammar.h"
|
|
38
|
-
|
|
39
41
|
using json = nlohmann::ordered_json;
|
|
40
42
|
|
|
41
43
|
std::initializer_list<enum llama_example> mmproj_examples = {
|
|
@@ -1346,9 +1348,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1346
1348
|
));
|
|
1347
1349
|
add_opt(common_arg(
|
|
1348
1350
|
{"--prio"}, "N",
|
|
1349
|
-
string_format("set process/thread priority :
|
|
1351
|
+
string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
|
|
1350
1352
|
[](common_params & params, int prio) {
|
|
1351
|
-
if (prio <
|
|
1353
|
+
if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
|
|
1352
1354
|
throw std::invalid_argument("invalid value");
|
|
1353
1355
|
}
|
|
1354
1356
|
params.cpuparams.priority = (enum ggml_sched_priority) prio;
|
|
@@ -154,9 +154,10 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
|
|
|
154
154
|
if (!rest.empty()) {
|
|
155
155
|
handle_reasoning(rest, /* closed */ !is_partial());
|
|
156
156
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
157
|
+
// Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
|
|
158
|
+
// if (!syntax_.thinking_forced_open) {
|
|
159
|
+
// throw common_chat_msg_partial_exception(end_think);
|
|
160
|
+
// }
|
|
160
161
|
return true;
|
|
161
162
|
}
|
|
162
163
|
}
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
#include "chat.h"
|
|
2
2
|
#include "chat-parser.h"
|
|
3
3
|
#include "common.h"
|
|
4
|
+
#include "json-partial.h"
|
|
4
5
|
#include "json-schema-to-grammar.h"
|
|
5
6
|
#include "log.h"
|
|
6
|
-
#include "json-partial.h"
|
|
7
|
-
#include "minja/chat-template.hpp"
|
|
8
|
-
#include "minja/minja.hpp"
|
|
9
7
|
#include "regex-partial.h"
|
|
10
8
|
|
|
9
|
+
#include <minja/chat-template.hpp>
|
|
10
|
+
#include <minja/minja.hpp>
|
|
11
|
+
|
|
11
12
|
#include <cstdio>
|
|
12
13
|
#include <exception>
|
|
13
14
|
#include <iostream>
|
|
@@ -16,7 +17,6 @@
|
|
|
16
17
|
#include <string>
|
|
17
18
|
#include <vector>
|
|
18
19
|
|
|
19
|
-
|
|
20
20
|
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
|
|
21
21
|
auto time = std::chrono::system_clock::to_time_t(now);
|
|
22
22
|
auto local_time = *std::localtime(&time);
|
|
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|
|
203
203
|
|
|
204
204
|
DWORD p = NORMAL_PRIORITY_CLASS;
|
|
205
205
|
switch (prio) {
|
|
206
|
+
case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
|
|
206
207
|
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
|
|
207
208
|
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
|
|
208
209
|
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
|
|
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|
|
228
229
|
|
|
229
230
|
int p = 0;
|
|
230
231
|
switch (prio) {
|
|
232
|
+
case GGML_SCHED_PRIO_LOW: p = 5; break;
|
|
231
233
|
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
|
|
232
234
|
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
|
|
233
235
|
case GGML_SCHED_PRIO_HIGH: p = -10; break;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
|
-
#include
|
|
4
|
-
|
|
5
|
-
#
|
|
6
|
-
#include
|
|
3
|
+
#include <nlohmann/json_fwd.hpp>
|
|
4
|
+
|
|
5
|
+
#include <functional>
|
|
6
|
+
#include <string>
|
|
7
7
|
|
|
8
8
|
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
|
|
9
9
|
bool force_gbnf = false);
|
|
@@ -1047,6 +1047,10 @@ class TextModel(ModelBase):
|
|
|
1047
1047
|
special_vocab.chat_template = "rwkv-world"
|
|
1048
1048
|
# hack: Add '\n\n' as the EOT token to make it chat normally
|
|
1049
1049
|
special_vocab._set_special_token("eot", 261)
|
|
1050
|
+
# hack: Override these as they have already been set (incorrectly)
|
|
1051
|
+
special_vocab.special_token_ids["bos"] = 0
|
|
1052
|
+
special_vocab.special_token_ids["eos"] = 0
|
|
1053
|
+
|
|
1050
1054
|
special_vocab.add_to_gguf(self.gguf_writer)
|
|
1051
1055
|
|
|
1052
1056
|
def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int):
|
|
@@ -3810,7 +3814,7 @@ class BertModel(TextModel):
|
|
|
3810
3814
|
remove_whitespaces = tokenizer.clean_up_tokenization_spaces
|
|
3811
3815
|
precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
|
|
3812
3816
|
|
|
3813
|
-
vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
|
|
3817
|
+
vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
|
|
3814
3818
|
else:
|
|
3815
3819
|
sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
|
|
3816
3820
|
sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
|
|
@@ -3823,7 +3827,7 @@ class BertModel(TextModel):
|
|
|
3823
3827
|
tokenizer = SentencePieceProcessor()
|
|
3824
3828
|
tokenizer.LoadFromFile(str(tokenizer_path))
|
|
3825
3829
|
|
|
3826
|
-
vocab_size = self.hparams.get(
|
|
3830
|
+
vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
|
|
3827
3831
|
|
|
3828
3832
|
tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
|
|
3829
3833
|
scores: list[float] = [-10000.0] * vocab_size
|
|
@@ -3853,33 +3857,26 @@ class BertModel(TextModel):
|
|
|
3853
3857
|
unk_token = tokenizer_config_json.get("unk_token")
|
|
3854
3858
|
unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
|
|
3855
3859
|
|
|
3856
|
-
for token_id in range(vocab_size):
|
|
3860
|
+
for token_id in range(tokenizer.vocab_size):
|
|
3857
3861
|
piece = tokenizer._convert_id_to_token(token_id)
|
|
3858
|
-
|
|
3859
|
-
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
|
|
3863
|
-
|
|
3864
|
-
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
|
|
3872
|
-
|
|
3873
|
-
|
|
3874
|
-
|
|
3875
|
-
|
|
3876
|
-
if vocab_size > len(tokens):
|
|
3877
|
-
pad_count = vocab_size - len(tokens)
|
|
3878
|
-
logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
|
|
3879
|
-
for i in range(1, pad_count + 1):
|
|
3880
|
-
tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
|
|
3881
|
-
scores.append(-1000.0)
|
|
3882
|
-
toktypes.append(SentencePieceTokenTypes.UNUSED)
|
|
3862
|
+
if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
|
|
3863
|
+
text = piece.encode("utf-8")
|
|
3864
|
+
score = tokenizer_json["model"]["vocab"][token_id][1]
|
|
3865
|
+
|
|
3866
|
+
toktype = SentencePieceTokenTypes.NORMAL
|
|
3867
|
+
if token_id == unk_token_id:
|
|
3868
|
+
toktype = SentencePieceTokenTypes.UNKNOWN
|
|
3869
|
+
elif token_id in tokenizer.all_special_ids:
|
|
3870
|
+
toktype = SentencePieceTokenTypes.CONTROL
|
|
3871
|
+
elif token_id in added_vocab.values():
|
|
3872
|
+
toktype = SentencePieceTokenTypes.USER_DEFINED
|
|
3873
|
+
# No reliable way to detect this, but jina doesn't have any
|
|
3874
|
+
# elif tokenizer.IsByte(token_id):
|
|
3875
|
+
# toktype = SentencePieceTokenTypes.BYTE
|
|
3876
|
+
|
|
3877
|
+
tokens[token_id] = text
|
|
3878
|
+
scores[token_id] = score
|
|
3879
|
+
toktypes[token_id] = toktype
|
|
3883
3880
|
|
|
3884
3881
|
if isinstance(tokenizer, SentencePieceProcessor):
|
|
3885
3882
|
# realign tokens (see HF tokenizer code)
|
|
@@ -3892,6 +3889,12 @@ class BertModel(TextModel):
|
|
|
3892
3889
|
SentencePieceTokenTypes.UNKNOWN,
|
|
3893
3890
|
] + toktypes[3:-1]
|
|
3894
3891
|
|
|
3892
|
+
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
|
|
3893
|
+
# Add mask token missing from sentencepiece.bpe.model
|
|
3894
|
+
tokens[250001] = b'<mask>'
|
|
3895
|
+
scores[250001] = 0.0
|
|
3896
|
+
toktypes[250001] = SentencePieceTokenTypes.CONTROL
|
|
3897
|
+
|
|
3895
3898
|
self.gguf_writer.add_tokenizer_model("t5")
|
|
3896
3899
|
self.gguf_writer.add_tokenizer_pre("default")
|
|
3897
3900
|
self.gguf_writer.add_token_list(tokens)
|
|
@@ -2095,9 +2095,6 @@ extern "C" {
|
|
|
2095
2095
|
GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
|
2096
2096
|
GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
|
2097
2097
|
|
|
2098
|
-
GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
|
|
2099
|
-
GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
|
|
2100
|
-
|
|
2101
2098
|
// print info and performance information for the graph
|
|
2102
2099
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
|
2103
2100
|
|
|
@@ -2181,6 +2178,7 @@ extern "C" {
|
|
|
2181
2178
|
|
|
2182
2179
|
// scheduling priorities
|
|
2183
2180
|
enum ggml_sched_priority {
|
|
2181
|
+
GGML_SCHED_PRIO_LOW = -1,
|
|
2184
2182
|
GGML_SCHED_PRIO_NORMAL,
|
|
2185
2183
|
GGML_SCHED_PRIO_MEDIUM,
|
|
2186
2184
|
GGML_SCHED_PRIO_HIGH,
|