cui-llama.rn 1.7.3 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
- package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/ggml.c
CHANGED
@@ -61,9 +61,6 @@
 #define m512i(p) (__m512i)(p)
 #endif
 
-// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
-float lm_ggml_table_f32_f16[1 << 16];
-
 #if defined(__linux__) || \
     defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
     (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
@@ -133,7 +130,7 @@ static void lm_ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     const char * LM_GGML_NO_BACKTRACE = getenv("LM_GGML_NO_BACKTRACE");
     if (LM_GGML_NO_BACKTRACE) {
         return;
@@ -160,6 +157,10 @@ static void lm_ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
@@ -167,6 +168,7 @@ static void lm_ggml_print_backtrace(void) {
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +197,7 @@ static void lm_ggml_print_backtrace(void) {
     }
 }
 #else
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +218,8 @@ void lm_ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// lm_ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
@@ -894,12 +898,6 @@ struct lm_ggml_context {
     struct lm_ggml_object * objects_end;
 };
 
-struct lm_ggml_context_container {
-    bool used;
-
-    struct lm_ggml_context context;
-};
-
 //
 // data types
 //
@@ -967,6 +965,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "UPSCALE",
     "PAD",
     "PAD_REFLECT_1D",
+    "ROLL",
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
@@ -997,7 +996,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "none",
@@ -1062,6 +1061,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "upscale(x)",
    "pad(x)",
    "pad_reflect_1d(x)",
+    "roll(x)",
    "arange(start, stop, step)",
    "timestep_embedding(timesteps, dim, max_period)",
    "argsort(x)",
@@ -1092,7 +1092,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static_assert(LM_GGML_OP_POOL_COUNT == 2, "LM_GGML_OP_POOL_COUNT != 2");
 
@@ -1432,14 +1432,6 @@ struct lm_ggml_context * lm_ggml_init(struct lm_ggml_init_params params) {
         // initialize time system (required on Windows)
         lm_ggml_time_init();
 
-        for (int i = 0; i < (1 << 16); ++i) {
-            union {
-                uint16_t u16;
-                lm_ggml_fp16_t fp16;
-            } u = {i};
-            lm_ggml_table_f32_f16[i] = LM_GGML_COMPUTE_FP16_TO_FP32(u.fp16);
-        }
-
         is_first_call = false;
     }
 
@@ -2325,6 +2317,26 @@ struct lm_ggml_tensor * lm_ggml_repeat(
     return result;
 }
 
+struct lm_ggml_tensor * lm_ggml_repeat_4d(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
+    const bool can_repeat = lm_ggml_is_empty(a) || (
+        (ne0 % a->ne[0] == 0) &&
+        (ne1 % a->ne[1] == 0) &&
+        (ne2 % a->ne[2] == 0) &&
+        (ne3 % a->ne[3] == 0)
+    );
+    LM_GGML_ASSERT(can_repeat);
+
+    struct lm_ggml_tensor * result = lm_ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
+
+    result->op     = LM_GGML_OP_REPEAT;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_repeat_back
 
 struct lm_ggml_tensor * lm_ggml_repeat_back(
@@ -4333,6 +4345,34 @@ struct lm_ggml_tensor * lm_ggml_pad_reflect_1d(
     return result;
 }
 
+// lm_ggml_roll
+
+struct lm_ggml_tensor * lm_ggml_roll(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int shift0,
+        int shift1,
+        int shift2,
+        int shift3) {
+    LM_GGML_ASSERT(a->nb[0] == lm_ggml_type_size(a->type));
+    LM_GGML_ASSERT(abs(shift0) < a->ne[0]);
+    LM_GGML_ASSERT(abs(shift1) < a->ne[1]);
+    LM_GGML_ASSERT(abs(shift2) < a->ne[2]);
+    LM_GGML_ASSERT(abs(shift3) < a->ne[3]);
+
+    struct lm_ggml_tensor * result = lm_ggml_dup_tensor(ctx, a);
+
+    lm_ggml_set_op_params_i32(result, 0, shift0);
+    lm_ggml_set_op_params_i32(result, 1, shift1);
+    lm_ggml_set_op_params_i32(result, 2, shift2);
+    lm_ggml_set_op_params_i32(result, 3, shift3);
+
+    result->op     = LM_GGML_OP_ROLL;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_arange
 
 struct lm_ggml_tensor * lm_ggml_arange(
package/cpp/ggml.h
CHANGED
@@ -490,6 +490,7 @@ extern "C" {
|
|
490
490
|
LM_GGML_OP_UPSCALE, // nearest interpolate
|
491
491
|
LM_GGML_OP_PAD,
|
492
492
|
LM_GGML_OP_PAD_REFLECT_1D,
|
493
|
+
LM_GGML_OP_ROLL,
|
493
494
|
LM_GGML_OP_ARANGE,
|
494
495
|
LM_GGML_OP_TIMESTEP_EMBEDDING,
|
495
496
|
LM_GGML_OP_ARGSORT,
|
@@ -936,6 +937,15 @@ extern "C" {
|
|
936
937
|
struct lm_ggml_tensor * a,
|
937
938
|
struct lm_ggml_tensor * b);
|
938
939
|
|
940
|
+
// repeat a to the specified shape
|
941
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_4d(
|
942
|
+
struct lm_ggml_context * ctx,
|
943
|
+
struct lm_ggml_tensor * a,
|
944
|
+
int64_t ne0,
|
945
|
+
int64_t ne1,
|
946
|
+
int64_t ne2,
|
947
|
+
int64_t ne3);
|
948
|
+
|
939
949
|
// sums repetitions in a into shape of b
|
940
950
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
|
941
951
|
struct lm_ggml_context * ctx,
|
@@ -1793,6 +1803,17 @@ extern "C" {
|
|
1793
1803
|
int p0,
|
1794
1804
|
int p1);
|
1795
1805
|
|
1806
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
1807
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
1808
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_roll(
|
1809
|
+
struct lm_ggml_context * ctx,
|
1810
|
+
struct lm_ggml_tensor * a,
|
1811
|
+
int shift0,
|
1812
|
+
int shift1,
|
1813
|
+
int shift2,
|
1814
|
+
int shift3);
|
1815
|
+
|
1816
|
+
|
1796
1817
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1797
1818
|
// timesteps: [N,]
|
1798
1819
|
// return: [N, dim]
|
@@ -2087,9 +2108,6 @@ extern "C" {
|
|
2087
2108
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad (const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
|
2088
2109
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
|
2089
2110
|
|
2090
|
-
LM_GGML_API void lm_ggml_graph_export(const struct lm_ggml_cgraph * cgraph, const char * fname);
|
2091
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_import(const char * fname, struct lm_ggml_context ** ctx_data, struct lm_ggml_context ** ctx_eval);
|
2092
|
-
|
2093
2111
|
// print info and performance information for the graph
|
2094
2112
|
LM_GGML_API void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph);
|
2095
2113
|
|
@@ -2173,6 +2191,7 @@ extern "C" {
|
|
2173
2191
|
|
2174
2192
|
// scheduling priorities
|
2175
2193
|
enum lm_ggml_sched_priority {
|
2194
|
+
LM_GGML_SCHED_PRIO_LOW = -1,
|
2176
2195
|
LM_GGML_SCHED_PRIO_NORMAL,
|
2177
2196
|
LM_GGML_SCHED_PRIO_MEDIUM,
|
2178
2197
|
LM_GGML_SCHED_PRIO_HIGH,
|
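
To illustrate the two operators declared above (lm_ggml_repeat_4d and lm_ggml_roll), here is a minimal caller-side sketch. It is not part of the package; the buffer size and tensor shapes are made up, and it only builds graph nodes the way other lm_ggml ops are used.

    // Hedged sketch: demonstrates the call shape of the new ops.
    #include "ggml.h"

    int main(void) {
        struct lm_ggml_init_params params = { 16 * 1024 * 1024, NULL, false };
        struct lm_ggml_context * ctx = lm_ggml_init(params);

        // 2x3 source repeated to 4x6: each target extent must be a multiple of the source extent.
        struct lm_ggml_tensor * a   = lm_ggml_new_tensor_2d(ctx, LM_GGML_TYPE_F32, 2, 3);
        struct lm_ggml_tensor * rep = lm_ggml_repeat_4d(ctx, a, 4, 6, 1, 1);

        // Circular shift by +1 along the first dimension; shifted-out elements wrap to the front.
        struct lm_ggml_tensor * rolled = lm_ggml_roll(ctx, rep, 1, 0, 0, 0);
        (void) rolled;

        lm_ggml_free(ctx);
        return 0;
    }
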
package/cpp/gguf.cpp
CHANGED
@@ -335,7 +335,11 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
     for (uint32_t i = 0; i < magic.size(); i++) {
         if (magic[i] != LM_GGUF_MAGIC[i]) {
-
+            char c0 = isprint(magic[0]) ? magic[0] : '?';
+            char c1 = isprint(magic[1]) ? magic[1] : '?';
+            char c2 = isprint(magic[2]) ? magic[2] : '?';
+            char c3 = isprint(magic[3]) ? magic[3] : '?';
+            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
             lm_gguf_free(ctx);
             return nullptr;
         }
@@ -347,11 +351,28 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            LM_GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            LM_GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             LM_GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
-        if (ctx->version > LM_GGUF_VERSION) {
+        if (ok && ctx->version > LM_GGUF_VERSION) {
             LM_GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, LM_GGUF_VERSION);
             ok = false;
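
The endianness guard added above relies on GGUF versions being small integers: the version bytes of, say, a v3 file (03 00 00 00) read with the opposite byte order land in the high bytes of the 32-bit word, giving 0x03000000, whose low 16 bits are zero. A tiny worked example, purely illustrative and not part of the package:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t version_native  = 3;          // bytes 03 00 00 00 read with matching endianness
        const uint32_t version_swapped = 0x03000000; // the same bytes read with mismatched endianness

        // Mirrors the check in lm_gguf_init_from_file_impl: low 16 bits == 0 => suspicious.
        std::printf("native:  %s\n", (version_native  & 0x0000FFFF) == 0 ? "suspect" : "ok");
        std::printf("swapped: %s\n", (version_swapped & 0x0000FFFF) == 0 ? "suspect" : "ok");
        return 0;
    }
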
package/cpp/json-partial.cpp
ADDED
@@ -0,0 +1,256 @@
#include "json-partial.h"

#include "log.h"

#include "nlohmann/json.hpp"

#include <string>

using json = nlohmann::ordered_json;

enum common_json_stack_element_type {
    COMMON_JSON_STACK_ELEMENT_OBJECT,
    COMMON_JSON_STACK_ELEMENT_KEY,
    COMMON_JSON_STACK_ELEMENT_ARRAY,
};

struct common_json_stack_element {
    common_json_stack_element_type type;
    std::string key;
};

bool common_json_parse(
    const std::string & input,
    const std::string & healing_marker,
    common_json & out)
{
    std::string::const_iterator it = input.begin();
    const auto end = input.end();
    return common_json_parse(it, end, healing_marker, out);
}

bool common_json_parse(
    std::string::const_iterator & it,
    const std::string::const_iterator & end,
    const std::string & healing_marker,
    common_json & out)
{
    // // https://json.nlohmann.me/features/parsing/sax_interface/
    struct json_error_locator : public nlohmann::json_sax<json> {
        std::size_t position;
        bool found_error;
        std::string last_token;
        std::string exception_message;
        std::vector<common_json_stack_element> stack;

        json_error_locator() : position(0), found_error(false) {}

        bool parse_error(std::size_t position, const std::string & last_token, const json::exception & ex) override { // NOLINT
            this->position = position - 1;
            this->found_error = true;
            this->last_token = last_token;
            this->exception_message = ex.what();
            return false;
        }
        void close_value() {
            if (!stack.empty() && (stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY)) {
                stack.pop_back();
            }
        }
        bool null() override { // NOLINT
            close_value();
            return true;
        }
        bool boolean(bool) override { // NOLINT
            close_value();
            return true;
        }
        bool number_integer(number_integer_t) override { // NOLINT
            close_value();
            return true;
        }
        bool number_unsigned(number_unsigned_t) override { // NOLINT
            close_value();
            return true;
        }
        bool number_float(number_float_t, const string_t &) override { // NOLINT
            close_value();
            return true;
        }
        bool string(string_t &) override { // NOLINT
            close_value();
            return true;
        }
        bool binary(binary_t &) override { // NOLINT
            close_value();
            return true;
        }
        bool start_object(std::size_t) override { // NOLINT
            stack.push_back({COMMON_JSON_STACK_ELEMENT_OBJECT, ""});
            return true;
        }
        bool end_object() override {
            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT);
            stack.pop_back();
            close_value();
            return true;
        }
        bool key(string_t & key) override { // NOLINT
            stack.push_back({COMMON_JSON_STACK_ELEMENT_KEY, key});
            return true;
        }
        bool start_array(std::size_t) override { // NOLINT
            stack.push_back({COMMON_JSON_STACK_ELEMENT_ARRAY, ""});
            return true;
        }
        bool end_array() override {
            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY);
            stack.pop_back();
            close_value();
            return true;
        }
    };
    json_error_locator err_loc;
    auto start = it;
    json::sax_parse(it, end, &err_loc);

    if (err_loc.found_error) {
        it = start;
        auto temptative_end = it + err_loc.position;
        // LOG_DBG("Error at position %zu (is_end = %s): %s\n", err_loc.position, temptative_end == end ? "true" : "false", err_loc.exception_message.c_str());

        auto input = std::string(it, temptative_end);
        try {
            out.json = json::parse(input);
            // out.json = json::parse(it, temptative_end);
            it = temptative_end;
            return true;
        } catch (const std::exception & ex) {
            // No, needs healing.
            LOG_DBG("Failed to parse up to error: %s: <<<%s>>>\n", ex.what(), std::string(it, temptative_end).c_str());
        }
        auto can_parse = [](const std::string & str) {
            try {
                auto _ = json::parse(str); // NOLINT
                return true;
            } catch (const std::exception &) {
                return false;
            }
        };
        if (!healing_marker.empty() && !err_loc.stack.empty()) {
            std::string str(it, temptative_end);
            auto last_non_sp_pos = str.find_last_not_of(" \n\r\t");
            if (last_non_sp_pos == std::string::npos) {
                throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
            }
            auto last_non_sp_char = str[last_non_sp_pos];
            // Used to detect stops on a number, which may not be complete.
            auto was_maybe_number = [&]() {
                if (!str.empty() && std::isspace(str.back())) {
                    return false;
                }
                return std::isdigit(last_non_sp_char) ||
                    last_non_sp_char == '.' ||
                    last_non_sp_char == 'e' ||
                    last_non_sp_char == 'E' ||
                    last_non_sp_char == '-';
            };

            std::string closing;
            for (size_t i = err_loc.stack.size(); i > 0; i--) {
                auto & el = err_loc.stack[i - 1];
                if (el.type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
                    closing += "}";
                } else if (el.type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
                    closing += "]";
                } else if (el.type != COMMON_JSON_STACK_ELEMENT_KEY) {
                    throw std::runtime_error("Unexpected stack element type");
                }
            }

            const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";

            if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
                // We're inside an object value
                if (last_non_sp_char == ':' && can_parse(str + "1" + closing)) {
                    // Was about to create an object value
                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
                } else if (can_parse(str + ": 1" + closing)) {
                    str += (out.healing_marker.json_dump_marker = ":\"" + magic_seed) + "\"" + closing;
                } else if (last_non_sp_char == '{' && can_parse(str + closing)) {
                    // Was about to create an object
                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
                } else if (can_parse(str + "\"" + closing)) {
                    // Was inside an object value string
                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
                    // Was inside an object value string after an escape
                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
                } else {
                    // find last :
                    auto last_pos = str.find_last_of(':');
                    if (last_pos == std::string::npos) {
                        throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
                    }
                    // Cutting back to opening : for object value
                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
                }
            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
                if ((last_non_sp_char == ',' || last_non_sp_char == '[') && can_parse(str + "1" + closing)) {
                    // Was about to create an array value
                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
                } else if (can_parse(str + "\"" + closing)) {
                    // Was inside an array value string
                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
                    // Was inside an array value string after an escape
                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
                } else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
                    // Had just finished a value
                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
                } else {
                    auto last_pos = str.find_last_of("[,");
                    if (last_pos == std::string::npos) {
                        throw std::runtime_error("Cannot heal a truncated JSON array stopped in an unknown location");
                    }
                    // Cutting back to last [ or , for array value
                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
                }
            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
                if ((last_non_sp_char == '{' && can_parse(str + closing)) ||
                    (last_non_sp_char == ',' && can_parse(str + "\"\": 1" + closing))) {
                    // Was about to create an object key+value
                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
                } else if (!was_maybe_number() && can_parse(str + ",\"\": 1" + closing)) {
                    // Was about to create an object key+value
                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\": 1" + closing;
                } else if (can_parse(str + "\": 1" + closing)) {
                    // Was inside an object key string
                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\": 1" + closing;
                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
                    // Was inside an object key string after an escape
                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
                } else {
                    auto last_pos = str.find_last_of(':');
                    if (last_pos == std::string::npos) {
                        throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
                    }
                    // fprintf(stderr, "Cutting back to last : for object key+value\n");
                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
                }
            } else {
                throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
            }
            // fprintf(stderr, "HEALED:\nSTRING <<<\n%s\n>>>\n\nmagic_cut: <<<\n%s\n>>>\n\n", str.c_str(), out.healing_marker.json_dump_marker.c_str());
            out.json = json::parse(str);
            it = temptative_end;
            return true;
        }
        // TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
        // fprintf(stderr, "Closing: TODO\n");
        return false;
    }
    out.json = json::parse(it, end);
    it = end;
    return true;
}
package/cpp/json-partial.h
ADDED
@@ -0,0 +1,38 @@
#pragma once

#include "nlohmann/json.hpp"

// Healing marker (empty if the JSON was fully parsed / wasn't healed).
struct common_healing_marker {
    // Raw marker.
    std::string marker;

    // Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
    std::string json_dump_marker;
};

// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
struct common_json {
    nlohmann::ordered_json json;

    common_healing_marker healing_marker;
};

// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
//
// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
//
// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
bool common_json_parse(
    const std::string & input,
    const std::string & healing_marker,
    common_json & out);

// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
bool common_json_parse(
    std::string::const_iterator & it,
    const std::string::const_iterator & end,
    const std::string & healing_marker,
    common_json & out);
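
The healing flow documented in the header above can be exercised end to end. A hedged usage sketch follows; the truncated tool-call fragment and the "$MARKER$" string are made up for illustration, and the expected behaviour is inferred from the code shipped in this release:

    #include "json-partial.h"

    #include <cstdio>
    #include <string>

    int main() {
        // A model stream cut off mid-string inside a nested object.
        const std::string chunk = "{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Par";

        common_json out;
        if (common_json_parse(chunk, /*healing_marker=*/"$MARKER$", out)) {
            const std::string dump = out.json.dump();
            std::printf("healed JSON: %s\n", dump.c_str());

            // Cutting the dump at json_dump_marker recovers the partial content seen so far.
            const auto pos = dump.find(out.healing_marker.json_dump_marker);
            if (pos != std::string::npos) {
                std::printf("prefix before the marker: %s\n", dump.substr(0, pos).c_str());
            }
        }
        return 0;
    }
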
package/cpp/json-schema-to-grammar.cpp
CHANGED
@@ -1,8 +1,9 @@
 #include "json-schema-to-grammar.h"
 #include "common.h"
 
+#include "nlohmann/json.hpp"
+
 #include <algorithm>
-#include <fstream>
 #include <map>
 #include <regex>
 #include <sstream>
@@ -40,49 +41,6 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     return result;
 }
 
-/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
-class string_view {
-    const std::string & _str;
-    const size_t _start;
-    const size_t _end;
-public:
-    string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
-
-    size_t size() const {
-        return _end - _start;
-    }
-
-    size_t length() const {
-        return size();
-    }
-
-    operator std::string() const {
-        return str();
-    }
-
-    std::string str() const {
-        return _str.substr(_start, _end - _start);
-    }
-
-    string_view substr(size_t pos, size_t len = std::string::npos) const {
-        return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
-    }
-
-    char operator[](size_t pos) const {
-        auto index = _start + pos;
-        if (index >= _end) {
-            throw std::out_of_range("string_view index out of range");
-        }
-        return _str[_start + pos];
-    }
-
-    bool operator==(const string_view & other) const {
-        std::string this_str = *this;
-        std::string other_str = other;
-        return this_str == other_str;
-    }
-};
-
 static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
     auto has_min = min_value != std::numeric_limits<int>::min();
     auto has_max = max_value != std::numeric_limits<int>::max();
@@ -111,14 +69,14 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
         }
         out << "}";
     };
-    std::function<void(const string_view &, const string_view &)> uniform_range =
-        [&](const string_view & from, const string_view & to) {
+    std::function<void(const std::string_view &, const std::string_view &)> uniform_range =
+        [&](const std::string_view & from, const std::string_view & to) {
            size_t i = 0;
            while (i < from.length() && i < to.length() && from[i] == to[i]) {
                i++;
            }
            if (i > 0) {
-               out << "\"" << from.substr(0, i)
+               out << "\"" << from.substr(0, i) << "\"";
            }
            if (i < from.length() && i < to.length()) {
                if (i > 0) {
package/cpp/json-schema-to-grammar.h
CHANGED
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "
-
-#
-#include
+#include "nlohmann/json_fwd.hpp"
+
+#include <functional>
+#include <string>
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
     bool force_gbnf = false);