cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/rn-llama.cpp
CHANGED
@@ -1,8 +1,86 @@
 #include "rn-llama.h"

+// Include multimodal support
+#include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/clip.h"
+
 namespace rnllama {

-
+// Computes FNV-1a hash of the data
+static std::string fnv_hash(const uint8_t * data, size_t len) {
+    const uint64_t fnv_prime = 0x100000001b3ULL;
+    uint64_t hash = 0xcbf29ce484222325ULL;
+
+    for (size_t i = 0; i < len; ++i) {
+        hash ^= data[i];
+        hash *= fnv_prime;
+    }
+    return std::to_string(hash);
+}
+
+static const std::string base64_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+// Base64 decoding function
+static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
+    std::vector<uint8_t> decoded;
+    int in_len = encoded_string.size();
+    int i = 0;
+    int j = 0;
+    int in_ = 0;
+    unsigned char char_array_4[4], char_array_3[3];
+
+    while (in_len-- && (encoded_string[in_] != '=')) {
+        if (isspace(encoded_string[in_])) {
+            in_++;
+            continue;
+        }
+
+        if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+            break;
+        }
+
+        char_array_4[i++] = encoded_string[in_]; in_++;
+        if (i == 4) {
+            for (i = 0; i < 4; i++) {
+                char_array_4[i] = base64_chars.find(char_array_4[i]);
+            }
+
+            char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+            char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+            char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+            for (i = 0; i < 3; i++) {
+                decoded.push_back(char_array_3[i]);
+            }
+            i = 0;
+        }
+    }
+
+    if (i) {
+        for (j = i; j < 4; j++) {
+            char_array_4[j] = 0;
+        }
+
+        for (j = 0; j < 4; j++) {
+            char_array_4[j] = base64_chars.find(char_array_4[j]);
+        }
+
+        char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+        char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+        for (j = 0; j < i - 1; j++) {
+            decoded.push_back(char_array_3[j]);
+        }
+    }
+
+    return decoded;
+}
+
+static const std::vector<lm_ggml_type> kv_cache_types = {
     LM_GGML_TYPE_F32,
     LM_GGML_TYPE_F16,
     LM_GGML_TYPE_BF16,
@@ -149,10 +227,16 @@ std::string tokens_to_str(llama_context *ctx, const std::vector<llama_token>::co
     return ret;
 }

+struct llama_rn_context_mtmd {
+    mtmd_context *mtmd_ctx = nullptr;
+};
+
 llama_rn_context::~llama_rn_context() {
     if (ctx_sampling != nullptr) {
         common_sampler_free(ctx_sampling);
     }
+
+    releaseMultimodal();
 }

 void llama_rn_context::rewind() {
@@ -165,6 +249,7 @@ void llama_rn_context::rewind() {
     generated_text.reserve(params.n_ctx);
     generated_token_probs.clear();
     truncated = false;
+    context_full = false;
     stopped_eos = false;
     stopped_word = false;
     stopped_limit = false;
@@ -197,6 +282,9 @@ bool llama_rn_context::loadModel(common_params &params_)
     templates = common_chat_templates_init(model, params.chat_template);
     n_ctx = llama_n_ctx(ctx);

+    // Initialize context shift flag
+    LOG_INFO("ctx_shift: %s", params.ctx_shift ? "enabled" : "disabled");
+
     // We can uncomment for debugging or after this fix: https://github.com/ggerganov/llama.cpp/pull/11101
     // LOG_INFO("%s\n", common_params_get_system_info(params).c_str());

@@ -271,11 +359,11 @@ void llama_rn_context::truncatePrompt(std::vector<llama_token> &prompt_tokens) {

     new_tokens.insert(new_tokens.end(), prompt_tokens.begin() + params.n_keep + erased_blocks * n_block_size, prompt_tokens.end());

-
+    LOG_INFO("input truncated, n_ctx: %d, n_keep: %d, n_left: %d, old_size: %d, new_size: %d",
         n_ctx,
         params.n_keep,
         n_left,
-
+        prompt_tokens.size(),
         new_tokens.size()
     );

@@ -283,65 +371,71 @@ void llama_rn_context::truncatePrompt(std::vector<llama_token> &prompt_tokens) {
     prompt_tokens = new_tokens;
 }

-void llama_rn_context::loadPrompt() {
-
-    num_prompt_tokens = prompt_tokens.size();
+void llama_rn_context::loadPrompt(const std::vector<std::string> &media_paths) {
+    bool has_media = !media_paths.empty();

-
-
-
-
-
-    }
-    LOG_INFO("%s\n", ss.str().c_str());
+    if (!has_media) {
+        std::vector<llama_token> text_tokens;
+        // Text-only path
+        text_tokens = ::common_tokenize(ctx, params.prompt, true, true);
+        num_prompt_tokens = text_tokens.size();

-
-
-
-
-
+        // LOG tokens
+        std::stringstream ss;
+        ss << "\n" << __func__ << ": prompt_tokens = ";
+        for (auto& token : text_tokens) {
+            ss << token << " ";
+        }
+        LOG_INFO("%s\n", ss.str().c_str());

-
-
-
-
-    num_prompt_tokens = prompt_tokens.size();
+        if (params.n_keep < 0) {
+            params.n_keep = (int)num_prompt_tokens;
+        }
+        params.n_keep = std::min(n_ctx - 4, params.n_keep);

-
-
+        // Handle truncation if needed
+        if (num_prompt_tokens >= (size_t)n_ctx) {
+            if (!params.ctx_shift) {
+                context_full = true;
+                return;
+            }
+            truncatePrompt(text_tokens);
+            num_prompt_tokens = text_tokens.size();
+            LM_GGML_ASSERT(num_prompt_tokens < (size_t)n_ctx);
+        }

-
-
-
-
+        // Update sampling context
+        for (auto & token : text_tokens) {
+            common_sampler_accept(ctx_sampling, token, false);
+        }

+        // compare the evaluated prompt with the new prompt
+        n_past = common_part(embd, text_tokens);

-
-
-
-
-
+        embd = text_tokens;
+        if (n_past == num_prompt_tokens) {
+            // we have to evaluate at least 1 token to generate logits.
+            n_past--;
+        }

-
-
+        // Manage KV cache
+        llama_kv_self_seq_rm(ctx, 0, n_past, -1);

-
-
-
-
-
+        LOG_VERBOSE("prompt ingested, n_past: %d, cached: %s, to_eval: %s",
+            n_past,
+            tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past).c_str(),
+            tokens_to_str(ctx, embd.cbegin() + n_past, embd.cend()).c_str()
+        );
+    } else {
+        // Multimodal path - process all media paths
+        processMedia(params.prompt, media_paths);
+        num_prompt_tokens = embd.size();
     }

-    // since #3228 we now have to manually manage the KV cache
-    llama_kv_self_seq_rm(ctx, 0, n_past, -1);
-
-    LOG_VERBOSE("prompt ingested, n_past: %d, cached: %s, to_eval: %s",
-        n_past,
-        tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past).c_str(),
-        tokens_to_str(ctx, embd.cbegin() + n_past, embd.cend()).c_str()
-    );
-
     has_next_token = true;
+
+    LOG_INFO("[DEBUG] Input processed: n_past=%d, embd.size=%zu, num_prompt_tokens=%zu, has_media=%d",
+        n_past, embd.size(), num_prompt_tokens, has_media ? 1 : 0);
 }

 void llama_rn_context::beginCompletion() {
@@ -351,6 +445,10 @@ void llama_rn_context::beginCompletion() {
     is_predicting = true;
 }

+void llama_rn_context::endCompletion() {
+    is_predicting = false;
+}
+
 completion_token_output llama_rn_context::nextToken()
 {
     completion_token_output result;
@@ -358,6 +456,14 @@ completion_token_output llama_rn_context::nextToken()

     if (embd.size() >= (size_t)params.n_ctx)
     {
+        if (!params.ctx_shift) {
+            // If context shifting is disabled, stop generation
+            LOG_WARNING("context full, n_ctx: %d, tokens: %d", params.n_ctx, embd.size());
+            has_next_token = false;
+            context_full = true;
+            return result;
+        }
+
         // Shift context

         const int n_left = n_past - params.n_keep - 1;
@@ -373,12 +479,9 @@ completion_token_output llama_rn_context::nextToken()
         embd.resize(embd.size() - n_discard);

         n_past -= n_discard;
+        truncated = true;

-        LOG_VERBOSE("
-            params.n_ctx,
-            params.n_keep,
-            n_left
-        );
+        LOG_VERBOSE("context shifted, new n_past: %d, new size: %d", n_past, embd.size());
     }

     bool tg = true;
@@ -677,7 +780,7 @@ std::string llama_rn_context::bench(int pp, int tg, int pl, int nr)
     }

     if (is_interrupted) llama_kv_self_clear(ctx);
-
+    endCompletion();

     char model_desc[128];
     llama_model_desc(model, model_desc, sizeof(model_desc));
@@ -712,162 +815,471 @@ void llama_rn_context::removeLoraAdapters() {
 std::vector<common_adapter_lora_info> llama_rn_context::getLoadedLoraAdapters() {
     return this->lora;
 }
-std::vector<int> llama_rn_context::longest_common_subseq(const std::vector<int> x, const std::vector<int> y){
-    int m = x.size(), n = y.size();
-
-    //int LCSuff[m+1][n+1];
-    std::vector<std::vector<int>> LCSuff(m+1, std::vector<int>(n+1));
-
-    for (int j = 0; j <= n; j++)
-        LCSuff[0][j] = 0;
-    for (int i = 0; i <= m; i++)
-        LCSuff[i][0] = 0;
-
-    for (int i = 1; i <= m; i++)
-    {
-        for (int j = 1; j <= n; j++)
-        {
-            if (x[i - 1] == y[j - 1])
-                LCSuff[i][j] = LCSuff[i - 1][j - 1] + 1;
-            else
-                LCSuff[i][j] = 0;
-        }
-    }
-
-    std::vector<int> longest;
-    for (int i = 1; i <= m; i++)
-    {
-        for (int j = 1; j <= n; j++)
-        {
-            if (LCSuff[i][j] > longest.size())
-            {
-                auto off1 = ((i - LCSuff[i][j] + 1) - 1);
-                auto off2 = off1 + LCSuff[i][j];
-                longest.clear();
-                // std::vector<int>().swap(longest);
-                longest = std::vector<int>(x.begin() + off1, x.begin() + off2);
-                // x.substr((i - LCSuff[i][j] + 1) - 1, LCSuff[i][j]);
-            }
-        }
-    }
-    return longest;
-}
-
-bool llama_rn_context::arr_start_with(const std::vector<int> targetArray, const std::vector<int> searchSeq)
-{
-    int ss = searchSeq.size();
-    if(targetArray.size()<ss)
-    {
-        return false;
-    }
-    for(int i=0;i<ss;++i)
-    {
-        if(targetArray[i]!=searchSeq[i])
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-int llama_rn_context::arr_find_index_of(const std::vector<int> targetArray, const std::vector<int> searchSeq)
-{
-    int ss = searchSeq.size();
-    int tas = targetArray.size();
-    if(tas<ss)
-    {
-        return -1;
-    }
-    for(int i=0;i<tas;++i)
-    {
-        int srch = 0;
-        bool fail = false;
-        for(int srch=0;srch<ss;++srch)
-        {
-            if ((i + srch) >= tas || targetArray[i + srch] != searchSeq[srch])
-            {
-                fail = true;
-                break;
-            }
-        }
-        if(!fail)
-        {
-            return i;
-        }
-    }
-    return -1;
-}
-
-void llama_rn_context::purge_missing_tokens(llama_context * ctx, std::vector<int> &current_context_tokens, std::vector<int> &new_context_tokens, const int genamt, const int nctx)
-{
-    //scan from start old and new ctx, until first mismatch found, save as p0
-    //check remaining old and new ctx for longest common subseq, which needs to be at 256 tokens
-    //test: longest common subseq (LCQ) MUST start within 0 tokens from end of memory, otherwise purge fails
-    //if passed, save beginning of LCQ from old ctx as p1
-    //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal

-
-
+bool llama_rn_context::initMultimodal(const std::string &mmproj_path, bool use_gpu) {
+    LOG_INFO("[DEBUG] Initializing multimodal with mmproj path: %s", mmproj_path.c_str());

-
-
-
+    if (model == nullptr) {
+        LOG_ERROR("[DEBUG] Model not loaded, cannot initialize multimodal", "");
+        return false;
+    }

-
-
-
-
-
+    LOG_INFO("[DEBUG] Model info: n_ctx=%d, n_embd=%d",
+        llama_n_ctx(ctx),
+        llama_model_n_embd(model));
+
+    // Initialize mtmd context
+    mtmd_context_params mtmd_params = mtmd_context_params_default();
+    mtmd_params.use_gpu = use_gpu;
+    mtmd_params.print_timings = false;
+    mtmd_params.n_threads = params.cpuparams.n_threads;
+    mtmd_params.verbosity = (lm_ggml_log_level)LM_GGML_LOG_LEVEL_INFO;
+
+    LOG_INFO("[DEBUG] Initializing mtmd context with threads=%d", mtmd_params.n_threads);
+
+    auto mtmd_ctx = mtmd_init_from_file(mmproj_path.c_str(), model, mtmd_params);
+    if (mtmd_ctx == nullptr) {
+        LOG_ERROR("[DEBUG] Failed to initialize multimodal context with mmproj: %s", mmproj_path.c_str());
+        return false;
+    }
+    mtmd_wrapper = new llama_rn_context_mtmd();
+    mtmd_wrapper->mtmd_ctx = mtmd_ctx;
+
+    has_multimodal = true;
+
+    // Check if the model uses M-RoPE or non-causal attention
+    bool uses_mrope = mtmd_decode_use_mrope(mtmd_ctx);
+    bool uses_non_causal = mtmd_decode_use_non_causal(mtmd_ctx);
+    LOG_INFO("[DEBUG] Model multimodal properties: uses_mrope=%d, uses_non_causal=%d",
+        uses_mrope ? 1 : 0,
+        uses_non_causal ? 1 : 0);
+
+    // Disable context shifting when multimodal is enabled
+    // This is because an media chunk may contain multiple tokens
+    // and context shifting could break the media representation
+    params.ctx_shift = false;
+
+    // params.n_cache_reuse = 0;
+
+    LOG_INFO("Multimodal context initialized successfully with mmproj: %s", mmproj_path.c_str());
+    LOG_INFO("Context shifting disabled for multimodal support");
+    return true;
+}
+
+struct mtmd_tokenize_result {
+    std::vector<std::string> bitmap_hashes;
+    std::vector<llama_token> tokens;
+    std::vector<size_t> chunk_pos; // both text and media
+    std::vector<size_t> chunk_pos_media; // media only
+    mtmd_input_chunks* chunks = nullptr;
+};
+
+mtmd_tokenize_result tokenizeWithMedia(llama_rn_context_mtmd *mtmd_wrapper, const std::string &prompt, const std::vector<std::string> &media_paths) {
+    mtmd_tokenize_result result;
+    mtmd::bitmaps bitmaps;
+
+    // Load all media paths
+    for (const auto& media_path : media_paths) {
+        LOG_INFO("[DEBUG] Loading media: %s",
+            media_path.substr(0, 50).c_str()); // Only log part of path for base64
+
+        // Check if it's a base64 media
+        if (media_path.compare(0, 11, "data:image/") == 0 || media_path.compare(0, 11, "data:audio/") == 0) {
+            LOG_INFO("[DEBUG] Detected base64 encoded media");
+
+            // Parse base64 data
+            std::vector<std::string> parts;
+            size_t comma_pos = media_path.find(',');
+            if (comma_pos == std::string::npos) {
+                throw std::runtime_error("Invalid base64 media format, missing comma separator");
+            }
+
+            std::string header = media_path.substr(0, comma_pos);
+            std::string base64_data = media_path.substr(comma_pos + 1);
+
+            if (header.find("base64") == std::string::npos) {
+                bitmaps.entries.clear();
+                throw std::runtime_error("Image must be base64 encoded");
+            }
+
+            // Decode base64
+            std::vector<uint8_t> media_data = base64_decode(base64_data);
+            LOG_INFO("[DEBUG] Base64 decoded, size: %zu bytes", media_data.size());
+
+            // Load bitmap from memory buffer using direct initialization
+            mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(), media_data.size()));
+            if (!bmp.ptr) {
+                bitmaps.entries.clear();
+                throw std::runtime_error("Failed to load base64 media");
+            }
+
+            // Calculate bitmap hash (for KV caching)
+            std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
+            bmp.set_id(hash.c_str());
+            LOG_INFO("[DEBUG] Bitmap hash: %s", hash.c_str());
+            bitmaps.entries.push_back(std::move(bmp));
+            result.bitmap_hashes.push_back(hash.c_str());
+        } else if (media_path.compare(0, 7, "http://") == 0 || media_path.compare(0, 8, "https://") == 0) {
+            // HTTP URLs are not supported yet
+            LOG_ERROR("[DEBUG] HTTP/HTTPS URLs are not supported yet: %s", media_path.c_str());
+            throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
+        } else {
+            // Regular file path
+            LOG_INFO("[DEBUG] Loading media from file");
+
+            // Check if file exists
+            FILE* file = fopen(media_path.c_str(), "rb");
+            if (file == nullptr) {
+                bitmaps.entries.clear();
+                throw std::runtime_error("File does not exist or cannot be opened");
+            }
+
+            // Get file size
+            fseek(file, 0, SEEK_END);
+            long file_size = ftell(file);
+            fseek(file, 0, SEEK_SET);
+            LOG_INFO("[DEBUG] File exists and size is %ld bytes", file_size);
+            fclose(file);
+
+            // Create bitmap directly
+            mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
+            if (!bmp.ptr) {
+                bitmaps.entries.clear();
+                throw std::runtime_error("Failed to load media");
+            }
+
+            // Calculate bitmap hash (for KV caching)
+            std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+            bmp.set_id(hash.c_str());
+            LOG_INFO("[DEBUG] Bitmap hash: %s", hash.c_str());
+            bitmaps.entries.push_back(std::move(bmp));
+            result.bitmap_hashes.push_back(hash.c_str());
         }
-
-
+    }
+
+    // Create input chunks
+    LOG_INFO("[DEBUG] Initializing input chunks");
+    result.chunks = mtmd_input_chunks_init();
+    if (result.chunks == nullptr) {
+        bitmaps.entries.clear();
+        throw std::runtime_error("Failed to initialize input chunks");
+    }
+
+    mtmd_input_text input_text;
+    input_text.text = prompt.c_str(); // Use the full prompt with image marker
+    input_text.add_special = true; // Add BOS token if this is the first message
+    input_text.parse_special = true; // Parse special tokens like <__media__>
+
+    /**
+     * Tokenize the text and media together.
+     *
+     * Example of tokenization for "foo bar <__media__> baz <__media__>":
+     *
+     * 1. Input text with media markers:
+     *
+     *    "foo bar <__media__> baz <__media__>"
+     *
+     * 2. Model-specific markers are added.
+     *
+     * 3. Text is split and tokenized into chunks:
+     *
+     *    ┌─────────────┐ ┌─────────────────────────┐ ┌─────────┐ ┌─────────────────────────┐
+     *    │ TEXT CHUNK  │ │       IMAGE CHUNK       │ │  TEXT   │ │       IMAGE CHUNK       │
+     *    │ "foo bar "  │ │                         │ │ " baz " │ │                         │
+     *    └─────────────┘ └─────────────────────────┘ └─────────┘ └─────────────────────────┘
+     *           │                    │                    │                   │
+     *           ▼                    ▼                    ▼                   ▼
+     *    ┌─────────────┐ ┌─────────────────────────┐ ┌─────────┐ ┌─────────────────────────┐
+     *    │ [1234,5678] │ │  Image Data Structure   │ │ [9012]  │ │  Image Data Structure   │
+     *    └─────────────┘ └─────────────────────────┘ └─────────┘ └─────────────────────────┘
+     *
+     * 4. Image token structure differences:
+     *
+     *    For Qwen2VL (uses M-RoPE with 2D positions):
+     *    ┌─────────────────────────────────────────┐
+     *    │ MEDIA_CHUNK                             │
+     *    │ ┌───────────────────────────────────┐   │
+     *    │ │ mtmd_image_tokens:                │   │
+     *    │ │   nx = 16, ny = 16                │   │ ← 2D grid (16×16 = 256 tokens)
+     *    │ │   use_mrope_pos = true            │   │ ← Uses M-RoPE positioning
+     *    │ │   batch_f32 = [image_embeddings]  │   │
+     *    │ └───────────────────────────────────┘   │
+     *    └─────────────────────────────────────────┘
+     *
+     *    For other models (uses 1D positions):
+     *    ┌─────────────────────────────────────────┐
+     *    │ MEDIA_CHUNK                             │
+     *    │ ┌───────────────────────────────────┐   │
+     *    │ │ mtmd_image_tokens:                │   │
+     *    │ │   nx = 256, ny = 1                │   │ ← 1D sequence (256 tokens)
+     *    │ │   use_mrope_pos = false           │   │ ← Uses standard positioning
+     *    │ │   batch_f32 = [image_embeddings]  │   │
+     *    │ └───────────────────────────────────┘   │
+     *    └─────────────────────────────────────────┘
+     *
+     * 5. Final chunks array:
+     *    chunks[0] = TEXT_CHUNK([1234, 5678])
+     *    chunks[1] = MEDIA_CHUNK(first_image)
+     *    chunks[2] = TEXT_CHUNK([9012])
+     *    chunks[3] = MEDIA_CHUNK(second_image)
+     */
+    LOG_INFO("[DEBUG] Tokenizing text and %zu media", bitmaps.entries.size());
+    auto bitmaps_c_ptr = bitmaps.c_ptr();
+    int32_t res = mtmd_tokenize(mtmd_wrapper->mtmd_ctx, result.chunks, &input_text, bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
+    if (res != 0) {
+        mtmd_input_chunks_free(result.chunks);
+        bitmaps.entries.clear();
+        throw std::runtime_error("Failed to tokenize text and media");
+    }
+
+    // Log chunk information
+    size_t num_chunks = mtmd_input_chunks_size(result.chunks);
+    LOG_INFO("[DEBUG] Tokenization successful: num_chunks=%zu", num_chunks);
+
+    // Track the total number of tokens (both text and image)
+    size_t total_token_count = 0;
+
+    /**
+     * Evaluate the chunks.
+     *
+     * For our example "foo bar <__media__> baz <__media__>":
+     *
+     * Token organization in memory:
+     *
+     *   all_tokens: [t0][t1][NULL][NULL]...[NULL][t2][NULL][NULL]...[NULL]
+     *   positions:    0   1    2    3  ...  257  258  259  260 ...  514
+     *   chunk_pos:    0        2                 258  259
+     *
+     * Where:
+     * - [t0][t1] are text tokens for "foo bar " (positions 0-1)
+     * - [NULL]x256 are placeholder tokens for the first image (positions 2-257)
+     * - [t2] is the text token for " baz " (position 258)
+     * - [NULL]x256 are placeholder tokens for the second image (positions 259-514)
+     */
+    for (size_t i = 0; i < num_chunks; i++) {
+        result.chunk_pos.push_back(total_token_count);
+
+        const mtmd_input_chunk* chunk = mtmd_input_chunks_get(result.chunks, i);
+        mtmd_input_chunk_type chunk_type = mtmd_input_chunk_get_type(chunk);
+
+        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token* tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            LOG_INFO("[DEBUG] Chunk %zu: type=TEXT, n_tokens=%zu", i, n_tokens);
+
+            // Add text tokens
+            result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
+            total_token_count += n_tokens;
+        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+            result.chunk_pos_media.push_back(total_token_count);
+
+            size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
+            size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
+            LOG_INFO("[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu",
+                i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO", n_tokens, n_pos);
+
+            for (size_t j = 0; j < n_pos; j++) {
+                result.tokens.push_back(LLAMA_TOKEN_NULL); // Placeholder token
+            }
+            total_token_count += n_pos;
+        }
+    }
+
+    bitmaps.entries.clear();
+
+    return result;
+}
+void llama_rn_context::processMedia(
+    const std::string &prompt,
+    const std::vector<std::string> &media_paths
+) {
+    if (!isMultimodalEnabled()) {
+        throw std::runtime_error("Multimodal is not enabled but image paths are provided");
+    }
+
+    // Multimodal path
+    std::string full_prompt = prompt;
+    auto default_media_marker = mtmd_default_marker();
+    // Add media marker if it doesn't already exist
+    if (full_prompt.find(default_media_marker) == std::string::npos) {
+        full_prompt += " ";
+        full_prompt += default_media_marker;
+    }
+
+    LOG_INFO("[DEBUG] Processing message with role=user, content=%s", full_prompt.c_str());
+    LOG_INFO("[DEBUG] Processing %zu media with prompt: %s", media_paths.size(), prompt.c_str());
+    LOG_INFO("[DEBUG] Current context state: n_past=%d, n_ctx=%d", n_past, n_ctx);
+
+    auto result = tokenizeWithMedia(mtmd_wrapper, full_prompt, media_paths);
+
+    auto all_tokens = result.tokens;
+    auto chunks = result.chunks;
+    auto chunk_pos = result.chunk_pos;
+    auto chunk_pos_media = result.chunk_pos_media;
+    auto bitmap_hashes = result.bitmap_hashes;
+
+    // Check if we have enough context space for all tokens
+    if (all_tokens.size() >= (size_t)n_ctx) {
+        mtmd_input_chunks_free(chunks);
+        context_full = true;
+        throw std::runtime_error("Not enough context space");
+    }
+
+    n_past = common_part(embd, all_tokens);
+
+    llama_pos new_n_past = n_past;
+
+    // Adjust n_past to position of the text chunk
+    // TODO: Edit the text chunk to remove the tokens before n_past to speed up
+    // need to update the mtmd api
+    auto adjusted_n_past = -1;
+    for (size_t i = 0; i < chunk_pos.size(); i++) {
+        if (n_past < chunk_pos[i]) {
             break;
         }
-
-
-
-
+        bool is_end = i + 1 == chunk_pos.size();
+        if (
+            chunk_pos[i] < n_past &&
+            (!is_end && chunk_pos[i + 1] > n_past)
+            // is_end & n_past < total_token_count:
+            // don't need to adjust and it will skip eval_chunk_single, let nextToken() to finish the job
+        ) {
+            adjusted_n_past = chunk_pos[i];
         }
     }
+    if (adjusted_n_past != -1) {
+        n_past = adjusted_n_past;
+        new_n_past = n_past;
+        LOG_INFO("[DEBUG] Adjusted n_past to %d", n_past);
+    }

-
-
-
-
-
-
+    // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
+    if (mtmd_bitmap_past_hashes.size() > 0) {
+        for (size_t i = 0; i < bitmap_hashes.size(); i++) {
+            auto pos = chunk_pos_media[i];
+            if (n_past < pos) {
+                break;
+            }
+            if (i >= mtmd_bitmap_past_hashes.size()) {
+                break;
+            }
+            if (bitmap_hashes[i] != mtmd_bitmap_past_hashes[i]) {
+                LOG_INFO(
+                    "[DEBUG] Bitmap hash mismatch at position %zu, %s != %s",
+                    i, bitmap_hashes[i].c_str(), mtmd_bitmap_past_hashes[i].c_str()
+                );
+                n_past = chunk_pos_media[i];
+                new_n_past = n_past;
+                break;
+            }
+        }
     }

-    //
-
+    // Clear all KV cache entries after position n_past
+    llama_kv_self_seq_rm(ctx, 0, n_past, -1);

-
-    auto new_ctx_without_memory = std::vector<int>(new_context_tokens.begin() + trimstart, new_context_tokens.end());
+    LOG_INFO("[DEBUG] Evaluating chunks: n_past=%d, n_batch=%d", n_past, params.n_batch);

-
+    size_t num_chunks = mtmd_input_chunks_size(chunks);

-
-    {
-        int found = arr_find_index_of(current_context_tokens,shared);
-        if(found>=0 && found > trimstart)
-        {
+    for (size_t i = 0; i < chunk_pos.size(); i++) {

-
-        int diff = found - trimstart;
-        llama_kv_self_seq_rm(ctx, 0, trimstart, trimstart + diff);
-        llama_kv_self_seq_add(ctx, 0, trimstart + diff, -1, -diff);
+        LOG_INFO("[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu", i, n_past, chunk_pos[i]);

-
-
-
+        // Process chunk only if it's after the current n_past
+        if (chunk_pos[i] >= n_past) {
+            bool chunk_logits_last = (i == num_chunks - 1);
+            auto chunk = mtmd_input_chunks_get(chunks, i);
+
+            int32_t res = mtmd_helper_eval_chunk_single(
+                mtmd_wrapper->mtmd_ctx,
+                ctx,
+                chunk,
+                n_past,
+                0,
+                params.n_batch,
+                chunk_logits_last,
+                &new_n_past
+            );
+            if (res != 0) {
+                mtmd_input_chunks_free(chunks);
+                throw std::runtime_error("Failed to evaluate chunks");
             }
+            n_past = new_n_past;
+        }
+    }

-
+    if (n_past == all_tokens.size() && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
+        // we have to evaluate at least 1 token to generate logits.
+        n_past--;
+    }
+
+    // Update embd with all tokens (both text and media)
+    embd = all_tokens;
+
+    mtmd_bitmap_past_hashes = bitmap_hashes;
+
+    // Update sampling context with text tokens only
+    for (auto & token : all_tokens) {
+        if (token == LLAMA_TOKEN_NULL) {
+            continue;
+        }
+        common_sampler_accept(ctx_sampling, token, false);
+    }

-
+    // Clean up media resources
+    LOG_INFO("[DEBUG] Cleaning up resources");
+    mtmd_input_chunks_free(chunks);
+}
+
+llama_rn_tokenize_result llama_rn_context::tokenize(const std::string &text, const std::vector<std::string> &media_paths) {
+    if (media_paths.size() > 0) {
+        if (!isMultimodalEnabled()) {
+            throw std::runtime_error("Multimodal is not enabled but media paths are provided");
         }
+        auto result = tokenizeWithMedia(mtmd_wrapper, text, media_paths);
+        mtmd_input_chunks_free(result.chunks);
+        llama_rn_tokenize_result tokenize_result = {
+            .tokens = result.tokens,
+            .has_media = true,
+            .bitmap_hashes = result.bitmap_hashes,
+            .chunk_pos = result.chunk_pos,
+            .chunk_pos_media = result.chunk_pos_media,
+        };
+        return tokenize_result;
     }
+    std::vector<llama_token> text_tokens;
+    text_tokens = common_tokenize(ctx, text, false);
+    llama_rn_tokenize_result tokenize_result = {
+        .tokens = text_tokens,
+        .has_media = false,
+        .bitmap_hashes = {},
+        .chunk_pos = {},
+        .chunk_pos_media = {},
+    };
+    return tokenize_result;
+}
+
+bool llama_rn_context::isMultimodalEnabled() const {
+    return has_multimodal && mtmd_wrapper != nullptr;
+}

+bool llama_rn_context::isMultimodalSupportVision() const {
+    return isMultimodalEnabled() && mtmd_support_vision(mtmd_wrapper->mtmd_ctx);
+}
+
+bool llama_rn_context::isMultimodalSupportAudio() const {
+    return isMultimodalEnabled() && mtmd_support_audio(mtmd_wrapper->mtmd_ctx);
+}
+
+void llama_rn_context::releaseMultimodal() {
+    if (mtmd_wrapper && mtmd_wrapper->mtmd_ctx != nullptr) {
+        mtmd_free(mtmd_wrapper->mtmd_ctx);
+        mtmd_wrapper->mtmd_ctx = nullptr;
+        delete mtmd_wrapper;
+        mtmd_wrapper = nullptr;
+        has_multimodal = false;
+    }
 }

 }
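
For orientation, the sketch below shows how a host might drive the new multimodal entry points that this diff adds to llama_rn_context (initMultimodal, loadPrompt with media paths, nextToken, endCompletion, releaseMultimodal). It is a hypothetical caller written against the signatures visible in this diff; the model/mmproj file paths, the surrounding setup, and the exact member names of completion_token_output are assumptions, not taken from the package itself.

// Hypothetical caller sketch (not part of the package): illustrates the call
// order implied by this diff: initMultimodal -> loadPrompt(media) -> token loop.
#include "rn-llama.h"

using namespace rnllama;

static void run_vision_completion(llama_rn_context &rn_ctx) {
    // Attach the multimodal projector; per this diff, this also disables
    // context shifting (params.ctx_shift = false).
    if (!rn_ctx.initMultimodal("/path/to/mmproj.gguf", /* use_gpu */ true)) {
        return; // mmproj failed to load
    }

    // Prompt text plus one media path; loadPrompt() routes to processMedia(),
    // which appends a <__media__> marker when the prompt has none.
    rn_ctx.params.prompt = "Describe this image.";
    rn_ctx.loadPrompt({"/path/to/photo.jpg"});
    if (rn_ctx.context_full) {
        return; // prompt did not fit and ctx_shift is off
    }

    rn_ctx.beginCompletion();
    while (rn_ctx.has_next_token) {
        completion_token_output out = rn_ctx.nextToken();
        // consume the sampled token / detokenized text here
    }
    rn_ctx.endCompletion();

    // Free the mtmd context when done (also invoked from the destructor).
    rn_ctx.releaseMultimodal();
}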
|