@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#include "regex-partial.h"
|
|
2
|
+
#include "common.h"
|
|
3
|
+
#include <functional>
|
|
4
|
+
#include <optional>
|
|
5
|
+
|
|
6
|
+
// Builds both regexes used by search():
//  - `rx` is the pattern compiled as given (used for full matches);
//  - `rx_reversed_partial` is the pattern rewritten by regex_to_reversed_partial_regex(),
//    which is matched against the reversed input to detect a partial match at its end.
// Any exception thrown while compiling or rewriting the pattern propagates to the caller.
common_regex::common_regex(const std::string & pattern)
    : pattern(pattern)
    , rx(pattern)
    , rx_reversed_partial(regex_to_reversed_partial_regex(pattern))
{
}
|
|
10
|
+
|
|
11
|
+
// Looks for this regex in `input`, considering only the characters at offset `pos` and beyond.
//
//  - as_match == false: finds the first occurrence at or after `pos` (std::regex_search).
//  - as_match == true : requires input[pos..] as a whole to match (std::regex_match).
//
// Result:
//  - COMMON_REGEX_MATCH_TYPE_FULL: one range per sub-match (index 0 is the whole match),
//    expressed as absolute offsets into `input`.
//  - COMMON_REGEX_MATCH_TYPE_PARTIAL: no full match, but the end of `input` could be the
//    beginning of one; a single range [start of the partial match, input.size()) is returned.
//  - COMMON_REGEX_MATCH_TYPE_NONE (default-constructed result) otherwise.
//
// Throws std::runtime_error("Position out of bounds") when pos > input.size().
common_regex_match common_regex::search(const std::string & input, size_t pos, bool as_match) const {
    if (pos > input.size()) {
        throw std::runtime_error("Position out of bounds");
    }
    const auto start = input.begin() + pos;

    std::smatch match;
    const bool found = as_match
        ? std::regex_match(start, input.end(), match, rx)
        : std::regex_search(start, input.end(), match, rx);
    if (found) {
        common_regex_match res;
        res.type = COMMON_REGEX_MATCH_TYPE_FULL;
        res.groups.reserve(match.size());
        for (size_t i = 0; i < match.size(); ++i) {
            // match.position() is relative to `start`; convert to an offset into `input`.
            const size_t begin = pos + match.position(i);
            res.groups.emplace_back(begin, begin + match.length(i));
        }
        return res;
    }

    // No full match: run the reversed-partial regex over the reversed input[pos..].
    std::match_results<std::string::const_reverse_iterator> srmatch;
    if (!std::regex_match(input.rbegin(), input.rend() - pos, srmatch, rx_reversed_partial)) {
        return {};
    }
    if (srmatch[1].length() == 0) {
        // An empty capture means no part of the pattern matched the end of the input.
        return {};
    }

    // The end of group 1 in the reversed string is the start of the partial match in `input`.
    const auto it = srmatch[1].second.base();

    // When matching (as opposed to searching), the partial match has to begin exactly where
    // the search begins. Comparing against input.begin() instead of `start` would make it
    // impossible to ever report a partial match for pos > 0.
    if (as_match && it != start) {
        return {};
    }

    common_regex_match res;
    res.type = COMMON_REGEX_MATCH_TYPE_PARTIAL;
    // common_string_range's constructor throws std::runtime_error("Invalid range") if begin > end.
    res.groups.emplace_back(static_cast<size_t>(std::distance(input.begin(), it)), input.size());
    return res;
}
|
|
50
|
+
|
|
51
|
+
/*
|
|
52
|
+
Transforms a regex pattern to a partial match pattern that operates on a reversed input string to find partial final matches of the original pattern.
|
|
53
|
+
|
|
54
|
+
Ideally we'd like to use boost::match_partial (https://beta.boost.org/doc/libs/1_59_0/libs/regex/doc/html/boost_regex/partial_matches.html)
|
|
55
|
+
to see if a string ends with a partial regex match, but it's not in std::regex yet.
|
|
56
|
+
Instead, we'll transform the regex into a partial match regex operating as a full match on the reverse iterators of the input.
|
|
57
|
+
|
|
58
|
+
- /abcd/ -> (dcba|cba|ba|a).* -> ((?:(?:(?:d)?c)?b)?a).*
|
|
59
|
+
- /a|b/ -> (a|b).*
|
|
60
|
+
- /a*?/ -> error, could match ""
|
|
61
|
+
- /a*b/ -> ((?:b)?a*+).* (final repetitions become eager)
|
|
62
|
+
- /.*?ab/ -> ((?:b)?a).* (merge .*)
|
|
63
|
+
- /a.*?b/ -> ((?:b)?.*?a).* (keep reluctant matches)
|
|
64
|
+
- /a(bc)d/ -> ((?:(?:d)?(?:(?:c)?b))?a).*
|
|
65
|
+
- /a(bc|de)/ -> ((?:(?:(?:e)?d)?|(?:(?:c)?b)?)?a).*
|
|
66
|
+
- /ab{2,4}c/ -> abbb?b?c -> ((?:(?:(?:(?:(?:c)?b)?b)?b?)?b?)?a).*
|
|
67
|
+
|
|
68
|
+
The regex will match a reversed string fully, and the end of the first (and only) capturing group will indicate the reversed start of the original partial pattern
|
|
69
|
+
(i.e. just where the final .* starts in the inverted pattern; all other groups are turned into non-capturing groups, and reluctant quantifiers are ignored)
|
|
70
|
+
*/
|
|
71
|
+
std::string regex_to_reversed_partial_regex(const std::string & pattern) {
|
|
72
|
+
auto it = pattern.begin();
|
|
73
|
+
const auto end = pattern.end();
|
|
74
|
+
|
|
75
|
+
std::function<std::string()> process = [&]() {
|
|
76
|
+
std::vector<std::vector<std::string>> alternatives(1);
|
|
77
|
+
std::vector<std::string> * sequence = &alternatives.back();
|
|
78
|
+
|
|
79
|
+
while (it != end) {
|
|
80
|
+
if (*it == '[') {
|
|
81
|
+
auto start = it;
|
|
82
|
+
++it;
|
|
83
|
+
while (it != end) {
|
|
84
|
+
if ((*it == '\\') && (++it != end)) {
|
|
85
|
+
++it;
|
|
86
|
+
} else if ((it != end) && (*it == ']')) {
|
|
87
|
+
break;
|
|
88
|
+
} else {
|
|
89
|
+
++it;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (it == end) {
|
|
93
|
+
throw std::runtime_error("Unmatched '[' in pattern");
|
|
94
|
+
}
|
|
95
|
+
++it;
|
|
96
|
+
sequence->push_back(std::string(start, it));
|
|
97
|
+
} else if (*it == '*' || *it == '?' || *it == '+') {
|
|
98
|
+
if (sequence->empty()) {
|
|
99
|
+
throw std::runtime_error("Quantifier without preceding element");
|
|
100
|
+
}
|
|
101
|
+
sequence->back() += *it;
|
|
102
|
+
auto is_star = *it == '*';
|
|
103
|
+
++it;
|
|
104
|
+
if (is_star) {
|
|
105
|
+
if (*it == '?') {
|
|
106
|
+
++it;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
} else if (*it == '{') {
|
|
110
|
+
if (sequence->empty()) {
|
|
111
|
+
throw std::runtime_error("Repetition without preceding element");
|
|
112
|
+
}
|
|
113
|
+
++it;
|
|
114
|
+
auto start = it;
|
|
115
|
+
while (it != end && *it != '}') {
|
|
116
|
+
++it;
|
|
117
|
+
}
|
|
118
|
+
if (it == end) {
|
|
119
|
+
throw std::runtime_error("Unmatched '{' in pattern");
|
|
120
|
+
}
|
|
121
|
+
auto parts = string_split(std::string(start, it), ",");
|
|
122
|
+
++it;
|
|
123
|
+
if (parts.size() > 2) {
|
|
124
|
+
throw std::runtime_error("Invalid repetition range in pattern");
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
auto parseOptInt = [&](const std::string & s, const std::optional<int> & def = std::nullopt) -> std::optional<int> {
|
|
128
|
+
if (s.empty()) {
|
|
129
|
+
return def;
|
|
130
|
+
}
|
|
131
|
+
return std::stoi(s);
|
|
132
|
+
};
|
|
133
|
+
auto min = parseOptInt(parts[0], 0);
|
|
134
|
+
auto max = parts.size() == 1 ? min : parseOptInt(parts[1]);
|
|
135
|
+
if (min && max && *max < *min) {
|
|
136
|
+
throw std::runtime_error("Invalid repetition range in pattern");
|
|
137
|
+
}
|
|
138
|
+
// Brutal but... let's repeat at least min times, then ? for the delta between min & max (or * for unbounded)
|
|
139
|
+
auto part = sequence->back();
|
|
140
|
+
sequence->pop_back();
|
|
141
|
+
for (int i = 0; i < *min; i++) {
|
|
142
|
+
sequence->push_back(part);
|
|
143
|
+
}
|
|
144
|
+
if (max) {
|
|
145
|
+
for (int i = *min; i < *max; i++) {
|
|
146
|
+
sequence->push_back(part + "?");
|
|
147
|
+
}
|
|
148
|
+
} else {
|
|
149
|
+
sequence->push_back(part + "*");
|
|
150
|
+
}
|
|
151
|
+
} else if (*it == '(') {
|
|
152
|
+
++it;
|
|
153
|
+
if (it != end && *it == '?' && (it + 1 != end) && *(it + 1) == ':') {
|
|
154
|
+
it += 2;
|
|
155
|
+
}
|
|
156
|
+
auto sub = process();
|
|
157
|
+
if (*it != ')') {
|
|
158
|
+
throw std::runtime_error("Unmatched '(' in pattern");
|
|
159
|
+
}
|
|
160
|
+
++it;
|
|
161
|
+
auto & part = sequence->emplace_back("(?:");
|
|
162
|
+
part += sub;
|
|
163
|
+
part += ")";
|
|
164
|
+
} else if (*it == ')') {
|
|
165
|
+
break;
|
|
166
|
+
} else if (*it == '|') {
|
|
167
|
+
++it;
|
|
168
|
+
alternatives.emplace_back();
|
|
169
|
+
sequence = &alternatives.back();
|
|
170
|
+
} else if (*it == '\\' && (++it != end)) {
|
|
171
|
+
auto str = std::string("\\") + *it;
|
|
172
|
+
sequence->push_back(str);
|
|
173
|
+
++it;
|
|
174
|
+
} else if (it != end) {
|
|
175
|
+
sequence->push_back(std::string(1, *it));
|
|
176
|
+
++it;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// /abcd/ -> (dcba|cba|ba|a).* -> ((?:(?:(?:d)?c)?b)?a).*
|
|
181
|
+
// if n(=4) parts, opening n-1(=3) non-capturing groups after the 1 capturing group
|
|
182
|
+
// We'll do the outermost capturing group and final .* in the enclosing function.
|
|
183
|
+
std::vector<std::string> res_alts;
|
|
184
|
+
for (const auto & parts : alternatives) {
|
|
185
|
+
auto & res = res_alts.emplace_back();
|
|
186
|
+
for (size_t i = 0; i < parts.size() - 1; i++) {
|
|
187
|
+
res += "(?:";
|
|
188
|
+
}
|
|
189
|
+
for (auto it = parts.rbegin(); it != parts.rend(); ++it) {
|
|
190
|
+
res += *it;
|
|
191
|
+
if (it != parts.rend() - 1) {
|
|
192
|
+
res += ")?";
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
return string_join(res_alts, "|");
|
|
197
|
+
};
|
|
198
|
+
auto res = process();
|
|
199
|
+
if (it != end) {
|
|
200
|
+
throw std::runtime_error("Unmatched '(' in pattern");
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
return "(" + res + ")[\\s\\S]*";
|
|
204
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <regex>
|
|
4
|
+
#include <string>
|
|
5
|
+
|
|
6
|
+
// Kind of result reported in common_regex_match::type.
enum common_regex_match_type {
    COMMON_REGEX_MATCH_TYPE_NONE    = 0, // default value of common_regex_match::type
    COMMON_REGEX_MATCH_TYPE_PARTIAL = 1,
    COMMON_REGEX_MATCH_TYPE_FULL    = 2,
};
|
|
11
|
+
|
|
12
|
+
// A pair of offsets [begin, end) with the invariant begin <= end, checked at construction.
struct common_string_range {
    size_t begin;
    size_t end;

    // prevent default ctor
    common_string_range() = delete;

    // Throws std::runtime_error("Invalid range") when `begin` is greater than `end`.
    common_string_range(size_t begin, size_t end) : begin(begin), end(end) {
        if (end < begin) {
            throw std::runtime_error("Invalid range");
        }
    }

    // True when the range covers no characters.
    bool empty() const {
        return end == begin;
    }

    // Two ranges are equal when both bounds are equal.
    bool operator==(const common_string_range & other) const {
        if (begin != other.begin) {
            return false;
        }
        return end == other.end;
    }
};
|
|
29
|
+
|
|
30
|
+
struct common_regex_match {
|
|
31
|
+
common_regex_match_type type = COMMON_REGEX_MATCH_TYPE_NONE;
|
|
32
|
+
std::vector<common_string_range> groups;
|
|
33
|
+
|
|
34
|
+
bool operator==(const common_regex_match & other) const {
|
|
35
|
+
return type == other.type && groups == other.groups;
|
|
36
|
+
}
|
|
37
|
+
bool operator!=(const common_regex_match & other) const {
|
|
38
|
+
return !(*this == other);
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
class common_regex {
|
|
43
|
+
std::string pattern;
|
|
44
|
+
std::regex rx;
|
|
45
|
+
std::regex rx_reversed_partial;
|
|
46
|
+
|
|
47
|
+
public:
|
|
48
|
+
explicit common_regex(const std::string & pattern);
|
|
49
|
+
|
|
50
|
+
common_regex_match search(const std::string & input, size_t pos, bool as_match = false) const;
|
|
51
|
+
|
|
52
|
+
const std::string & str() const { return pattern; }
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
// For testing only (pretty print of failures).
|
|
56
|
+
std::string regex_to_reversed_partial_regex(const std::string & pattern);
|
|
@@ -161,7 +161,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|
|
161
161
|
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
|
162
162
|
#endif // LLAMA_USE_LLGUIDANCE
|
|
163
163
|
} else {
|
|
164
|
-
std::vector<std::string>
|
|
164
|
+
std::vector<std::string> trigger_patterns;
|
|
165
165
|
std::vector<std::string> patterns_anywhere;
|
|
166
166
|
std::vector<llama_token> trigger_tokens;
|
|
167
167
|
for (const auto & trigger : params.grammar_triggers) {
|
|
@@ -173,10 +173,13 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|
|
173
173
|
break;
|
|
174
174
|
}
|
|
175
175
|
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
|
|
176
|
-
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START:
|
|
177
176
|
{
|
|
178
|
-
|
|
179
|
-
|
|
177
|
+
patterns_anywhere.push_back(trigger.value);
|
|
178
|
+
break;
|
|
179
|
+
}
|
|
180
|
+
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
|
|
181
|
+
{
|
|
182
|
+
trigger_patterns.push_back(trigger.value);
|
|
180
183
|
break;
|
|
181
184
|
}
|
|
182
185
|
case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN:
|
|
@@ -190,10 +193,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
|
|
|
190
193
|
}
|
|
191
194
|
}
|
|
192
195
|
|
|
193
|
-
std::vector<std::string> trigger_patterns;
|
|
194
|
-
if (!patterns_at_start.empty()) {
|
|
195
|
-
trigger_patterns.push_back("^(" + string_join(patterns_at_start, "|") + ")[\\s\\S]*");
|
|
196
|
-
}
|
|
197
196
|
if (!patterns_anywhere.empty()) {
|
|
198
197
|
trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*");
|
|
199
198
|
}
|