whispercpp 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/ruby_whisper_params.c +55 -25
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/addon.cpp +19 -19
- data/ext/sources/examples/addon.node/index.js +7 -5
- data/ext/sources/examples/bench/bench.cpp +26 -16
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +4 -2
- data/ext/sources/examples/command/command.cpp +26 -24
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/lsp.cpp +19 -17
- data/ext/sources/examples/server/server.cpp +24 -13
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +4 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +2 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +588 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +58 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +103 -71
- data/ext/sources/examples/talk-llama/llama-batch.h +31 -18
- data/ext/sources/examples/talk-llama/llama-chat.cpp +120 -5
- data/ext/sources/examples/talk-llama/llama-chat.h +7 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +460 -357
- data/ext/sources/examples/talk-llama/llama-context.h +44 -29
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +543 -271
- data/ext/sources/examples/talk-llama/llama-graph.h +278 -168
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +118 -4
- data/ext/sources/examples/talk-llama/llama-hparams.h +61 -15
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/{llama-kv-cache-unified-iswa.h → llama-kv-cache-iswa.h} +38 -29
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2020 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +358 -27
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +80 -28
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +56 -36
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +30 -29
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +48 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +13 -14
- data/ext/sources/examples/talk-llama/llama-memory.h +16 -10
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +2 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +7165 -2336
- data/ext/sources/examples/talk-llama/llama-model.h +60 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +48 -10
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +440 -13
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -0
- data/ext/sources/examples/talk-llama/llama.cpp +65 -10
- data/ext/sources/examples/talk-llama/llama.h +95 -177
- data/ext/sources/examples/talk-llama/talk-llama.cpp +9 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +207 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +4 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +59 -31
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +17 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -1
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +221 -16
- data/ext/sources/ggml/src/CMakeLists.txt +17 -2
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +30 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +221 -38
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +143 -1
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +488 -69
- data/ext/sources/ggml/src/ggml-common.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +40 -18
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +4 -2
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +132 -596
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +14 -286
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +103 -582
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +162 -589
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +265 -437
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +3 -58
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +521 -353
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +54 -314
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +184 -675
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +4679 -1657
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +32 -2
- data/ext/sources/ggml/src/ggml-cpu/common.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +13 -6
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +70 -42
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +35 -28
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +227 -97
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +474 -1116
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1587 -1177
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +35 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +458 -47
- data/ext/sources/ggml/src/ggml-cpu/repack.h +22 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +89 -60
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/traits.cpp +2 -2
- data/ext/sources/ggml/src/ggml-cpu/traits.h +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +170 -26
- data/ext/sources/ggml/src/ggml-cpu/vec.h +506 -63
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +250 -63
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +95 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +498 -367
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +137 -91
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +86 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +379 -107
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +56 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cu → mmvf.cu} +53 -53
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +3 -3
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -100
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +21 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +90 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +8 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +10 -2
- data/ext/sources/ggml/src/ggml-impl.h +119 -9
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -7
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +136 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +2854 -1503
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +18 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +2510 -242
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +177 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +49 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +111 -16
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +67 -47
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +15 -5
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +25 -16
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +10 -4
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +166 -99
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +72 -306
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +1 -31
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +79 -29
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +14 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +9 -6
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +328 -323
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +80 -60
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +201 -132
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +74 -55
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +8 -9
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +35 -42
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +16 -12
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +3492 -883
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +55 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -77
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +478 -98
- data/ext/sources/ggml/src/gguf.cpp +8 -1
- data/ext/sources/src/whisper.cpp +23 -46
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/lib/whisper/model/uri.rb +1 -1
- data/sig/whisper.rbs +7 -0
- data/test/test_params.rb +8 -0
- data/test/test_whisper.rb +1 -1
- data/whispercpp.gemspec +1 -1
- metadata +164 -157
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +0 -279
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +0 -1841
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +0 -303
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -6280
@@ -11,6 +11,7 @@
|
|
11
11
|
#include <cassert>
|
12
12
|
#include <cctype>
|
13
13
|
#include <cfloat>
|
14
|
+
#include <cmath>
|
14
15
|
#include <cstdarg>
|
15
16
|
#include <cstring>
|
16
17
|
#include <forward_list>
|
@@ -306,6 +307,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
306
307
|
};
|
307
308
|
break;
|
308
309
|
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
|
310
|
+
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE:
|
309
311
|
regex_exprs = {
|
310
312
|
"\\p{N}{1,3}",
|
311
313
|
"[一-龥-ゟ゠-ヿ]+",
|
@@ -351,6 +353,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
351
353
|
break;
|
352
354
|
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
|
353
355
|
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
|
356
|
+
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
|
354
357
|
regex_exprs = {
|
355
358
|
// original regex from tokenizer.json
|
356
359
|
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
@@ -403,6 +406,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
403
406
|
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
404
407
|
};
|
405
408
|
break;
|
409
|
+
case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
|
410
|
+
regex_exprs = {
|
411
|
+
// K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
|
412
|
+
// The custom handler implements all K2 patterns with proper Han character exclusion
|
413
|
+
"\\p{Han}+",
|
414
|
+
};
|
415
|
+
break;
|
406
416
|
case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
|
407
417
|
regex_exprs = {
|
408
418
|
"\\p{N}+",
|
@@ -424,6 +434,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
424
434
|
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\\r\\n]+|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
425
435
|
};
|
426
436
|
break;
|
437
|
+
case LLAMA_VOCAB_PRE_TYPE_GROK_2:
|
438
|
+
regex_exprs = {
|
439
|
+
// original regex from tokenizer.json
|
440
|
+
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
441
|
+
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
442
|
+
};
|
443
|
+
break;
|
427
444
|
default:
|
428
445
|
// default regex for BPE tokenization pre-processing
|
429
446
|
regex_exprs = {
|
@@ -1195,6 +1212,284 @@ private:
|
|
1195
1212
|
const llm_tokenizer_rwkv & tokenizer;
|
1196
1213
|
};
|
1197
1214
|
|
1215
|
+
struct llm_tokenizer_plamo2 : llm_tokenizer {
|
1216
|
+
llm_tokenizer_plamo2(const llama_vocab & vocab) {
|
1217
|
+
build(vocab);
|
1218
|
+
}
|
1219
|
+
|
1220
|
+
void build(const llama_vocab & vocab) {
|
1221
|
+
// Reset internal structures
|
1222
|
+
tokens_.clear();
|
1223
|
+
bytes_.assign(256, 0);
|
1224
|
+
to_suffix_id_.clear();
|
1225
|
+
table_.clear();
|
1226
|
+
|
1227
|
+
// Build token list and byte mapping
|
1228
|
+
std::unordered_map<std::string, float> suffix_to_score;
|
1229
|
+
std::unordered_map<std::string, llama_token> token_to_id;
|
1230
|
+
|
1231
|
+
for (size_t token_id = 0; token_id < vocab.n_tokens(); ++token_id) {
|
1232
|
+
const auto & entry = vocab.get_token_data(token_id);
|
1233
|
+
tokens_.push_back(entry.text);
|
1234
|
+
token_to_id[entry.text] = static_cast<llama_token>(token_id);
|
1235
|
+
|
1236
|
+
// Handle byte tokens
|
1237
|
+
if (vocab.is_byte(token_id)) {
|
1238
|
+
if (entry.text.length() == 6 && entry.text.substr(0, 3) == "<0x" && entry.text.back() == '>') {
|
1239
|
+
std::string hex_str = entry.text.substr(3, 2);
|
1240
|
+
int byte_val = std::stoi(hex_str, nullptr, 16);
|
1241
|
+
bytes_[byte_val] = static_cast<llama_token>(token_id);
|
1242
|
+
}
|
1243
|
+
continue;
|
1244
|
+
}
|
1245
|
+
|
1246
|
+
// Add token and all its suffixes to suffix_to_score
|
1247
|
+
suffix_to_score[entry.text] = entry.score;
|
1248
|
+
|
1249
|
+
// Extract suffixes character by character (UTF-8 aware)
|
1250
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(entry.text);
|
1251
|
+
for (size_t i = 1; i < cpts.size(); ++i) {
|
1252
|
+
std::string suffix;
|
1253
|
+
for (size_t j = i; j < cpts.size(); ++j) {
|
1254
|
+
suffix += unicode_cpt_to_utf8(cpts[j]);
|
1255
|
+
}
|
1256
|
+
if (suffix_to_score.find(suffix) == suffix_to_score.end()) {
|
1257
|
+
suffix_to_score[suffix] = std::numeric_limits<float>::quiet_NaN();
|
1258
|
+
}
|
1259
|
+
}
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
// Check that all byte tokens are set
|
1263
|
+
for (int i = 0; i < 256; ++i) {
|
1264
|
+
if (bytes_[i] == 0) {
|
1265
|
+
throw std::runtime_error("Byte token for <0x" + std::to_string(i) + "> is not set");
|
1266
|
+
}
|
1267
|
+
}
|
1268
|
+
|
1269
|
+
// Build suffix list in lexicographical order of reversed strings
|
1270
|
+
std::vector<std::string> suffixes;
|
1271
|
+
for (const auto & pair : suffix_to_score) {
|
1272
|
+
suffixes.push_back(pair.first);
|
1273
|
+
}
|
1274
|
+
suffixes.push_back(""); // Empty suffix
|
1275
|
+
|
1276
|
+
std::sort(suffixes.begin(), suffixes.end(), [](const std::string & a, const std::string & b) {
|
1277
|
+
std::string rev_a(a.rbegin(), a.rend());
|
1278
|
+
std::string rev_b(b.rbegin(), b.rend());
|
1279
|
+
return rev_a < rev_b;
|
1280
|
+
});
|
1281
|
+
|
1282
|
+
// Build suffix_to_id and to_suffix_id_
|
1283
|
+
std::unordered_map<std::string, int32_t> suffix_to_id;
|
1284
|
+
int32_t num_pieces = 0;
|
1285
|
+
|
1286
|
+
for (const auto & suffix : suffixes) {
|
1287
|
+
suffix_to_id[suffix] = num_pieces;
|
1288
|
+
if (!suffix.empty()) {
|
1289
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
|
1290
|
+
|
1291
|
+
std::string remaining;
|
1292
|
+
for (size_t i = 1; i < cpts.size(); ++i) {
|
1293
|
+
remaining += unicode_cpt_to_utf8(cpts[i]);
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
int64_t piece_code = (static_cast<int64_t>(cpts[0]) << 32) | suffix_to_id[remaining];
|
1297
|
+
to_suffix_id_[piece_code] = num_pieces;
|
1298
|
+
|
1299
|
+
// Count number of pieces for this suffix
|
1300
|
+
int32_t pieces_for_suffix = 1; // sentinel row
|
1301
|
+
for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
|
1302
|
+
std::string piece;
|
1303
|
+
for (int32_t i = 0; i < piece_length; ++i) {
|
1304
|
+
piece += unicode_cpt_to_utf8(cpts[i]);
|
1305
|
+
}
|
1306
|
+
if (suffix_to_score.find(piece) != suffix_to_score.end()) {
|
1307
|
+
pieces_for_suffix++;
|
1308
|
+
}
|
1309
|
+
}
|
1310
|
+
num_pieces += pieces_for_suffix;
|
1311
|
+
} else {
|
1312
|
+
num_pieces++; // Empty suffix contributes one piece (sentinel row)
|
1313
|
+
}
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
// Build flattened table
|
1317
|
+
table_.resize(num_pieces, std::vector<int32_t>(4, 0));
|
1318
|
+
int32_t table_idx = 0;
|
1319
|
+
|
1320
|
+
for (const auto & suffix : suffixes) {
|
1321
|
+
// Add all prefixes of the suffix to the table (in decreasing order of length)
|
1322
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
|
1323
|
+
for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
|
1324
|
+
std::string piece;
|
1325
|
+
for (int32_t i = 0; i < piece_length; ++i) {
|
1326
|
+
piece += unicode_cpt_to_utf8(cpts[i]);
|
1327
|
+
}
|
1328
|
+
|
1329
|
+
auto score_it = suffix_to_score.find(piece);
|
1330
|
+
if (score_it == suffix_to_score.end()) {
|
1331
|
+
continue;
|
1332
|
+
}
|
1333
|
+
|
1334
|
+
table_[table_idx][TABLE_PIECE_LENGTH] = piece_length;
|
1335
|
+
auto token_it = token_to_id.find(piece);
|
1336
|
+
table_[table_idx][TABLE_TOKEN_ID] = (token_it != token_to_id.end()) ? token_it->second : -1;
|
1337
|
+
|
1338
|
+
float score = score_it->second;
|
1339
|
+
table_[table_idx][TABLE_SCORE] = std::isfinite(score) ?
|
1340
|
+
static_cast<int32_t>(std::round(score * 1e4)) : INVALID_SCORE;
|
1341
|
+
table_[table_idx][TABLE_PIECE_ID] = suffix_to_id[piece];
|
1342
|
+
|
1343
|
+
table_idx++;
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
// Add sentinel row
|
1347
|
+
table_[table_idx][TABLE_PIECE_LENGTH] = 1;
|
1348
|
+
table_[table_idx][TABLE_TOKEN_ID] = -1;
|
1349
|
+
table_[table_idx][TABLE_SCORE] = UNKNOWN_SCORE;
|
1350
|
+
table_idx++;
|
1351
|
+
}
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
std::vector<llama_token> encode(const std::string & text) const {
|
1355
|
+
std::vector<uint32_t> unicode_data = unicode_cpts_from_utf8(text);
|
1356
|
+
// Skip the first code point if it is a BOM (Byte Order Mark)
|
1357
|
+
if (!unicode_data.empty() && unicode_data[0] == 0xFEFF) {
|
1358
|
+
unicode_data.erase(unicode_data.begin());
|
1359
|
+
}
|
1360
|
+
|
1361
|
+
if (unicode_data.empty()) {
|
1362
|
+
return {};
|
1363
|
+
}
|
1364
|
+
|
1365
|
+
const size_t data_len = unicode_data.size();
|
1366
|
+
|
1367
|
+
// Initialize scores array (dynamic programming)
|
1368
|
+
std::vector<int64_t> scores(data_len + 1, static_cast<int64_t>(1) << 60);
|
1369
|
+
scores[data_len] = 0;
|
1370
|
+
|
1371
|
+
// Path array to track best tokenization
|
1372
|
+
std::vector<std::vector<int32_t>> path(data_len + 1, std::vector<int32_t>(3, 0));
|
1373
|
+
|
1374
|
+
int32_t suffix_id = 0;
|
1375
|
+
|
1376
|
+
// Process from end to beginning
|
1377
|
+
for (int i = static_cast<int>(data_len) - 1; i >= 0; --i) {
|
1378
|
+
uint32_t c = unicode_data[i];
|
1379
|
+
|
1380
|
+
// Find next suffix ID
|
1381
|
+
for (size_t p = suffix_id; p < table_.size(); ++p) {
|
1382
|
+
int64_t piece_code = (static_cast<int64_t>(c) << 32) | table_[p][TABLE_PIECE_ID];
|
1383
|
+
auto it = to_suffix_id_.find(piece_code);
|
1384
|
+
suffix_id = (it != to_suffix_id_.end()) ? it->second : 0;
|
1385
|
+
|
1386
|
+
if (suffix_id > 0 || table_[p][TABLE_SCORE] == UNKNOWN_SCORE) {
|
1387
|
+
break;
|
1388
|
+
}
|
1389
|
+
}
|
1390
|
+
|
1391
|
+
// Update best path
|
1392
|
+
for (size_t p = suffix_id; p < table_.size(); ++p) {
|
1393
|
+
int32_t score = table_[p][TABLE_SCORE];
|
1394
|
+
if (score > INVALID_SCORE) {
|
1395
|
+
int32_t piece_length = table_[p][TABLE_PIECE_LENGTH];
|
1396
|
+
int64_t s = scores[i + piece_length] - score;
|
1397
|
+
|
1398
|
+
if (s < scores[i]) {
|
1399
|
+
scores[i] = s;
|
1400
|
+
path[i][PATH_TOKEN_LENGTH] = piece_length;
|
1401
|
+
path[i][PATH_TOKEN_ID] = table_[p][TABLE_TOKEN_ID];
|
1402
|
+
path[i][PATH_NUM_TOKENS] = path[i + piece_length][PATH_NUM_TOKENS] + 1;
|
1403
|
+
|
1404
|
+
if (score == UNKNOWN_SCORE) {
|
1405
|
+
// Add UTF-8 byte count
|
1406
|
+
path[i][PATH_NUM_TOKENS] += (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
|
1407
|
+
}
|
1408
|
+
}
|
1409
|
+
}
|
1410
|
+
|
1411
|
+
if (score == UNKNOWN_SCORE) {
|
1412
|
+
break;
|
1413
|
+
}
|
1414
|
+
}
|
1415
|
+
}
|
1416
|
+
|
1417
|
+
// Decode the best path
|
1418
|
+
std::vector<llama_token> token_ids;
|
1419
|
+
token_ids.reserve(path[0][PATH_NUM_TOKENS]);
|
1420
|
+
|
1421
|
+
int pos = 0;
|
1422
|
+
while (pos < static_cast<int>(data_len)) {
|
1423
|
+
if (path[pos][PATH_TOKEN_ID] >= 0) {
|
1424
|
+
token_ids.push_back(path[pos][PATH_TOKEN_ID]);
|
1425
|
+
} else {
|
1426
|
+
// Fall back to byte tokens
|
1427
|
+
uint32_t c = unicode_data[pos];
|
1428
|
+
int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
|
1429
|
+
|
1430
|
+
for (int i = 0; i < s; ++i) {
|
1431
|
+
uint8_t b;
|
1432
|
+
if (s == 1) {
|
1433
|
+
b = c;
|
1434
|
+
} else {
|
1435
|
+
if (i == 0) {
|
1436
|
+
b = (0xF00 >> s) & 0xFF;
|
1437
|
+
} else {
|
1438
|
+
b = 0x80;
|
1439
|
+
}
|
1440
|
+
}
|
1441
|
+
token_ids.push_back(bytes_[b | ((c >> ((s - i - 1) * 6)) & 0x3F)]);
|
1442
|
+
}
|
1443
|
+
}
|
1444
|
+
|
1445
|
+
assert(path[pos][PATH_TOKEN_LENGTH] > 0);
|
1446
|
+
pos += path[pos][PATH_TOKEN_LENGTH];
|
1447
|
+
}
|
1448
|
+
|
1449
|
+
return token_ids;
|
1450
|
+
}
|
1451
|
+
private:
|
1452
|
+
// Constants for table structure
|
1453
|
+
static constexpr int32_t TABLE_PIECE_LENGTH = 0;
|
1454
|
+
static constexpr int32_t TABLE_TOKEN_ID = 1;
|
1455
|
+
static constexpr int32_t TABLE_SCORE = 2;
|
1456
|
+
static constexpr int32_t TABLE_PIECE_ID = 3;
|
1457
|
+
|
1458
|
+
// Constants for path array
|
1459
|
+
static constexpr int32_t PATH_TOKEN_LENGTH = 0;
|
1460
|
+
static constexpr int32_t PATH_TOKEN_ID = 1;
|
1461
|
+
static constexpr int32_t PATH_NUM_TOKENS = 2;
|
1462
|
+
|
1463
|
+
// Score constants
|
1464
|
+
static constexpr int32_t INVALID_SCORE = -20000000;
|
1465
|
+
static constexpr int32_t UNKNOWN_SCORE = -10000000;
|
1466
|
+
|
1467
|
+
// List of tokens in the vocabulary
|
1468
|
+
std::vector<std::string> tokens_;
|
1469
|
+
|
1470
|
+
// Mapping from byte code point to token ID (for byte fallback)
|
1471
|
+
std::vector<llama_token> bytes_;
|
1472
|
+
|
1473
|
+
// Mapping from piece code to suffix ID
|
1474
|
+
std::unordered_map<int64_t, int32_t> to_suffix_id_;
|
1475
|
+
|
1476
|
+
// Flattened table representing the Trie structure
|
1477
|
+
// Each row contains: [piece_length, token_id, score, piece_id]
|
1478
|
+
std::vector<std::vector<int32_t>> table_;
|
1479
|
+
};
|
1480
|
+
|
1481
|
+
struct llm_tokenizer_plamo2_session {
|
1482
|
+
llm_tokenizer_plamo2_session(const llm_tokenizer_plamo2 & tokenizer) : tokenizer(tokenizer) {}
|
1483
|
+
|
1484
|
+
void tokenize(const std::string & text, std::vector<llama_token> & output) {
|
1485
|
+
std::vector<llama_token> tokens = tokenizer.encode(text);
|
1486
|
+
output.insert(output.end(), tokens.begin(), tokens.end());
|
1487
|
+
}
|
1488
|
+
|
1489
|
+
private:
|
1490
|
+
const llm_tokenizer_plamo2 & tokenizer;
|
1491
|
+
};
|
1492
|
+
|
1198
1493
|
//
|
1199
1494
|
// impl
|
1200
1495
|
//
|
@@ -1477,7 +1772,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1477
1772
|
const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
|
1478
1773
|
const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
|
1479
1774
|
precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
|
1480
|
-
#
|
1775
|
+
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
1481
1776
|
// correct endiannes of data in precompiled_charsmap binary blob
|
1482
1777
|
uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
|
1483
1778
|
*xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
|
@@ -1498,6 +1793,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1498
1793
|
special_unk_id = LLAMA_TOKEN_NULL;
|
1499
1794
|
special_sep_id = LLAMA_TOKEN_NULL;
|
1500
1795
|
special_pad_id = LLAMA_TOKEN_NULL;
|
1796
|
+
} else if (tokenizer_model == "plamo2") {
|
1797
|
+
type = LLAMA_VOCAB_TYPE_PLAMO2;
|
1798
|
+
|
1799
|
+
// PLaMo-2 default special tokens (these will be overridden by model config)
|
1800
|
+
special_bos_id = 1; // <|plamo:bos|>
|
1801
|
+
special_eos_id = 2; // <|plamo:eos|>
|
1802
|
+
special_unk_id = 0; // <|plamo:unk|>
|
1803
|
+
special_sep_id = LLAMA_TOKEN_NULL;
|
1804
|
+
special_pad_id = 3; // <|plamo:pad|>
|
1805
|
+
special_mask_id = LLAMA_TOKEN_NULL;
|
1501
1806
|
} else {
|
1502
1807
|
throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str()));
|
1503
1808
|
}
|
@@ -1522,7 +1827,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1522
1827
|
tokenizer_pre == "llama-v3" ||
|
1523
1828
|
tokenizer_pre == "llama-bpe"||
|
1524
1829
|
tokenizer_pre == "falcon3" ||
|
1525
|
-
tokenizer_pre == "
|
1830
|
+
tokenizer_pre == "falcon-h1" ||
|
1831
|
+
tokenizer_pre == "pixtral" ||
|
1832
|
+
tokenizer_pre == "midm-2.0" ||
|
1833
|
+
tokenizer_pre == "lfm2") {
|
1526
1834
|
pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
|
1527
1835
|
ignore_merges = true;
|
1528
1836
|
add_bos = true;
|
@@ -1554,7 +1862,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1554
1862
|
tokenizer_pre == "jina-de" ||
|
1555
1863
|
tokenizer_pre == "gigachat" ||
|
1556
1864
|
tokenizer_pre == "jina-v2-es" ||
|
1557
|
-
tokenizer_pre == "jina-v2-de"
|
1865
|
+
tokenizer_pre == "jina-v2-de" ||
|
1866
|
+
tokenizer_pre == "a.x-4.0" ||
|
1867
|
+
tokenizer_pre == "mellum") {
|
1558
1868
|
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
|
1559
1869
|
} else if (
|
1560
1870
|
tokenizer_pre == "jina-v1-en" ||
|
@@ -1624,6 +1934,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1624
1934
|
} else if (
|
1625
1935
|
tokenizer_pre == "exaone") {
|
1626
1936
|
pre_type = LLAMA_VOCAB_PRE_TYPE_EXAONE;
|
1937
|
+
} else if (
|
1938
|
+
tokenizer_pre == "exaone4") {
|
1939
|
+
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
|
1627
1940
|
} else if (
|
1628
1941
|
tokenizer_pre == "chameleon") {
|
1629
1942
|
pre_type = LLAMA_VOCAB_PRE_TYPE_CHAMELEON;
|
@@ -1649,13 +1962,30 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1649
1962
|
pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
|
1650
1963
|
clean_spaces = false;
|
1651
1964
|
} else if (
|
1652
|
-
tokenizer_pre == "bailingmoe"
|
1965
|
+
tokenizer_pre == "bailingmoe" ||
|
1966
|
+
tokenizer_pre == "llada-moe") {
|
1653
1967
|
pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
|
1654
1968
|
clean_spaces = false;
|
1655
1969
|
} else if (
|
1656
1970
|
tokenizer_pre == "seed-coder") {
|
1657
1971
|
pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
|
1658
1972
|
clean_spaces = false;
|
1973
|
+
} else if (
|
1974
|
+
tokenizer_pre == "hunyuan") {
|
1975
|
+
pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
|
1976
|
+
clean_spaces = false;
|
1977
|
+
} else if (
|
1978
|
+
tokenizer_pre == "hunyuan-dense") {
|
1979
|
+
pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE;
|
1980
|
+
clean_spaces = false;
|
1981
|
+
} else if (
|
1982
|
+
tokenizer_pre == "kimi-k2") {
|
1983
|
+
pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
|
1984
|
+
clean_spaces = false;
|
1985
|
+
} else if (
|
1986
|
+
tokenizer_pre == "grok-2") {
|
1987
|
+
pre_type = LLAMA_VOCAB_PRE_TYPE_GROK_2;
|
1988
|
+
clean_spaces = false;
|
1659
1989
|
} else {
|
1660
1990
|
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
1661
1991
|
}
|
@@ -1839,6 +2169,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1839
2169
|
|| t.first == "<EOT>"
|
1840
2170
|
|| t.first == "_<EOT>"
|
1841
2171
|
|| t.first == "<|end▁of▁sentence|>" // DeepSeek
|
2172
|
+
|| t.first == "<end_of_utterance>" // smoldocling
|
1842
2173
|
) {
|
1843
2174
|
special_eot_id = t.second;
|
1844
2175
|
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
@@ -1872,6 +2203,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1872
2203
|
|| t.first == "<|fim▁begin|>" // DeepSeek
|
1873
2204
|
|| t.first == "<PRE>"
|
1874
2205
|
|| t.first == "▁<PRE>" // CodeLlama
|
2206
|
+
|| t.first == "<|code_prefix|>" // GLM-4.5
|
1875
2207
|
) {
|
1876
2208
|
special_fim_pre_id = t.second;
|
1877
2209
|
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
@@ -1891,6 +2223,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1891
2223
|
|| t.first == "<|fim▁hole|>" // DeepSeek
|
1892
2224
|
|| t.first == "<SUF>"
|
1893
2225
|
|| t.first == "▁<SUF>" // CodeLlama
|
2226
|
+
|| t.first == "<|code_suffix|>" // GLM-4.5
|
1894
2227
|
) {
|
1895
2228
|
special_fim_suf_id = t.second;
|
1896
2229
|
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
@@ -1910,6 +2243,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1910
2243
|
|| t.first == "<|fim▁end|>" // DeepSeek
|
1911
2244
|
|| t.first == "<MID>"
|
1912
2245
|
|| t.first == "▁<MID>" // CodeLlama
|
2246
|
+
|| t.first == "<|code_middle|>" // GLM-4.5
|
1913
2247
|
) {
|
1914
2248
|
special_fim_mid_id = t.second;
|
1915
2249
|
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
@@ -1992,12 +2326,15 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
1992
2326
|
|| t.first == "<|eot_id|>"
|
1993
2327
|
|| t.first == "<|im_end|>"
|
1994
2328
|
|| t.first == "<|end|>"
|
2329
|
+
|| t.first == "<|return|>" // o200k_harmony
|
2330
|
+
|| t.first == "<|call|>" // o200k_harmony
|
1995
2331
|
|| t.first == "<end_of_turn>"
|
1996
2332
|
|| t.first == "<|endoftext|>"
|
1997
2333
|
|| t.first == "<|eom_id|>"
|
1998
2334
|
|| t.first == "<EOT>"
|
1999
2335
|
|| t.first == "_<EOT>"
|
2000
2336
|
|| t.first == "<|end_of_text|>"
|
2337
|
+
|| t.first == "<end_of_utterance>" // smoldocling
|
2001
2338
|
) {
|
2002
2339
|
special_eog_ids.insert(t.second);
|
2003
2340
|
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
@@ -2014,6 +2351,13 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
2014
2351
|
}
|
2015
2352
|
}
|
2016
2353
|
|
2354
|
+
// @ngxson : quick hack for gpt-oss, always render these tokens
|
2355
|
+
for (const auto & t : token_to_id) {
|
2356
|
+
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
|
2357
|
+
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
2358
|
+
}
|
2359
|
+
}
|
2360
|
+
|
2017
2361
|
// sanity checks
|
2018
2362
|
if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
|
2019
2363
|
special_eog_ids.insert(special_eos_id);
|
@@ -2029,6 +2373,37 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
2029
2373
|
special_eog_ids.insert(special_eom_id);
|
2030
2374
|
LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
|
2031
2375
|
}
|
2376
|
+
|
2377
|
+
// TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
|
2378
|
+
// we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
|
2379
|
+
// we remove the "<|end|>" token from the EOG list
|
2380
|
+
{
|
2381
|
+
bool has_return = false;
|
2382
|
+
bool has_call = false;
|
2383
|
+
bool has_end = false;
|
2384
|
+
|
2385
|
+
llama_token end_id = LLAMA_TOKEN_NULL;
|
2386
|
+
|
2387
|
+
LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__);
|
2388
|
+
for (auto tid : special_eog_ids) {
|
2389
|
+
LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str());
|
2390
|
+
|
2391
|
+
if (id_to_token[tid].text == "<|return|>") {
|
2392
|
+
has_return = true;
|
2393
|
+
} else if (id_to_token[tid].text == "<|call|>") {
|
2394
|
+
has_call = true;
|
2395
|
+
} else if (id_to_token[tid].text == "<|end|>") {
|
2396
|
+
has_end = true;
|
2397
|
+
end_id = tid;
|
2398
|
+
}
|
2399
|
+
}
|
2400
|
+
|
2401
|
+
if (has_return && has_call && has_end) {
|
2402
|
+
special_eog_ids.erase(end_id);
|
2403
|
+
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
2404
|
+
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
2405
|
+
}
|
2406
|
+
}
|
2032
2407
|
}
|
2033
2408
|
|
2034
2409
|
// build special tokens cache
|
@@ -2107,7 +2482,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
2107
2482
|
// set attributes by model/tokenizer/architecture name
|
2108
2483
|
if (false
|
2109
2484
|
|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
|
2110
|
-
|| _contains_any(general_arch, {"nomic-bert-moe"})
|
2485
|
+
|| _contains_any(general_arch, {"nomic-bert-moe", "jina-bert-v3"})
|
2111
2486
|
) {
|
2112
2487
|
if (token_to_id.count("<mask>") == 0) {
|
2113
2488
|
LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
|
@@ -2134,13 +2509,14 @@ enum llama_vocab_type llama_vocab::impl::get_type() const {
|
|
2134
2509
|
|
2135
2510
|
std::string llama_vocab::impl::type_name() const{
|
2136
2511
|
switch (type) {
|
2137
|
-
case LLAMA_VOCAB_TYPE_NONE:
|
2138
|
-
case LLAMA_VOCAB_TYPE_SPM:
|
2139
|
-
case LLAMA_VOCAB_TYPE_BPE:
|
2140
|
-
case LLAMA_VOCAB_TYPE_WPM:
|
2141
|
-
case LLAMA_VOCAB_TYPE_UGM:
|
2142
|
-
case LLAMA_VOCAB_TYPE_RWKV:
|
2143
|
-
|
2512
|
+
case LLAMA_VOCAB_TYPE_NONE: return "no vocab";
|
2513
|
+
case LLAMA_VOCAB_TYPE_SPM: return "SPM";
|
2514
|
+
case LLAMA_VOCAB_TYPE_BPE: return "BPE";
|
2515
|
+
case LLAMA_VOCAB_TYPE_WPM: return "WPM";
|
2516
|
+
case LLAMA_VOCAB_TYPE_UGM: return "UGM";
|
2517
|
+
case LLAMA_VOCAB_TYPE_RWKV: return "RWKV";
|
2518
|
+
case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2";
|
2519
|
+
default: return "unknown";
|
2144
2520
|
}
|
2145
2521
|
}
|
2146
2522
|
|
@@ -2223,6 +2599,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) {
|
|
2223
2599
|
case LLAMA_VOCAB_TYPE_RWKV:
|
2224
2600
|
tokenizer = std::make_unique<llm_tokenizer_rwkv>(vocab);
|
2225
2601
|
break;
|
2602
|
+
case LLAMA_VOCAB_TYPE_PLAMO2:
|
2603
|
+
tokenizer = std::make_unique<llm_tokenizer_plamo2>(vocab);
|
2604
|
+
break;
|
2226
2605
|
default:
|
2227
2606
|
GGML_ABORT("unsupported vocab type");
|
2228
2607
|
}
|
@@ -2555,6 +2934,23 @@ std::vector<llama_token> llama_vocab::impl::tokenize(
|
|
2555
2934
|
if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
|
2556
2935
|
std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
|
2557
2936
|
|
2937
|
+
#ifdef PRETOKENIZERDEBUG
|
2938
|
+
LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
|
2939
|
+
#endif
|
2940
|
+
|
2941
|
+
session.tokenize(text, output);
|
2942
|
+
} else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN)
|
2943
|
+
output.push_back(fragment.token);
|
2944
|
+
}
|
2945
|
+
}
|
2946
|
+
} break;
|
2947
|
+
case LLAMA_VOCAB_TYPE_PLAMO2:
|
2948
|
+
{
|
2949
|
+
llm_tokenizer_plamo2_session session(*static_cast<const llm_tokenizer_plamo2 *>(tokenizer.get()));
|
2950
|
+
for (const auto & fragment : fragment_buffer) {
|
2951
|
+
if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
|
2952
|
+
std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
|
2953
|
+
|
2558
2954
|
#ifdef PRETOKENIZERDEBUG
|
2559
2955
|
LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
|
2560
2956
|
#endif
|
@@ -2653,6 +3049,24 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
|
|
2653
3049
|
memcpy(buf, result.data(), result.size());
|
2654
3050
|
return (int)result.size();
|
2655
3051
|
}
|
3052
|
+
case LLAMA_VOCAB_TYPE_PLAMO2: {
|
3053
|
+
// PLaMo-2 uses similar token handling as BPE/SPM
|
3054
|
+
if (vocab.is_byte(token)) {
|
3055
|
+
// Handle byte tokens like <0xXX>
|
3056
|
+
if (token_text.length() == 6 && token_text.substr(0, 3) == "<0x" && token_text.back() == '>') {
|
3057
|
+
int hex_val = std::stoi(token_text.substr(3, 2), nullptr, 16);
|
3058
|
+
if (length < 1) {
|
3059
|
+
return -1;
|
3060
|
+
}
|
3061
|
+
buf[0] = static_cast<char>(hex_val);
|
3062
|
+
return 1;
|
3063
|
+
}
|
3064
|
+
}
|
3065
|
+
|
3066
|
+
// Normal token - just copy the text
|
3067
|
+
std::string result = token_text;
|
3068
|
+
return _try_copy(result.data(), result.size());
|
3069
|
+
}
|
2656
3070
|
default:
|
2657
3071
|
GGML_ABORT("fatal error");
|
2658
3072
|
}
|
@@ -2897,6 +3311,12 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const {
|
|
2897
3311
|
case LLAMA_VOCAB_TYPE_BPE: {
|
2898
3312
|
return pimpl->token_to_id.at(unicode_byte_to_utf8(ch));
|
2899
3313
|
}
|
3314
|
+
case LLAMA_VOCAB_TYPE_PLAMO2: {
|
3315
|
+
// PLaMo-2 uses byte tokens in format <0xXX>
|
3316
|
+
char hex_str[8];
|
3317
|
+
snprintf(hex_str, sizeof(hex_str), "<0x%02X>", ch);
|
3318
|
+
return pimpl->token_to_id.at(hex_str);
|
3319
|
+
}
|
2900
3320
|
default:
|
2901
3321
|
GGML_ABORT("fatal error");
|
2902
3322
|
}
|
@@ -2998,6 +3418,10 @@ llama_token llama_vocab::token_fim_sep() const {
|
|
2998
3418
|
return pimpl->special_fim_sep_id;
|
2999
3419
|
}
|
3000
3420
|
|
3421
|
+
llama_token llama_vocab::token_mask() const {
|
3422
|
+
return pimpl->special_mask_id;
|
3423
|
+
}
|
3424
|
+
|
3001
3425
|
bool llama_vocab::get_add_space_prefix() const {
|
3002
3426
|
return pimpl->add_space_prefix;
|
3003
3427
|
}
|
@@ -3238,6 +3662,10 @@ llama_token llama_vocab_fim_sep(const struct llama_vocab * vocab) {
|
|
3238
3662
|
return vocab->token_fim_sep();
|
3239
3663
|
}
|
3240
3664
|
|
3665
|
+
llama_token llama_vocab_mask(const struct llama_vocab* vocab) {
|
3666
|
+
return vocab->token_mask();
|
3667
|
+
}
|
3668
|
+
|
3241
3669
|
// deprecated
|
3242
3670
|
const char * llama_token_get_text(const struct llama_vocab * vocab, llama_token token) {
|
3243
3671
|
return llama_vocab_get_text(vocab, token);
|
@@ -3374,4 +3802,3 @@ int32_t llama_detokenize(
|
|
3374
3802
|
bool unparse_special) {
|
3375
3803
|
return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
|
3376
3804
|
}
|
3377
|
-
|