whispercpp 1.3.2 → 1.3.4
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +59 -27
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +154 -35
- data/ext/sources/examples/addon.node/index.js +10 -5
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +29 -18
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +7 -4
- data/ext/sources/examples/command/command.cpp +58 -32
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +21 -17
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +193 -35
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +10 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
- data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
- data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
- data/ext/sources/examples/talk-llama/llama-context.h +68 -32
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
- data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
- data/ext/sources/examples/talk-llama/llama-model.h +87 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
- data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -17
- data/ext/sources/examples/talk-llama/llama.h +176 -151
- data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +106 -33
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +18 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +365 -21
- data/ext/sources/ggml/src/CMakeLists.txt +98 -25
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
- data/ext/sources/ggml/src/ggml-common.h +21 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
- data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
- data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
- data/ext/sources/ggml/src/ggml-impl.h +229 -175
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +117 -24
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +802 -142
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +32 -4
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +241 -215
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +57 -2
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +75 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/{tests → test}/test_params.rb +8 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +246 -191
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -9,16 +9,17 @@
|
|
9
9
|
|
10
10
|
#include <algorithm>
|
11
11
|
#include <cassert>
|
12
|
+
#include <cctype>
|
12
13
|
#include <cfloat>
|
13
|
-
#include <
|
14
|
+
#include <cmath>
|
14
15
|
#include <cstdarg>
|
15
16
|
#include <cstring>
|
16
17
|
#include <forward_list>
|
18
|
+
#include <limits>
|
17
19
|
#include <map>
|
18
20
|
#include <queue>
|
19
21
|
#include <set>
|
20
22
|
#include <unordered_map>
|
21
|
-
#include <cctype>
|
22
23
|
|
23
24
|
//
|
24
25
|
// helpers
|
@@ -306,6 +307,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
306
307
|
};
|
307
308
|
break;
|
308
309
|
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
|
310
|
+
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE:
|
309
311
|
regex_exprs = {
|
310
312
|
"\\p{N}{1,3}",
|
311
313
|
"[一-龥-ゟ゠-ヿ]+",
|
@@ -351,6 +353,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
351
353
|
break;
|
352
354
|
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
|
353
355
|
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
|
356
|
+
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
|
354
357
|
regex_exprs = {
|
355
358
|
// original regex from tokenizer.json
|
356
359
|
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
@@ -403,6 +406,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
403
406
|
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
404
407
|
};
|
405
408
|
break;
|
409
|
+
case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
|
410
|
+
regex_exprs = {
|
411
|
+
// K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
|
412
|
+
// The custom handler implements all K2 patterns with proper Han character exclusion
|
413
|
+
"\\p{Han}+",
|
414
|
+
};
|
415
|
+
break;
|
406
416
|
case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
|
407
417
|
regex_exprs = {
|
408
418
|
"\\p{N}+",
|
@@ -424,6 +434,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
424
434
|
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\\r\\n]+|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
425
435
|
};
|
426
436
|
break;
|
437
|
+
case LLAMA_VOCAB_PRE_TYPE_GROK_2:
|
438
|
+
regex_exprs = {
|
439
|
+
// original regex from tokenizer.json
|
440
|
+
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
441
|
+
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
442
|
+
};
|
443
|
+
break;
|
427
444
|
default:
|
428
445
|
// default regex for BPE tokenization pre-processing
|
429
446
|
regex_exprs = {
|
@@ -1195,6 +1212,284 @@ private:
|
|
1195
1212
|
const llm_tokenizer_rwkv & tokenizer;
|
1196
1213
|
};
|
1197
1214
|
|
1215
|
+
struct llm_tokenizer_plamo2 : llm_tokenizer {
|
1216
|
+
llm_tokenizer_plamo2(const llama_vocab & vocab) {
|
1217
|
+
build(vocab);
|
1218
|
+
}
|
1219
|
+
|
1220
|
+
void build(const llama_vocab & vocab) {
|
1221
|
+
// Reset internal structures
|
1222
|
+
tokens_.clear();
|
1223
|
+
bytes_.assign(256, 0);
|
1224
|
+
to_suffix_id_.clear();
|
1225
|
+
table_.clear();
|
1226
|
+
|
1227
|
+
// Build token list and byte mapping
|
1228
|
+
std::unordered_map<std::string, float> suffix_to_score;
|
1229
|
+
std::unordered_map<std::string, llama_token> token_to_id;
|
1230
|
+
|
1231
|
+
for (size_t token_id = 0; token_id < vocab.n_tokens(); ++token_id) {
|
1232
|
+
const auto & entry = vocab.get_token_data(token_id);
|
1233
|
+
tokens_.push_back(entry.text);
|
1234
|
+
token_to_id[entry.text] = static_cast<llama_token>(token_id);
|
1235
|
+
|
1236
|
+
// Handle byte tokens
|
1237
|
+
if (vocab.is_byte(token_id)) {
|
1238
|
+
if (entry.text.length() == 6 && entry.text.substr(0, 3) == "<0x" && entry.text.back() == '>') {
|
1239
|
+
std::string hex_str = entry.text.substr(3, 2);
|
1240
|
+
int byte_val = std::stoi(hex_str, nullptr, 16);
|
1241
|
+
bytes_[byte_val] = static_cast<llama_token>(token_id);
|
1242
|
+
}
|
1243
|
+
continue;
|
1244
|
+
}
|
1245
|
+
|
1246
|
+
// Add token and all its suffixes to suffix_to_score
|
1247
|
+
suffix_to_score[entry.text] = entry.score;
|
1248
|
+
|
1249
|
+
// Extract suffixes character by character (UTF-8 aware)
|
1250
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(entry.text);
|
1251
|
+
for (size_t i = 1; i < cpts.size(); ++i) {
|
1252
|
+
std::string suffix;
|
1253
|
+
for (size_t j = i; j < cpts.size(); ++j) {
|
1254
|
+
suffix += unicode_cpt_to_utf8(cpts[j]);
|
1255
|
+
}
|
1256
|
+
if (suffix_to_score.find(suffix) == suffix_to_score.end()) {
|
1257
|
+
suffix_to_score[suffix] = std::numeric_limits<float>::quiet_NaN();
|
1258
|
+
}
|
1259
|
+
}
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
// Check that all byte tokens are set
|
1263
|
+
for (int i = 0; i < 256; ++i) {
|
1264
|
+
if (bytes_[i] == 0) {
|
1265
|
+
throw std::runtime_error("Byte token for <0x" + std::to_string(i) + "> is not set");
|
1266
|
+
}
|
1267
|
+
}
|
1268
|
+
|
1269
|
+
// Build suffix list in lexicographical order of reversed strings
|
1270
|
+
std::vector<std::string> suffixes;
|
1271
|
+
for (const auto & pair : suffix_to_score) {
|
1272
|
+
suffixes.push_back(pair.first);
|
1273
|
+
}
|
1274
|
+
suffixes.push_back(""); // Empty suffix
|
1275
|
+
|
1276
|
+
std::sort(suffixes.begin(), suffixes.end(), [](const std::string & a, const std::string & b) {
|
1277
|
+
std::string rev_a(a.rbegin(), a.rend());
|
1278
|
+
std::string rev_b(b.rbegin(), b.rend());
|
1279
|
+
return rev_a < rev_b;
|
1280
|
+
});
|
1281
|
+
|
1282
|
+
// Build suffix_to_id and to_suffix_id_
|
1283
|
+
std::unordered_map<std::string, int32_t> suffix_to_id;
|
1284
|
+
int32_t num_pieces = 0;
|
1285
|
+
|
1286
|
+
for (const auto & suffix : suffixes) {
|
1287
|
+
suffix_to_id[suffix] = num_pieces;
|
1288
|
+
if (!suffix.empty()) {
|
1289
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
|
1290
|
+
|
1291
|
+
std::string remaining;
|
1292
|
+
for (size_t i = 1; i < cpts.size(); ++i) {
|
1293
|
+
remaining += unicode_cpt_to_utf8(cpts[i]);
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
int64_t piece_code = (static_cast<int64_t>(cpts[0]) << 32) | suffix_to_id[remaining];
|
1297
|
+
to_suffix_id_[piece_code] = num_pieces;
|
1298
|
+
|
1299
|
+
// Count number of pieces for this suffix
|
1300
|
+
int32_t pieces_for_suffix = 1; // sentinel row
|
1301
|
+
for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
|
1302
|
+
std::string piece;
|
1303
|
+
for (int32_t i = 0; i < piece_length; ++i) {
|
1304
|
+
piece += unicode_cpt_to_utf8(cpts[i]);
|
1305
|
+
}
|
1306
|
+
if (suffix_to_score.find(piece) != suffix_to_score.end()) {
|
1307
|
+
pieces_for_suffix++;
|
1308
|
+
}
|
1309
|
+
}
|
1310
|
+
num_pieces += pieces_for_suffix;
|
1311
|
+
} else {
|
1312
|
+
num_pieces++; // Empty suffix contributes one piece (sentinel row)
|
1313
|
+
}
|
1314
|
+
}
|
1315
|
+
|
1316
|
+
// Build flattened table
|
1317
|
+
table_.resize(num_pieces, std::vector<int32_t>(4, 0));
|
1318
|
+
int32_t table_idx = 0;
|
1319
|
+
|
1320
|
+
for (const auto & suffix : suffixes) {
|
1321
|
+
// Add all prefixes of the suffix to the table (in decreasing order of length)
|
1322
|
+
std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
|
1323
|
+
for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
|
1324
|
+
std::string piece;
|
1325
|
+
for (int32_t i = 0; i < piece_length; ++i) {
|
1326
|
+
piece += unicode_cpt_to_utf8(cpts[i]);
|
1327
|
+
}
|
1328
|
+
|
1329
|
+
auto score_it = suffix_to_score.find(piece);
|
1330
|
+
if (score_it == suffix_to_score.end()) {
|
1331
|
+
continue;
|
1332
|
+
}
|
1333
|
+
|
1334
|
+
table_[table_idx][TABLE_PIECE_LENGTH] = piece_length;
|
1335
|
+
auto token_it = token_to_id.find(piece);
|
1336
|
+
table_[table_idx][TABLE_TOKEN_ID] = (token_it != token_to_id.end()) ? token_it->second : -1;
|
1337
|
+
|
1338
|
+
float score = score_it->second;
|
1339
|
+
table_[table_idx][TABLE_SCORE] = std::isfinite(score) ?
|
1340
|
+
static_cast<int32_t>(std::round(score * 1e4)) : INVALID_SCORE;
|
1341
|
+
table_[table_idx][TABLE_PIECE_ID] = suffix_to_id[piece];
|
1342
|
+
|
1343
|
+
table_idx++;
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
// Add sentinel row
|
1347
|
+
table_[table_idx][TABLE_PIECE_LENGTH] = 1;
|
1348
|
+
table_[table_idx][TABLE_TOKEN_ID] = -1;
|
1349
|
+
table_[table_idx][TABLE_SCORE] = UNKNOWN_SCORE;
|
1350
|
+
table_idx++;
|
1351
|
+
}
|
1352
|
+
}
|
1353
|
+
+    std::vector<llama_token> encode(const std::string & text) const {
+        std::vector<uint32_t> unicode_data = unicode_cpts_from_utf8(text);
+        // Skip the first code point if it is a BOM (Byte Order Mark)
+        if (!unicode_data.empty() && unicode_data[0] == 0xFEFF) {
+            unicode_data.erase(unicode_data.begin());
+        }
+
+        if (unicode_data.empty()) {
+            return {};
+        }
+
+        const size_t data_len = unicode_data.size();
+
+        // Initialize scores array (dynamic programming)
+        std::vector<int64_t> scores(data_len + 1, static_cast<int64_t>(1) << 60);
+        scores[data_len] = 0;
+
+        // Path array to track best tokenization
+        std::vector<std::vector<int32_t>> path(data_len + 1, std::vector<int32_t>(3, 0));
+
+        int32_t suffix_id = 0;
+
+        // Process from end to beginning
+        for (int i = static_cast<int>(data_len) - 1; i >= 0; --i) {
+            uint32_t c = unicode_data[i];
+
+            // Find next suffix ID
+            for (size_t p = suffix_id; p < table_.size(); ++p) {
+                int64_t piece_code = (static_cast<int64_t>(c) << 32) | table_[p][TABLE_PIECE_ID];
+                auto it = to_suffix_id_.find(piece_code);
+                suffix_id = (it != to_suffix_id_.end()) ? it->second : 0;
+
+                if (suffix_id > 0 || table_[p][TABLE_SCORE] == UNKNOWN_SCORE) {
+                    break;
+                }
+            }
+
+            // Update best path
+            for (size_t p = suffix_id; p < table_.size(); ++p) {
+                int32_t score = table_[p][TABLE_SCORE];
+                if (score > INVALID_SCORE) {
+                    int32_t piece_length = table_[p][TABLE_PIECE_LENGTH];
+                    int64_t s = scores[i + piece_length] - score;
+
+                    if (s < scores[i]) {
+                        scores[i] = s;
+                        path[i][PATH_TOKEN_LENGTH] = piece_length;
+                        path[i][PATH_TOKEN_ID] = table_[p][TABLE_TOKEN_ID];
+                        path[i][PATH_NUM_TOKENS] = path[i + piece_length][PATH_NUM_TOKENS] + 1;
+
+                        if (score == UNKNOWN_SCORE) {
+                            // Add UTF-8 byte count
+                            path[i][PATH_NUM_TOKENS] += (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
+                        }
+                    }
+                }
+
+                if (score == UNKNOWN_SCORE) {
+                    break;
+                }
+            }
+        }
+
+        // Decode the best path
+        std::vector<llama_token> token_ids;
+        token_ids.reserve(path[0][PATH_NUM_TOKENS]);
+
+        int pos = 0;
+        while (pos < static_cast<int>(data_len)) {
+            if (path[pos][PATH_TOKEN_ID] >= 0) {
+                token_ids.push_back(path[pos][PATH_TOKEN_ID]);
+            } else {
+                // Fall back to byte tokens
+                uint32_t c = unicode_data[pos];
+                int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
+
+                for (int i = 0; i < s; ++i) {
+                    uint8_t b;
+                    if (s == 1) {
+                        b = c;
+                    } else {
+                        if (i == 0) {
+                            b = (0xF00 >> s) & 0xFF;
+                        } else {
+                            b = 0x80;
+                        }
+                    }
+                    token_ids.push_back(bytes_[b | ((c >> ((s - i - 1) * 6)) & 0x3F)]);
+                }
+            }
+
+            assert(path[pos][PATH_TOKEN_LENGTH] > 0);
+            pos += path[pos][PATH_TOKEN_LENGTH];
+        }
+
+        return token_ids;
+    }
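encode() runs a right-to-left dynamic program over that table and, for code points with no scored piece, falls back to byte tokens; the lead byte of an s-byte UTF-8 sequence is derived as (0xF00 >> s) & 0xFF (0xC0, 0xE0, 0xF0 for s = 2, 3, 4) and each output byte ORs in a 6-bit group. A standalone check of the multi-byte arithmetic (the s == 1 branch writes c directly); the test code point U+3042 is an arbitrary example, not from the code above:

#include <cassert>
#include <cstdint>

int main() {
    assert(((0xF00 >> 2) & 0xFF) == 0xC0); // 2-byte lead: 110xxxxx
    assert(((0xF00 >> 3) & 0xFF) == 0xE0); // 3-byte lead: 1110xxxx
    assert(((0xF00 >> 4) & 0xFF) == 0xF0); // 4-byte lead: 11110xxx

    const uint32_t c = 0x3042; // needs s = 3 bytes
    const int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
    uint8_t out[4] = {0};
    for (int i = 0; i < s; ++i) {
        const uint8_t b = (i == 0) ? ((0xF00 >> s) & 0xFF) : 0x80;
        out[i] = b | ((c >> ((s - i - 1) * 6)) & 0x3F);
    }
    assert(out[0] == 0xE3 && out[1] == 0x81 && out[2] == 0x82); // UTF-8 of U+3042
    return 0;
}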
+private:
+    // Constants for table structure
+    static constexpr int32_t TABLE_PIECE_LENGTH = 0;
+    static constexpr int32_t TABLE_TOKEN_ID = 1;
+    static constexpr int32_t TABLE_SCORE = 2;
+    static constexpr int32_t TABLE_PIECE_ID = 3;
+
+    // Constants for path array
+    static constexpr int32_t PATH_TOKEN_LENGTH = 0;
+    static constexpr int32_t PATH_TOKEN_ID = 1;
+    static constexpr int32_t PATH_NUM_TOKENS = 2;
+
+    // Score constants
+    static constexpr int32_t INVALID_SCORE = -20000000;
+    static constexpr int32_t UNKNOWN_SCORE = -10000000;
+
+    // List of tokens in the vocabulary
+    std::vector<std::string> tokens_;
+
+    // Mapping from byte code point to token ID (for byte fallback)
+    std::vector<llama_token> bytes_;
+
+    // Mapping from piece code to suffix ID
+    std::unordered_map<int64_t, int32_t> to_suffix_id_;
+
+    // Flattened table representing the Trie structure
+    // Each row contains: [piece_length, token_id, score, piece_id]
+    std::vector<std::vector<int32_t>> table_;
+};
+
+struct llm_tokenizer_plamo2_session {
+    llm_tokenizer_plamo2_session(const llm_tokenizer_plamo2 & tokenizer) : tokenizer(tokenizer) {}
+
+    void tokenize(const std::string & text, std::vector<llama_token> & output) {
+        std::vector<llama_token> tokens = tokenizer.encode(text);
+        output.insert(output.end(), tokens.begin(), tokens.end());
+    }
+
+private:
+    const llm_tokenizer_plamo2 & tokenizer;
+};
+
 //
 // impl
 //
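A hypothetical caller of the session wrapper above; the helper function is a sketch only (the vocab is assumed to have been loaded elsewhere, as init_tokenizer does later in this diff):

// sketch only - not part of the diff
static void tokenize_with_plamo2(const llama_vocab & vocab, const std::string & text,
                                 std::vector<llama_token> & out) {
    llm_tokenizer_plamo2 tokenizer(vocab);           // builds the suffix table once
    llm_tokenizer_plamo2_session session(tokenizer); // lightweight per-request wrapper
    session.tokenize(text, out);                     // appends encoded tokens to 'out'
}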
@@ -1269,6 +1564,7 @@ struct llama_vocab::impl {
     bool add_space_prefix = false;
     bool add_bos = false;
     bool add_eos = false;
+    bool add_sep = false;
     bool ignore_merges = false;
     bool clean_spaces = false;  // clean_up_tokenization_spaces
     bool remove_extra_whitespaces = false;
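add_sep records whether the tokenizer should append a separator token when assembling sequences (the BERT-style branches below set it). A sketch of how a caller might consult it through the accessors added later in this diff (llama_vocab_get_add_sep; llama_vocab_sep is the existing SEP accessor); the assembly logic itself is illustrative, not from the diff:

// sketch only - llama_vocab_sep() returns the SEP token id, LLAMA_TOKEN_NULL if unset
static void maybe_append_sep(const struct llama_vocab * vocab, std::vector<llama_token> & tokens) {
    if (llama_vocab_get_add_sep(vocab)) {
        tokens.push_back(llama_vocab_sep(vocab)); // e.g. [SEP] (id 102) for BERT-style WPM vocabs
    }
}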
@@ -1421,6 +1717,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             special_sep_id = 102;
             special_pad_id = 0;
             special_mask_id = 103;
+
+            add_sep = true;
         } else if (tokenizer_model == "gpt2") {
             type = LLAMA_VOCAB_TYPE_BPE;
 
@@ -1474,7 +1772,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
             const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
             precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
-#
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
             // correct endiannes of data in precompiled_charsmap binary blob
             uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
             *xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
@@ -1495,6 +1793,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             special_unk_id = LLAMA_TOKEN_NULL;
             special_sep_id = LLAMA_TOKEN_NULL;
             special_pad_id = LLAMA_TOKEN_NULL;
+        } else if (tokenizer_model == "plamo2") {
+            type = LLAMA_VOCAB_TYPE_PLAMO2;
+
+            // PLaMo-2 default special tokens (these will be overridden by model config)
+            special_bos_id = 1; // <|plamo:bos|>
+            special_eos_id = 2; // <|plamo:eos|>
+            special_unk_id = 0; // <|plamo:unk|>
+            special_sep_id = LLAMA_TOKEN_NULL;
+            special_pad_id = 3; // <|plamo:pad|>
+            special_mask_id = LLAMA_TOKEN_NULL;
         } else {
             throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str()));
         }
@@ -1519,7 +1827,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             tokenizer_pre == "llama-v3" ||
             tokenizer_pre == "llama-bpe"||
             tokenizer_pre == "falcon3" ||
-            tokenizer_pre == "
+            tokenizer_pre == "falcon-h1" ||
+            tokenizer_pre == "pixtral" ||
+            tokenizer_pre == "midm-2.0" ||
+            tokenizer_pre == "lfm2") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             ignore_merges = true;
             add_bos = true;
@@ -1550,12 +1861,17 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             tokenizer_pre == "jina-es" ||
             tokenizer_pre == "jina-de" ||
             tokenizer_pre == "gigachat" ||
-            tokenizer_pre == "jina-v1-en" ||
             tokenizer_pre == "jina-v2-es" ||
             tokenizer_pre == "jina-v2-de" ||
+            tokenizer_pre == "a.x-4.0" ||
+            tokenizer_pre == "mellum") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
+        } else if (
+            tokenizer_pre == "jina-v1-en" ||
             tokenizer_pre == "jina-v2-code" ||
             tokenizer_pre == "roberta-bpe") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
+            add_sep = true;
         } else if (
             tokenizer_pre == "refact") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_REFACT;
@@ -1618,6 +1934,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         } else if (
             tokenizer_pre == "exaone") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_EXAONE;
+        } else if (
+            tokenizer_pre == "exaone4") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
             tokenizer_pre == "chameleon") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_CHAMELEON;
@@ -1643,13 +1962,30 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
             clean_spaces = false;
         } else if (
-            tokenizer_pre == "bailingmoe"
+            tokenizer_pre == "bailingmoe" ||
+            tokenizer_pre == "llada-moe") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
             clean_spaces = false;
         } else if (
             tokenizer_pre == "seed-coder") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
             clean_spaces = false;
+        } else if (
+            tokenizer_pre == "hunyuan") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
+            clean_spaces = false;
+        } else if (
+            tokenizer_pre == "hunyuan-dense") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE;
+            clean_spaces = false;
+        } else if (
+            tokenizer_pre == "kimi-k2") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
+            clean_spaces = false;
+        } else if (
+            tokenizer_pre == "grok-2") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_GROK_2;
+            clean_spaces = false;
         } else {
             throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
         }
@@ -1665,6 +2001,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         clean_spaces = true;
         add_bos = true;
         add_eos = false;
+        add_sep = true;
     } else if (type == LLAMA_VOCAB_TYPE_UGM) {
         pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
         add_bos = false;
@@ -1801,7 +2138,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         }
     }
 
-    // Handle add_bos and add_eos
+    // Handle add_bos, add_eos and add_sep
     {
         bool temp = true;
 
@@ -1811,6 +2148,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         if (ml.get_key(LLM_KV_TOKENIZER_ADD_EOS, temp, false)) {
             add_eos = temp;
         }
+        if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) {
+            add_sep = temp;
+        }
     }
 
     // auto-detect special tokens by text
@@ -1829,6 +2169,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
                     || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                    || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eot_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1862,6 +2203,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁begin|>" // DeepSeek
                     || t.first == "<PRE>"
                     || t.first == "▁<PRE>" // CodeLlama
+                    || t.first == "<|code_prefix|>" // GLM-4.5
                ) {
                 special_fim_pre_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1881,6 +2223,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁hole|>" // DeepSeek
                     || t.first == "<SUF>"
                     || t.first == "▁<SUF>" // CodeLlama
+                    || t.first == "<|code_suffix|>" // GLM-4.5
                ) {
                 special_fim_suf_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1900,6 +2243,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁end|>" // DeepSeek
                     || t.first == "<MID>"
                     || t.first == "▁<MID>" // CodeLlama
+                    || t.first == "<|code_middle|>" // GLM-4.5
                ) {
                 special_fim_mid_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1982,11 +2326,15 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|eot_id|>"
                     || t.first == "<|im_end|>"
                     || t.first == "<|end|>"
+                    || t.first == "<|return|>" // o200k_harmony
+                    || t.first == "<|call|>" // o200k_harmony
                     || t.first == "<end_of_turn>"
                     || t.first == "<|endoftext|>"
                     || t.first == "<|eom_id|>"
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
+                    || t.first == "<|end_of_text|>"
+                    || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eog_ids.insert(t.second);
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2003,6 +2351,13 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         }
 
+        // @ngxson : quick hack for gpt-oss, always render these tokens
+        for (const auto & t : token_to_id) {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
+                id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
+            }
+        }
+
         // sanity checks
         if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
             special_eog_ids.insert(special_eos_id);
@@ -2018,6 +2373,37 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             special_eog_ids.insert(special_eom_id);
             LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
         }
+
+        // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
+        // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+        // we remove the "<|end|>" token from the EOG list
+        {
+            bool has_return = false;
+            bool has_call = false;
+            bool has_end = false;
+
+            llama_token end_id = LLAMA_TOKEN_NULL;
+
+            LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__);
+            for (auto tid : special_eog_ids) {
+                LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str());
+
+                if (id_to_token[tid].text == "<|return|>") {
+                    has_return = true;
+                } else if (id_to_token[tid].text == "<|call|>") {
+                    has_call = true;
+                } else if (id_to_token[tid].text == "<|end|>") {
+                    has_end = true;
+                    end_id = tid;
+                }
+            }
+
+            if (has_return && has_call && has_end) {
+                special_eog_ids.erase(end_id);
+                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
+                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+            }
+        }
     }
 
     // build special tokens cache
@@ -2059,9 +2445,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     //NOTE: Per token attributes are missing from the GGUF file.
     //TODO: Extract attributes from GGUF file.
     {
-        auto _contains_any = [] (const std::string & str, const std::vector<std::
+        auto _contains_any = [] (const std::string & str, const std::vector<std::string_view> & substrs) -> bool {
             for (const auto & substr : substrs) {
-                if (str.find(substr)
+                if (str.find(substr) != std::string::npos) {
                     return true;
                 }
             }
@@ -2080,9 +2466,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
         std::string model_name;
         std::string tokenizer_pre;
+        std::string general_arch;
 
         ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
         ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
+        ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
 
         // model name to lowercase
         std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2091,9 +2479,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         );
 
-        // set attributes by model/tokenizer name
-        if (
-
+        // set attributes by model/tokenizer/architecture name
+        if (false
+            || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
+            || _contains_any(general_arch, {"nomic-bert-moe", "jina-bert-v3"})
+            ) {
+            if (token_to_id.count("<mask>") == 0) {
+                LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
+            } else {
+                _set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
+            }
         } else if (_contains_any(model_name, {"phi-3", "phi3"})) {
             for (auto id : cache_special_tokens) {
                 _set_tokenid_attr(id, LLAMA_TOKEN_ATTR_RSTRIP, true);
@@ -2114,13 +2509,14 @@ enum llama_vocab_type llama_vocab::impl::get_type() const {
 
 std::string llama_vocab::impl::type_name() const{
     switch (type) {
-        case LLAMA_VOCAB_TYPE_NONE:
-        case LLAMA_VOCAB_TYPE_SPM:
-        case LLAMA_VOCAB_TYPE_BPE:
-        case LLAMA_VOCAB_TYPE_WPM:
-        case LLAMA_VOCAB_TYPE_UGM:
-        case LLAMA_VOCAB_TYPE_RWKV:
-
+        case LLAMA_VOCAB_TYPE_NONE:   return "no vocab";
+        case LLAMA_VOCAB_TYPE_SPM:    return "SPM";
+        case LLAMA_VOCAB_TYPE_BPE:    return "BPE";
+        case LLAMA_VOCAB_TYPE_WPM:    return "WPM";
+        case LLAMA_VOCAB_TYPE_UGM:    return "UGM";
+        case LLAMA_VOCAB_TYPE_RWKV:   return "RWKV";
+        case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2";
+        default:                      return "unknown";
     }
 }
 
@@ -2203,6 +2599,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) {
         case LLAMA_VOCAB_TYPE_RWKV:
             tokenizer = std::make_unique<llm_tokenizer_rwkv>(vocab);
             break;
+        case LLAMA_VOCAB_TYPE_PLAMO2:
+            tokenizer = std::make_unique<llm_tokenizer_plamo2>(vocab);
+            break;
         default:
             GGML_ABORT("unsupported vocab type");
     }
@@ -2535,6 +2934,23 @@ std::vector<llama_token> llama_vocab::impl::tokenize(
                     if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
                         std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
 
+#ifdef PRETOKENIZERDEBUG
+                        LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
+#endif
+
+                        session.tokenize(text, output);
+                    } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN)
+                        output.push_back(fragment.token);
+                    }
+                }
+            } break;
+        case LLAMA_VOCAB_TYPE_PLAMO2:
+            {
+                llm_tokenizer_plamo2_session session(*static_cast<const llm_tokenizer_plamo2 *>(tokenizer.get()));
+                for (const auto & fragment : fragment_buffer) {
+                    if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
+                        std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
+
 #ifdef PRETOKENIZERDEBUG
                         LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
 #endif
@@ -2563,6 +2979,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
     // copy piece chars to output text buffer
     // skip up to 'lstrip' leading spaces before copying
     auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+            GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
+        }
+
         for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
             token++;
             size--;
@@ -2629,6 +3049,24 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
             memcpy(buf, result.data(), result.size());
             return (int)result.size();
         }
+        case LLAMA_VOCAB_TYPE_PLAMO2: {
+            // PLaMo-2 uses similar token handling as BPE/SPM
+            if (vocab.is_byte(token)) {
+                // Handle byte tokens like <0xXX>
+                if (token_text.length() == 6 && token_text.substr(0, 3) == "<0x" && token_text.back() == '>') {
+                    int hex_val = std::stoi(token_text.substr(3, 2), nullptr, 16);
+                    if (length < 1) {
+                        return -1;
+                    }
+                    buf[0] = static_cast<char>(hex_val);
+                    return 1;
+                }
+            }
+
+            // Normal token - just copy the text
+            std::string result = token_text;
+            return _try_copy(result.data(), result.size());
+        }
         default:
             GGML_ABORT("fatal error");
     }
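A standalone check of the <0xXX> parsing used above, showing that it round-trips with the "<0x%02X>" formatting used by byte_to_token() further down this diff; the byte value 0x41 is an arbitrary example:

#include <cassert>
#include <string>

int main() {
    const std::string token_text = "<0x41>";
    // same shape test as the branch above
    assert(token_text.length() == 6 && token_text.substr(0, 3) == "<0x" && token_text.back() == '>');
    const int hex_val = std::stoi(token_text.substr(3, 2), nullptr, 16);
    assert(hex_val == 0x41); // recovers the original byte
    return 0;
}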
@@ -2759,26 +3197,26 @@ void llama_vocab::impl::print_info() const {
     LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (uint32_t) bpe_ranks.size());
 
     // special tokens
-    if (special_bos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, special_bos_id, id_to_token
-    if (special_eos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, special_eos_id, id_to_token
-    if (special_eot_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, special_eot_id, id_to_token
-    if (special_eom_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, special_eom_id, id_to_token
-    if (special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, special_unk_id, id_to_token
-    if (special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, special_sep_id, id_to_token
-    if (special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, special_pad_id, id_to_token
-    if (special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, special_mask_id, id_to_token
-
-    if (linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, linefeed_id, id_to_token
-
-    if (special_fim_pre_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, special_fim_pre_id, id_to_token
-    if (special_fim_suf_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, special_fim_suf_id, id_to_token
-    if (special_fim_mid_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, special_fim_mid_id, id_to_token
-    if (special_fim_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, special_fim_pad_id, id_to_token
-    if (special_fim_rep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, special_fim_rep_id, id_to_token
-    if (special_fim_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, special_fim_sep_id, id_to_token
+    if (special_bos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, special_bos_id, id_to_token.at(special_bos_id).text.c_str() ); }
+    if (special_eos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, special_eos_id, id_to_token.at(special_eos_id).text.c_str() ); }
+    if (special_eot_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, special_eot_id, id_to_token.at(special_eot_id).text.c_str() ); }
+    if (special_eom_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, special_eom_id, id_to_token.at(special_eom_id).text.c_str() ); }
+    if (special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, special_unk_id, id_to_token.at(special_unk_id).text.c_str() ); }
+    if (special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, special_sep_id, id_to_token.at(special_sep_id).text.c_str() ); }
+    if (special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, special_pad_id, id_to_token.at(special_pad_id).text.c_str() ); }
+    if (special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, special_mask_id, id_to_token.at(special_mask_id).text.c_str() ); }
+
+    if (linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, linefeed_id, id_to_token.at(linefeed_id).text.c_str() ); }
+
+    if (special_fim_pre_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, special_fim_pre_id, id_to_token.at(special_fim_pre_id).text.c_str() ); }
+    if (special_fim_suf_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, special_fim_suf_id, id_to_token.at(special_fim_suf_id).text.c_str() ); }
+    if (special_fim_mid_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, special_fim_mid_id, id_to_token.at(special_fim_mid_id).text.c_str() ); }
+    if (special_fim_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, special_fim_pad_id, id_to_token.at(special_fim_pad_id).text.c_str() ); }
+    if (special_fim_rep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, special_fim_rep_id, id_to_token.at(special_fim_rep_id).text.c_str() ); }
+    if (special_fim_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, special_fim_sep_id, id_to_token.at(special_fim_sep_id).text.c_str() ); }
 
     for (const auto & id : special_eog_ids) {
-        LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, id_to_token
+        LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, id_to_token.at(id).text.c_str() );
     }
 
     LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, max_token_len);
@@ -2873,6 +3311,12 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const {
         case LLAMA_VOCAB_TYPE_BPE: {
             return pimpl->token_to_id.at(unicode_byte_to_utf8(ch));
         }
+        case LLAMA_VOCAB_TYPE_PLAMO2: {
+            // PLaMo-2 uses byte tokens in format <0xXX>
+            char hex_str[8];
+            snprintf(hex_str, sizeof(hex_str), "<0x%02X>", ch);
+            return pimpl->token_to_id.at(hex_str);
+        }
         default:
             GGML_ABORT("fatal error");
     }
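A standalone check of the <0xXX> formatting used above; the byte 'A' (0x41) is an arbitrary example:

#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
    char hex_str[8]; // "<0xXX>" is 6 chars plus the terminating NUL
    snprintf(hex_str, sizeof(hex_str), "<0x%02X>", (unsigned char) 'A');
    assert(strcmp(hex_str, "<0x41>") == 0); // the key looked up in token_to_id
    return 0;
}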
@@ -2974,6 +3418,10 @@ llama_token llama_vocab::token_fim_sep() const {
     return pimpl->special_fim_sep_id;
 }
 
+llama_token llama_vocab::token_mask() const {
+    return pimpl->special_mask_id;
+}
+
 bool llama_vocab::get_add_space_prefix() const {
     return pimpl->add_space_prefix;
 }
@@ -2986,6 +3434,10 @@ bool llama_vocab::get_add_eos() const {
     return pimpl->add_eos;
 }
 
+bool llama_vocab::get_add_sep() const {
+    return pimpl->add_sep;
+}
+
 bool llama_vocab::get_ignore_merges() const {
     return pimpl->ignore_merges;
 }
@@ -3046,6 +3498,11 @@ int32_t llama_vocab::tokenize(
         bool add_special,
         bool parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return std::numeric_limits<int32_t>::min();
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
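With this guard, tokenize() has three outcomes: a non-negative token count, a negative count whose magnitude is the required buffer size, and INT32_MIN for results that cannot be represented at all. A sketch of how a caller can tell them apart (the helper name is made up):

#include <cstdint>
#include <limits>

// returns the number of token slots the caller should allocate, or -1 on hard failure
static int32_t required_token_slots(int32_t n) {
    if (n == std::numeric_limits<int32_t>::min()) {
        return -1; // result exceeded int32_t entirely - cannot be fixed by resizing
    }
    return n < 0 ? -n : n; // negative means "buffer too small; -n slots are needed"
}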
@@ -3177,6 +3634,10 @@ bool llama_vocab_get_add_eos(const struct llama_vocab * vocab) {
     return vocab->get_add_eos();
 }
 
+bool llama_vocab_get_add_sep(const struct llama_vocab * vocab) {
+    return vocab->get_add_sep();
+}
+
 llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab) {
     return vocab->token_fim_pre();
 }
@@ -3201,6 +3662,10 @@ llama_token llama_vocab_fim_sep(const struct llama_vocab * vocab) {
     return vocab->token_fim_sep();
 }
 
+llama_token llama_vocab_mask(const struct llama_vocab* vocab) {
+    return vocab->token_mask();
+}
+
 // deprecated
 const char * llama_token_get_text(const struct llama_vocab * vocab, llama_token token) {
     return llama_vocab_get_text(vocab, token);
@@ -3337,4 +3802,3 @@ int32_t llama_detokenize(
         bool unparse_special) {
     return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
 }
-