@novastera-oss/llamarn 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/common/arg.cpp +7 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +1 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +118 -20
- package/cpp/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +83 -102
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +192 -67
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +211 -33
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +54 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +84 -31
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +227 -41
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +362 -182
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +240 -535
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +45 -54
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +57 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -13
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +76 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +21 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +64 -0
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -3
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +55 -0
- package/cpp/llama.cpp/src/llama-arch.h +18 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +570 -359
- package/cpp/llama.cpp/src/llama-batch.h +98 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +11 -6
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +199 -252
- package/cpp/llama.cpp/src/llama-graph.h +44 -32
- package/cpp/llama.cpp/src/llama-hparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-hparams.h +8 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +51 -53
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +19 -24
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +110 -104
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +17 -22
- package/cpp/llama.cpp/src/llama-kv-cells.h +35 -11
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +66 -67
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +16 -21
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +69 -68
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.h +18 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1006 -472
- package/cpp/llama.cpp/src/llama-model.h +22 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +87 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +26 -3
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/common.h +1 -0
- package/ios/include/llama.h +8 -3
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
|
@@ -101,6 +101,7 @@ extern "C" {
|
|
|
101
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
102
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
103
|
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
104
|
+
GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
|
|
104
105
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
105
106
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
106
107
|
|
|
@@ -133,6 +134,7 @@ extern "C" {
|
|
|
133
134
|
|
|
134
135
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
135
136
|
|
|
137
|
+
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
136
138
|
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
|
|
137
139
|
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
|
|
138
140
|
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
|
|
@@ -470,6 +470,7 @@ extern "C" {
|
|
|
470
470
|
GGML_OP_TRANSPOSE,
|
|
471
471
|
GGML_OP_GET_ROWS,
|
|
472
472
|
GGML_OP_GET_ROWS_BACK,
|
|
473
|
+
GGML_OP_SET_ROWS,
|
|
473
474
|
GGML_OP_DIAG,
|
|
474
475
|
GGML_OP_DIAG_MASK_INF,
|
|
475
476
|
GGML_OP_DIAG_MASK_ZERO,
|
|
@@ -489,6 +490,7 @@ extern "C" {
|
|
|
489
490
|
GGML_OP_UPSCALE, // nearest interpolate
|
|
490
491
|
GGML_OP_PAD,
|
|
491
492
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
|
+
GGML_OP_ROLL,
|
|
492
494
|
GGML_OP_ARANGE,
|
|
493
495
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
494
496
|
GGML_OP_ARGSORT,
|
|
@@ -686,6 +688,9 @@ extern "C" {
|
|
|
686
688
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
687
689
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
688
690
|
|
|
691
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
|
692
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
|
693
|
+
|
|
689
694
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
690
695
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
691
696
|
|
|
@@ -1374,6 +1379,23 @@ extern "C" {
|
|
|
1374
1379
|
struct ggml_tensor * b, // row indices
|
|
1375
1380
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
|
1376
1381
|
|
|
1382
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
|
1383
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
|
1384
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
|
1385
|
+
//
|
|
1386
|
+
// undefined behavior if destination rows overlap
|
|
1387
|
+
//
|
|
1388
|
+
// broadcast:
|
|
1389
|
+
// ne2 % ne11 == 0
|
|
1390
|
+
// ne3 % ne12 == 0
|
|
1391
|
+
//
|
|
1392
|
+
// return view(a)
|
|
1393
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
|
1394
|
+
struct ggml_context * ctx,
|
|
1395
|
+
struct ggml_tensor * a, // destination
|
|
1396
|
+
struct ggml_tensor * b, // source
|
|
1397
|
+
struct ggml_tensor * c); // row indices
|
|
1398
|
+
|
|
1377
1399
|
GGML_API struct ggml_tensor * ggml_diag(
|
|
1378
1400
|
struct ggml_context * ctx,
|
|
1379
1401
|
struct ggml_tensor * a);
|
|
@@ -1801,6 +1823,17 @@ extern "C" {
|
|
|
1801
1823
|
int p0,
|
|
1802
1824
|
int p1);
|
|
1803
1825
|
|
|
1826
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
|
1827
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
|
1828
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
|
1829
|
+
struct ggml_context * ctx,
|
|
1830
|
+
struct ggml_tensor * a,
|
|
1831
|
+
int shift0,
|
|
1832
|
+
int shift1,
|
|
1833
|
+
int shift2,
|
|
1834
|
+
int shift3);
|
|
1835
|
+
|
|
1836
|
+
|
|
1804
1837
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1805
1838
|
// timesteps: [N,]
|
|
1806
1839
|
// return: [N, dim]
|
|
@@ -390,6 +390,7 @@ extern "C" {
|
|
|
390
390
|
void * imatrix; // pointer to importance matrix data
|
|
391
391
|
void * kv_overrides; // pointer to vector containing overrides
|
|
392
392
|
void * tensor_types; // pointer to vector containing tensor types
|
|
393
|
+
void * prune_layers; // pointer to vector containing layer indices to prune
|
|
393
394
|
} llama_model_quantize_params;
|
|
394
395
|
|
|
395
396
|
typedef struct llama_logit_bias {
|
|
@@ -943,12 +944,14 @@ extern "C" {
|
|
|
943
944
|
// Requires the context to have a memory.
|
|
944
945
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
945
946
|
// Positive return values do not mean a fatal error, but rather a warning.
|
|
946
|
-
// Upon
|
|
947
|
+
// Upon fatal error or abort, the ubatches that managed to be processed will remain in the memory state of the context
|
|
948
|
+
// To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
|
|
949
|
+
// Upon other return values, the memory state is restored to the state before this call
|
|
947
950
|
// 0 - success
|
|
948
951
|
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
949
|
-
// 2 - aborted
|
|
952
|
+
// 2 - aborted (processed ubatches will remain in the context's memory)
|
|
950
953
|
// -1 - invalid input batch
|
|
951
|
-
// < -1 - error
|
|
954
|
+
// < -1 - fatal error (processed ubatches will remain in the context's memory)
|
|
952
955
|
LLAMA_API int32_t llama_decode(
|
|
953
956
|
struct llama_context * ctx,
|
|
954
957
|
struct llama_batch batch);
|
|
@@ -1044,6 +1047,7 @@ extern "C" {
|
|
|
1044
1047
|
|
|
1045
1048
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1046
1049
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
1050
|
+
LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
|
|
1047
1051
|
|
|
1048
1052
|
LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
|
|
1049
1053
|
LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
|
|
@@ -1087,6 +1091,7 @@ extern "C" {
|
|
|
1087
1091
|
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
|
1088
1092
|
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
|
1089
1093
|
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
|
1094
|
+
/// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
|
|
1090
1095
|
/// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
|
|
1091
1096
|
/// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
|
|
1092
1097
|
/// as plaintext. Does not insert a leading space.
|
package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h
CHANGED
|
@@ -101,6 +101,7 @@ extern "C" {
|
|
|
101
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
102
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
103
|
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
104
|
+
GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
|
|
104
105
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
105
106
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
106
107
|
|
|
@@ -133,6 +134,7 @@ extern "C" {
|
|
|
133
134
|
|
|
134
135
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
135
136
|
|
|
137
|
+
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
136
138
|
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
|
|
137
139
|
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
|
|
138
140
|
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
|
package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h
CHANGED
|
@@ -470,6 +470,7 @@ extern "C" {
|
|
|
470
470
|
GGML_OP_TRANSPOSE,
|
|
471
471
|
GGML_OP_GET_ROWS,
|
|
472
472
|
GGML_OP_GET_ROWS_BACK,
|
|
473
|
+
GGML_OP_SET_ROWS,
|
|
473
474
|
GGML_OP_DIAG,
|
|
474
475
|
GGML_OP_DIAG_MASK_INF,
|
|
475
476
|
GGML_OP_DIAG_MASK_ZERO,
|
|
@@ -489,6 +490,7 @@ extern "C" {
|
|
|
489
490
|
GGML_OP_UPSCALE, // nearest interpolate
|
|
490
491
|
GGML_OP_PAD,
|
|
491
492
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
|
+
GGML_OP_ROLL,
|
|
492
494
|
GGML_OP_ARANGE,
|
|
493
495
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
494
496
|
GGML_OP_ARGSORT,
|
|
@@ -686,6 +688,9 @@ extern "C" {
|
|
|
686
688
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
687
689
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
688
690
|
|
|
691
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
|
692
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
|
693
|
+
|
|
689
694
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
690
695
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
691
696
|
|
|
@@ -1374,6 +1379,23 @@ extern "C" {
|
|
|
1374
1379
|
struct ggml_tensor * b, // row indices
|
|
1375
1380
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
|
1376
1381
|
|
|
1382
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
|
1383
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
|
1384
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
|
1385
|
+
//
|
|
1386
|
+
// undefined behavior if destination rows overlap
|
|
1387
|
+
//
|
|
1388
|
+
// broadcast:
|
|
1389
|
+
// ne2 % ne11 == 0
|
|
1390
|
+
// ne3 % ne12 == 0
|
|
1391
|
+
//
|
|
1392
|
+
// return view(a)
|
|
1393
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
|
1394
|
+
struct ggml_context * ctx,
|
|
1395
|
+
struct ggml_tensor * a, // destination
|
|
1396
|
+
struct ggml_tensor * b, // source
|
|
1397
|
+
struct ggml_tensor * c); // row indices
|
|
1398
|
+
|
|
1377
1399
|
GGML_API struct ggml_tensor * ggml_diag(
|
|
1378
1400
|
struct ggml_context * ctx,
|
|
1379
1401
|
struct ggml_tensor * a);
|
|
@@ -1801,6 +1823,17 @@ extern "C" {
|
|
|
1801
1823
|
int p0,
|
|
1802
1824
|
int p1);
|
|
1803
1825
|
|
|
1826
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
|
1827
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
|
1828
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
|
1829
|
+
struct ggml_context * ctx,
|
|
1830
|
+
struct ggml_tensor * a,
|
|
1831
|
+
int shift0,
|
|
1832
|
+
int shift1,
|
|
1833
|
+
int shift2,
|
|
1834
|
+
int shift3);
|
|
1835
|
+
|
|
1836
|
+
|
|
1804
1837
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1805
1838
|
// timesteps: [N,]
|
|
1806
1839
|
// return: [N, dim]
|
package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h
CHANGED
|
@@ -390,6 +390,7 @@ extern "C" {
|
|
|
390
390
|
void * imatrix; // pointer to importance matrix data
|
|
391
391
|
void * kv_overrides; // pointer to vector containing overrides
|
|
392
392
|
void * tensor_types; // pointer to vector containing tensor types
|
|
393
|
+
void * prune_layers; // pointer to vector containing layer indices to prune
|
|
393
394
|
} llama_model_quantize_params;
|
|
394
395
|
|
|
395
396
|
typedef struct llama_logit_bias {
|
|
@@ -943,12 +944,14 @@ extern "C" {
|
|
|
943
944
|
// Requires the context to have a memory.
|
|
944
945
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
945
946
|
// Positive return values do not mean a fatal error, but rather a warning.
|
|
946
|
-
// Upon
|
|
947
|
+
// Upon fatal error or abort, the ubatches that managed to be processed will remain in the memory state of the context
|
|
948
|
+
// To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
|
|
949
|
+
// Upon other return values, the memory state is restored to the state before this call
|
|
947
950
|
// 0 - success
|
|
948
951
|
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
949
|
-
// 2 - aborted
|
|
952
|
+
// 2 - aborted (processed ubatches will remain in the context's memory)
|
|
950
953
|
// -1 - invalid input batch
|
|
951
|
-
// < -1 - error
|
|
954
|
+
// < -1 - fatal error (processed ubatches will remain in the context's memory)
|
|
952
955
|
LLAMA_API int32_t llama_decode(
|
|
953
956
|
struct llama_context * ctx,
|
|
954
957
|
struct llama_batch batch);
|
|
@@ -1044,6 +1047,7 @@ extern "C" {
|
|
|
1044
1047
|
|
|
1045
1048
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1046
1049
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
1050
|
+
LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
|
|
1047
1051
|
|
|
1048
1052
|
LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
|
|
1049
1053
|
LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
|
|
@@ -1087,6 +1091,7 @@ extern "C" {
|
|
|
1087
1091
|
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
|
1088
1092
|
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
|
1089
1093
|
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
|
1094
|
+
/// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
|
|
1090
1095
|
/// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
|
|
1091
1096
|
/// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
|
|
1092
1097
|
/// as plaintext. Does not insert a leading space.
|
|
Binary file
|
|
@@ -101,6 +101,7 @@ extern "C" {
|
|
|
101
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
102
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
103
|
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
104
|
+
GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
|
|
104
105
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
105
106
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
106
107
|
|
|
@@ -133,6 +134,7 @@ extern "C" {
|
|
|
133
134
|
|
|
134
135
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
135
136
|
|
|
137
|
+
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
|
|
136
138
|
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
|
|
137
139
|
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
|
|
138
140
|
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
|
|
@@ -470,6 +470,7 @@ extern "C" {
|
|
|
470
470
|
GGML_OP_TRANSPOSE,
|
|
471
471
|
GGML_OP_GET_ROWS,
|
|
472
472
|
GGML_OP_GET_ROWS_BACK,
|
|
473
|
+
GGML_OP_SET_ROWS,
|
|
473
474
|
GGML_OP_DIAG,
|
|
474
475
|
GGML_OP_DIAG_MASK_INF,
|
|
475
476
|
GGML_OP_DIAG_MASK_ZERO,
|
|
@@ -489,6 +490,7 @@ extern "C" {
|
|
|
489
490
|
GGML_OP_UPSCALE, // nearest interpolate
|
|
490
491
|
GGML_OP_PAD,
|
|
491
492
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
|
+
GGML_OP_ROLL,
|
|
492
494
|
GGML_OP_ARANGE,
|
|
493
495
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
494
496
|
GGML_OP_ARGSORT,
|
|
@@ -686,6 +688,9 @@ extern "C" {
|
|
|
686
688
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
687
689
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
688
690
|
|
|
691
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
|
692
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
|
693
|
+
|
|
689
694
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
690
695
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
691
696
|
|
|
@@ -1374,6 +1379,23 @@ extern "C" {
|
|
|
1374
1379
|
struct ggml_tensor * b, // row indices
|
|
1375
1380
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
|
1376
1381
|
|
|
1382
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
|
1383
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
|
1384
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
|
1385
|
+
//
|
|
1386
|
+
// undefined behavior if destination rows overlap
|
|
1387
|
+
//
|
|
1388
|
+
// broadcast:
|
|
1389
|
+
// ne2 % ne11 == 0
|
|
1390
|
+
// ne3 % ne12 == 0
|
|
1391
|
+
//
|
|
1392
|
+
// return view(a)
|
|
1393
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
|
1394
|
+
struct ggml_context * ctx,
|
|
1395
|
+
struct ggml_tensor * a, // destination
|
|
1396
|
+
struct ggml_tensor * b, // source
|
|
1397
|
+
struct ggml_tensor * c); // row indices
|
|
1398
|
+
|
|
1377
1399
|
GGML_API struct ggml_tensor * ggml_diag(
|
|
1378
1400
|
struct ggml_context * ctx,
|
|
1379
1401
|
struct ggml_tensor * a);
|
|
@@ -1801,6 +1823,17 @@ extern "C" {
|
|
|
1801
1823
|
int p0,
|
|
1802
1824
|
int p1);
|
|
1803
1825
|
|
|
1826
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
|
1827
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
|
1828
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
|
1829
|
+
struct ggml_context * ctx,
|
|
1830
|
+
struct ggml_tensor * a,
|
|
1831
|
+
int shift0,
|
|
1832
|
+
int shift1,
|
|
1833
|
+
int shift2,
|
|
1834
|
+
int shift3);
|
|
1835
|
+
|
|
1836
|
+
|
|
1804
1837
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1805
1838
|
// timesteps: [N,]
|
|
1806
1839
|
// return: [N, dim]
|
|
@@ -390,6 +390,7 @@ extern "C" {
|
|
|
390
390
|
void * imatrix; // pointer to importance matrix data
|
|
391
391
|
void * kv_overrides; // pointer to vector containing overrides
|
|
392
392
|
void * tensor_types; // pointer to vector containing tensor types
|
|
393
|
+
void * prune_layers; // pointer to vector containing layer indices to prune
|
|
393
394
|
} llama_model_quantize_params;
|
|
394
395
|
|
|
395
396
|
typedef struct llama_logit_bias {
|
|
@@ -943,12 +944,14 @@ extern "C" {
|
|
|
943
944
|
// Requires the context to have a memory.
|
|
944
945
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
945
946
|
// Positive return values do not mean a fatal error, but rather a warning.
|
|
946
|
-
// Upon
|
|
947
|
+
// Upon fatal error or abort, the ubatches that managed to be processed will remain in the memory state of the context
|
|
948
|
+
// To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
|
|
949
|
+
// Upon other return values, the memory state is restored to the state before this call
|
|
947
950
|
// 0 - success
|
|
948
951
|
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
949
|
-
// 2 - aborted
|
|
952
|
+
// 2 - aborted (processed ubatches will remain in the context's memory)
|
|
950
953
|
// -1 - invalid input batch
|
|
951
|
-
// < -1 - error
|
|
954
|
+
// < -1 - fatal error (processed ubatches will remain in the context's memory)
|
|
952
955
|
LLAMA_API int32_t llama_decode(
|
|
953
956
|
struct llama_context * ctx,
|
|
954
957
|
struct llama_batch batch);
|
|
@@ -1044,6 +1047,7 @@ extern "C" {
|
|
|
1044
1047
|
|
|
1045
1048
|
LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
|
|
1046
1049
|
LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
|
|
1050
|
+
LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
|
|
1047
1051
|
|
|
1048
1052
|
LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
|
|
1049
1053
|
LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
|
|
@@ -1087,6 +1091,7 @@ extern "C" {
|
|
|
1087
1091
|
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
|
1088
1092
|
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
|
1089
1093
|
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
|
1094
|
+
/// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
|
|
1090
1095
|
/// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
|
|
1091
1096
|
/// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
|
|
1092
1097
|
/// as plaintext. Does not insert a leading space.
|
package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama
CHANGED
|
Binary file
|
|
Binary file
|