@fugood/llama.node 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
|
@@ -968,6 +968,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
968
968
|
"GET_REL_POS",
|
|
969
969
|
"ADD_REL_POS",
|
|
970
970
|
"RWKV_WKV6",
|
|
971
|
+
"GATED_LINEAR_ATTN",
|
|
971
972
|
|
|
972
973
|
"UNARY",
|
|
973
974
|
|
|
@@ -987,7 +988,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
987
988
|
"OPT_STEP_ADAMW",
|
|
988
989
|
};
|
|
989
990
|
|
|
990
|
-
static_assert(GGML_OP_COUNT ==
|
|
991
|
+
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
|
|
991
992
|
|
|
992
993
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
993
994
|
"none",
|
|
@@ -1064,6 +1065,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1064
1065
|
"get_rel_pos(x)",
|
|
1065
1066
|
"add_rel_pos(x)",
|
|
1066
1067
|
"rwkv_wkv6(k, v, r, tf, td, s)",
|
|
1068
|
+
"gated_linear_attn(k, v, q, gate, s)",
|
|
1067
1069
|
|
|
1068
1070
|
"unary(x)",
|
|
1069
1071
|
|
|
@@ -1083,7 +1085,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1083
1085
|
"adamw(x)",
|
|
1084
1086
|
};
|
|
1085
1087
|
|
|
1086
|
-
static_assert(GGML_OP_COUNT ==
|
|
1088
|
+
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
|
|
1087
1089
|
|
|
1088
1090
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
|
1089
1091
|
|
|
@@ -1588,15 +1590,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
|
1588
1590
|
|
|
1589
1591
|
struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
|
|
1590
1592
|
|
|
1591
|
-
#ifdef __clang__
|
|
1592
|
-
// temporary until ggml_tensor::backend is removed
|
|
1593
|
-
#pragma clang diagnostic push
|
|
1594
|
-
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
1595
|
-
#endif
|
|
1596
|
-
|
|
1597
1593
|
*result = (struct ggml_tensor) {
|
|
1598
1594
|
/*.type =*/ type,
|
|
1599
|
-
/*.backend =*/ GGML_BACKEND_TYPE_CPU,
|
|
1600
1595
|
/*.buffer =*/ NULL,
|
|
1601
1596
|
/*.ne =*/ { 1, 1, 1, 1 },
|
|
1602
1597
|
/*.nb =*/ { 0, 0, 0, 0 },
|
|
@@ -1612,10 +1607,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
|
1612
1607
|
/*.padding =*/ { 0 },
|
|
1613
1608
|
};
|
|
1614
1609
|
|
|
1615
|
-
#ifdef __clang__
|
|
1616
|
-
#pragma clang diagnostic pop
|
|
1617
|
-
#endif
|
|
1618
|
-
|
|
1619
1610
|
// TODO: this should not be needed as long as we don't rely on aligned SIMD loads
|
|
1620
1611
|
//GGML_ASSERT_ALIGNED(result->data);
|
|
1621
1612
|
|
|
@@ -3459,12 +3450,14 @@ struct ggml_tensor * ggml_soft_max_ext(
|
|
|
3459
3450
|
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
|
|
3460
3451
|
}
|
|
3461
3452
|
|
|
3462
|
-
//
|
|
3453
|
+
// ggml_soft_max_ext_back
|
|
3463
3454
|
|
|
3464
|
-
static struct ggml_tensor *
|
|
3455
|
+
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
|
|
3465
3456
|
struct ggml_context * ctx,
|
|
3466
3457
|
struct ggml_tensor * a,
|
|
3467
3458
|
struct ggml_tensor * b,
|
|
3459
|
+
float scale,
|
|
3460
|
+
float max_bias,
|
|
3468
3461
|
bool inplace) {
|
|
3469
3462
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
3470
3463
|
|
|
@@ -3472,21 +3465,28 @@ static struct ggml_tensor * ggml_soft_max_back_impl(
|
|
|
3472
3465
|
result->src[0] = a;
|
|
3473
3466
|
result->src[1] = b;
|
|
3474
3467
|
|
|
3468
|
+
memcpy((float *) result->op_params + 0, &scale, sizeof(float));
|
|
3469
|
+
memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
|
|
3470
|
+
|
|
3475
3471
|
return result;
|
|
3476
3472
|
}
|
|
3477
3473
|
|
|
3478
|
-
struct ggml_tensor *
|
|
3474
|
+
struct ggml_tensor * ggml_soft_max_ext_back(
|
|
3479
3475
|
struct ggml_context * ctx,
|
|
3480
3476
|
struct ggml_tensor * a,
|
|
3481
|
-
struct ggml_tensor * b
|
|
3482
|
-
|
|
3477
|
+
struct ggml_tensor * b,
|
|
3478
|
+
float scale,
|
|
3479
|
+
float max_bias) {
|
|
3480
|
+
return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
|
|
3483
3481
|
}
|
|
3484
3482
|
|
|
3485
|
-
struct ggml_tensor *
|
|
3483
|
+
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
|
|
3486
3484
|
struct ggml_context * ctx,
|
|
3487
3485
|
struct ggml_tensor * a,
|
|
3488
|
-
struct ggml_tensor * b
|
|
3489
|
-
|
|
3486
|
+
struct ggml_tensor * b,
|
|
3487
|
+
float scale,
|
|
3488
|
+
float max_bias) {
|
|
3489
|
+
return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
|
|
3490
3490
|
}
|
|
3491
3491
|
|
|
3492
3492
|
// ggml_rope
|
|
@@ -3704,7 +3704,7 @@ void ggml_rope_yarn_corr_dims(
|
|
|
3704
3704
|
|
|
3705
3705
|
// ggml_rope_back
|
|
3706
3706
|
|
|
3707
|
-
struct ggml_tensor *
|
|
3707
|
+
struct ggml_tensor * ggml_rope_ext_back(
|
|
3708
3708
|
struct ggml_context * ctx,
|
|
3709
3709
|
struct ggml_tensor * a,
|
|
3710
3710
|
struct ggml_tensor * b,
|
|
@@ -3718,29 +3718,32 @@ struct ggml_tensor * ggml_rope_back(
|
|
|
3718
3718
|
float attn_factor,
|
|
3719
3719
|
float beta_fast,
|
|
3720
3720
|
float beta_slow) {
|
|
3721
|
-
|
|
3722
|
-
|
|
3723
|
-
|
|
3724
|
-
|
|
3725
|
-
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
3726
|
-
|
|
3727
|
-
int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
|
|
3728
|
-
memcpy(params + 5, &freq_base, sizeof(float));
|
|
3729
|
-
memcpy(params + 6, &freq_scale, sizeof(float));
|
|
3730
|
-
memcpy(params + 7, &ext_factor, sizeof(float));
|
|
3731
|
-
memcpy(params + 8, &attn_factor, sizeof(float));
|
|
3732
|
-
memcpy(params + 9, &beta_fast, sizeof(float));
|
|
3733
|
-
memcpy(params + 10, &beta_slow, sizeof(float));
|
|
3734
|
-
ggml_set_op_params(result, params, sizeof(params));
|
|
3735
|
-
|
|
3736
|
-
result->op = GGML_OP_ROPE_BACK;
|
|
3737
|
-
result->src[0] = a;
|
|
3738
|
-
result->src[1] = b;
|
|
3739
|
-
result->src[2] = c;
|
|
3740
|
-
|
|
3721
|
+
struct ggml_tensor * result = ggml_rope_ext(
|
|
3722
|
+
ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
|
|
3723
|
+
result->op = GGML_OP_ROPE_BACK;
|
|
3741
3724
|
return result;
|
|
3742
3725
|
}
|
|
3743
3726
|
|
|
3727
|
+
struct ggml_tensor * ggml_rope_multi_back(
|
|
3728
|
+
struct ggml_context * ctx,
|
|
3729
|
+
struct ggml_tensor * a,
|
|
3730
|
+
struct ggml_tensor * b,
|
|
3731
|
+
struct ggml_tensor * c,
|
|
3732
|
+
int n_dims,
|
|
3733
|
+
int sections[4],
|
|
3734
|
+
int mode,
|
|
3735
|
+
int n_ctx_orig,
|
|
3736
|
+
float freq_base,
|
|
3737
|
+
float freq_scale,
|
|
3738
|
+
float ext_factor,
|
|
3739
|
+
float attn_factor,
|
|
3740
|
+
float beta_fast,
|
|
3741
|
+
float beta_slow) {
|
|
3742
|
+
struct ggml_tensor * result = ggml_rope_multi(
|
|
3743
|
+
ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
|
|
3744
|
+
result->op = GGML_OP_ROPE_BACK;
|
|
3745
|
+
return result;
|
|
3746
|
+
}
|
|
3744
3747
|
// ggml_clamp
|
|
3745
3748
|
|
|
3746
3749
|
struct ggml_tensor * ggml_clamp(
|
|
@@ -4640,15 +4643,13 @@ struct ggml_tensor * ggml_rwkv_wkv6(
|
|
|
4640
4643
|
GGML_ASSERT(ggml_is_contiguous(state));
|
|
4641
4644
|
|
|
4642
4645
|
const int64_t S = k->ne[0];
|
|
4643
|
-
const int64_t H = k->ne[
|
|
4644
|
-
const int64_t n_tokens = k->ne[
|
|
4646
|
+
const int64_t H = k->ne[1];
|
|
4647
|
+
const int64_t n_tokens = k->ne[2];
|
|
4645
4648
|
const int64_t n_seqs = state->ne[1];
|
|
4646
4649
|
{
|
|
4647
|
-
GGML_ASSERT(
|
|
4648
|
-
GGML_ASSERT(
|
|
4649
|
-
GGML_ASSERT(
|
|
4650
|
-
// TODO: RWKV v4 and v5
|
|
4651
|
-
GGML_ASSERT(td->ne[0] == 1 && td->ne[1] == S && td->ne[2] == H && td->ne[3] == n_tokens);
|
|
4650
|
+
GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
|
|
4651
|
+
GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
|
|
4652
|
+
GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
|
|
4652
4653
|
GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
|
|
4653
4654
|
}
|
|
4654
4655
|
|
|
@@ -4667,6 +4668,49 @@ struct ggml_tensor * ggml_rwkv_wkv6(
|
|
|
4667
4668
|
return result;
|
|
4668
4669
|
}
|
|
4669
4670
|
|
|
4671
|
+
// ggml_gated_linear_attn
|
|
4672
|
+
|
|
4673
|
+
struct ggml_tensor * ggml_gated_linear_attn(
|
|
4674
|
+
struct ggml_context * ctx,
|
|
4675
|
+
struct ggml_tensor * k,
|
|
4676
|
+
struct ggml_tensor * v,
|
|
4677
|
+
struct ggml_tensor * q,
|
|
4678
|
+
struct ggml_tensor * g,
|
|
4679
|
+
struct ggml_tensor * state,
|
|
4680
|
+
float scale) {
|
|
4681
|
+
GGML_ASSERT(ggml_is_contiguous(k));
|
|
4682
|
+
GGML_ASSERT(ggml_is_contiguous(v));
|
|
4683
|
+
GGML_ASSERT(ggml_is_contiguous(q));
|
|
4684
|
+
GGML_ASSERT(ggml_is_contiguous(g));
|
|
4685
|
+
GGML_ASSERT(ggml_is_contiguous(state));
|
|
4686
|
+
|
|
4687
|
+
const int64_t S = k->ne[0];
|
|
4688
|
+
const int64_t H = k->ne[1];
|
|
4689
|
+
const int64_t n_tokens = k->ne[2];
|
|
4690
|
+
const int64_t n_seqs = state->ne[1];
|
|
4691
|
+
{
|
|
4692
|
+
GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
|
|
4693
|
+
GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
|
|
4694
|
+
GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
|
|
4695
|
+
GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
|
|
4696
|
+
}
|
|
4697
|
+
|
|
4698
|
+
// concat output and new_state
|
|
4699
|
+
const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
|
|
4700
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
|
4701
|
+
|
|
4702
|
+
ggml_set_op_params_f32(result, 0, scale);
|
|
4703
|
+
|
|
4704
|
+
result->op = GGML_OP_GATED_LINEAR_ATTN;
|
|
4705
|
+
result->src[0] = k;
|
|
4706
|
+
result->src[1] = v;
|
|
4707
|
+
result->src[2] = q;
|
|
4708
|
+
result->src[3] = g;
|
|
4709
|
+
result->src[4] = state;
|
|
4710
|
+
|
|
4711
|
+
return result;
|
|
4712
|
+
}
|
|
4713
|
+
|
|
4670
4714
|
// ggml_unary
|
|
4671
4715
|
|
|
4672
4716
|
static struct ggml_tensor * ggml_unary_impl(
|
|
@@ -5041,10 +5085,10 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
|
|
|
5041
5085
|
struct ggml_tensor * a,
|
|
5042
5086
|
struct ggml_tensor * b,
|
|
5043
5087
|
struct ggml_tensor * c) {
|
|
5044
|
-
GGML_ASSERT(
|
|
5045
|
-
GGML_ASSERT(
|
|
5088
|
+
GGML_ASSERT(ggml_is_scalar(a));
|
|
5089
|
+
GGML_ASSERT(ggml_are_same_shape(b, c));
|
|
5046
5090
|
|
|
5047
|
-
struct ggml_tensor * result = ggml_dup_tensor(ctx,
|
|
5091
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
|
|
5048
5092
|
|
|
5049
5093
|
result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
|
|
5050
5094
|
result->src[0] = a;
|
|
@@ -5223,7 +5267,7 @@ static void ggml_sub_or_set(
|
|
|
5223
5267
|
}
|
|
5224
5268
|
|
|
5225
5269
|
static void ggml_compute_backward(
|
|
5226
|
-
struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, bool * grads_needed) {
|
|
5270
|
+
struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
|
|
5227
5271
|
struct ggml_tensor * tensor = cgraph->nodes[i];
|
|
5228
5272
|
struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor);
|
|
5229
5273
|
|
|
@@ -5367,7 +5411,7 @@ static void ggml_compute_backward(
|
|
|
5367
5411
|
if (src0_needs_grads) {
|
|
5368
5412
|
float eps;
|
|
5369
5413
|
memcpy(&eps, tensor->op_params, sizeof(float));
|
|
5370
|
-
ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx,
|
|
5414
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
|
|
5371
5415
|
}
|
|
5372
5416
|
} break;
|
|
5373
5417
|
case GGML_OP_MUL_MAT: {
|
|
@@ -5550,7 +5594,13 @@ static void ggml_compute_backward(
|
|
|
5550
5594
|
} break;
|
|
5551
5595
|
case GGML_OP_SOFT_MAX: {
|
|
5552
5596
|
if (src0_needs_grads) {
|
|
5553
|
-
|
|
5597
|
+
float scale = 1.0f;
|
|
5598
|
+
float max_bias = 0.0f;
|
|
5599
|
+
|
|
5600
|
+
memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float));
|
|
5601
|
+
memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
|
|
5602
|
+
|
|
5603
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
|
|
5554
5604
|
}
|
|
5555
5605
|
GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
|
|
5556
5606
|
} break;
|
|
@@ -5562,6 +5612,7 @@ static void ggml_compute_backward(
|
|
|
5562
5612
|
//const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
|
5563
5613
|
const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
|
|
5564
5614
|
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
|
5615
|
+
int sections[4] = {0, 0, 0, 0};
|
|
5565
5616
|
|
|
5566
5617
|
memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float));
|
|
5567
5618
|
memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float));
|
|
@@ -5569,10 +5620,14 @@ static void ggml_compute_backward(
|
|
|
5569
5620
|
memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float));
|
|
5570
5621
|
memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float));
|
|
5571
5622
|
memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float));
|
|
5572
|
-
|
|
5573
|
-
|
|
5574
|
-
|
|
5575
|
-
|
|
5623
|
+
memcpy(§ions, tensor->op_params + 11, sizeof(sections));
|
|
5624
|
+
|
|
5625
|
+
struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
|
|
5626
|
+
ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
|
|
5627
|
+
mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
|
|
5628
|
+
ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
|
|
5629
|
+
mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
|
|
5630
|
+
ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
|
|
5576
5631
|
}
|
|
5577
5632
|
GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
|
|
5578
5633
|
} break;
|
|
@@ -5586,7 +5641,7 @@ static void ggml_compute_backward(
|
|
|
5586
5641
|
const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
|
|
5587
5642
|
const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
|
|
5588
5643
|
|
|
5589
|
-
ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx,
|
|
5644
|
+
ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
|
|
5590
5645
|
}
|
|
5591
5646
|
} break;
|
|
5592
5647
|
case GGML_OP_POOL_2D: {
|
|
@@ -5629,7 +5684,7 @@ static void ggml_compute_backward(
|
|
|
5629
5684
|
} break;
|
|
5630
5685
|
case GGML_UNARY_OP_SILU: {
|
|
5631
5686
|
if (src0_needs_grads) {
|
|
5632
|
-
ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx,
|
|
5687
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
|
|
5633
5688
|
}
|
|
5634
5689
|
} break;
|
|
5635
5690
|
case GGML_UNARY_OP_EXP: {
|
|
@@ -5646,7 +5701,7 @@ static void ggml_compute_backward(
|
|
|
5646
5701
|
} break;
|
|
5647
5702
|
case GGML_OP_CROSS_ENTROPY_LOSS: {
|
|
5648
5703
|
if (src0_needs_grads) {
|
|
5649
|
-
ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, src0, src1
|
|
5704
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
|
|
5650
5705
|
}
|
|
5651
5706
|
GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
|
|
5652
5707
|
} break;
|
|
@@ -6417,1271 +6472,6 @@ size_t ggml_quantize_chunk(
|
|
|
6417
6472
|
|
|
6418
6473
|
////////////////////////////////////////////////////////////////////////////////
|
|
6419
6474
|
|
|
6420
|
-
struct gguf_str {
|
|
6421
|
-
uint64_t n; // GGUFv2
|
|
6422
|
-
char * data;
|
|
6423
|
-
};
|
|
6424
|
-
|
|
6425
|
-
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
|
|
6426
|
-
[GGUF_TYPE_UINT8] = sizeof(uint8_t),
|
|
6427
|
-
[GGUF_TYPE_INT8] = sizeof(int8_t),
|
|
6428
|
-
[GGUF_TYPE_UINT16] = sizeof(uint16_t),
|
|
6429
|
-
[GGUF_TYPE_INT16] = sizeof(int16_t),
|
|
6430
|
-
[GGUF_TYPE_UINT32] = sizeof(uint32_t),
|
|
6431
|
-
[GGUF_TYPE_INT32] = sizeof(int32_t),
|
|
6432
|
-
[GGUF_TYPE_FLOAT32] = sizeof(float),
|
|
6433
|
-
[GGUF_TYPE_BOOL] = sizeof(bool),
|
|
6434
|
-
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
|
|
6435
|
-
[GGUF_TYPE_UINT64] = sizeof(uint64_t),
|
|
6436
|
-
[GGUF_TYPE_INT64] = sizeof(int64_t),
|
|
6437
|
-
[GGUF_TYPE_FLOAT64] = sizeof(double),
|
|
6438
|
-
[GGUF_TYPE_ARRAY] = 0, // undefined
|
|
6439
|
-
};
|
|
6440
|
-
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
|
|
6441
|
-
|
|
6442
|
-
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
|
|
6443
|
-
[GGUF_TYPE_UINT8] = "u8",
|
|
6444
|
-
[GGUF_TYPE_INT8] = "i8",
|
|
6445
|
-
[GGUF_TYPE_UINT16] = "u16",
|
|
6446
|
-
[GGUF_TYPE_INT16] = "i16",
|
|
6447
|
-
[GGUF_TYPE_UINT32] = "u32",
|
|
6448
|
-
[GGUF_TYPE_INT32] = "i32",
|
|
6449
|
-
[GGUF_TYPE_FLOAT32] = "f32",
|
|
6450
|
-
[GGUF_TYPE_BOOL] = "bool",
|
|
6451
|
-
[GGUF_TYPE_STRING] = "str",
|
|
6452
|
-
[GGUF_TYPE_ARRAY] = "arr",
|
|
6453
|
-
[GGUF_TYPE_UINT64] = "u64",
|
|
6454
|
-
[GGUF_TYPE_INT64] = "i64",
|
|
6455
|
-
[GGUF_TYPE_FLOAT64] = "f64",
|
|
6456
|
-
};
|
|
6457
|
-
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
|
|
6458
|
-
|
|
6459
|
-
union gguf_value {
|
|
6460
|
-
uint8_t uint8;
|
|
6461
|
-
int8_t int8;
|
|
6462
|
-
uint16_t uint16;
|
|
6463
|
-
int16_t int16;
|
|
6464
|
-
uint32_t uint32;
|
|
6465
|
-
int32_t int32;
|
|
6466
|
-
float float32;
|
|
6467
|
-
uint64_t uint64;
|
|
6468
|
-
int64_t int64;
|
|
6469
|
-
double float64;
|
|
6470
|
-
bool bool_;
|
|
6471
|
-
|
|
6472
|
-
struct gguf_str str;
|
|
6473
|
-
|
|
6474
|
-
struct {
|
|
6475
|
-
enum gguf_type type;
|
|
6476
|
-
|
|
6477
|
-
uint64_t n; // GGUFv2
|
|
6478
|
-
void * data;
|
|
6479
|
-
} arr;
|
|
6480
|
-
};
|
|
6481
|
-
|
|
6482
|
-
struct gguf_kv {
|
|
6483
|
-
struct gguf_str key;
|
|
6484
|
-
|
|
6485
|
-
enum gguf_type type;
|
|
6486
|
-
union gguf_value value;
|
|
6487
|
-
};
|
|
6488
|
-
|
|
6489
|
-
struct gguf_header {
|
|
6490
|
-
char magic[4];
|
|
6491
|
-
|
|
6492
|
-
uint32_t version;
|
|
6493
|
-
uint64_t n_tensors; // GGUFv2
|
|
6494
|
-
uint64_t n_kv; // GGUFv2
|
|
6495
|
-
};
|
|
6496
|
-
|
|
6497
|
-
struct gguf_tensor_info {
|
|
6498
|
-
struct gguf_str name;
|
|
6499
|
-
|
|
6500
|
-
uint32_t n_dims;
|
|
6501
|
-
uint64_t ne[GGML_MAX_DIMS];
|
|
6502
|
-
|
|
6503
|
-
enum ggml_type type;
|
|
6504
|
-
|
|
6505
|
-
uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
|
|
6506
|
-
|
|
6507
|
-
// for writing API
|
|
6508
|
-
const void * data;
|
|
6509
|
-
size_t size;
|
|
6510
|
-
};
|
|
6511
|
-
|
|
6512
|
-
struct gguf_context {
|
|
6513
|
-
struct gguf_header header;
|
|
6514
|
-
|
|
6515
|
-
struct gguf_kv * kv;
|
|
6516
|
-
struct gguf_tensor_info * infos;
|
|
6517
|
-
|
|
6518
|
-
size_t alignment;
|
|
6519
|
-
size_t offset; // offset of `data` from beginning of file
|
|
6520
|
-
size_t size; // size of `data` in bytes
|
|
6521
|
-
|
|
6522
|
-
//uint8_t * padding;
|
|
6523
|
-
void * data;
|
|
6524
|
-
};
|
|
6525
|
-
|
|
6526
|
-
size_t gguf_type_size(enum gguf_type type) {
|
|
6527
|
-
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
|
|
6528
|
-
return GGUF_TYPE_SIZE[type];
|
|
6529
|
-
}
|
|
6530
|
-
|
|
6531
|
-
static bool gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
|
|
6532
|
-
if (info->n_dims > GGML_MAX_DIMS) {
|
|
6533
|
-
fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
|
|
6534
|
-
return false;
|
|
6535
|
-
}
|
|
6536
|
-
|
|
6537
|
-
if (info->type < 0 || info->type >= GGML_TYPE_COUNT) {
|
|
6538
|
-
fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
|
|
6539
|
-
return false;
|
|
6540
|
-
}
|
|
6541
|
-
|
|
6542
|
-
if (strlen(info->name.data) >= GGML_MAX_NAME) {
|
|
6543
|
-
fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
|
|
6544
|
-
return false;
|
|
6545
|
-
}
|
|
6546
|
-
|
|
6547
|
-
for (uint32_t i = 0; i < info->n_dims; ++i) {
|
|
6548
|
-
if (info->ne[i] <= 0) {
|
|
6549
|
-
fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
|
|
6550
|
-
return false;
|
|
6551
|
-
}
|
|
6552
|
-
}
|
|
6553
|
-
|
|
6554
|
-
// prevent overflow for total number of elements
|
|
6555
|
-
if (INT64_MAX/info->ne[1] <= info->ne[0]) {
|
|
6556
|
-
fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
|
|
6557
|
-
return false;
|
|
6558
|
-
}
|
|
6559
|
-
|
|
6560
|
-
if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
|
|
6561
|
-
fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
|
|
6562
|
-
return false;
|
|
6563
|
-
}
|
|
6564
|
-
|
|
6565
|
-
if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
|
|
6566
|
-
fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
|
|
6567
|
-
return false;
|
|
6568
|
-
}
|
|
6569
|
-
|
|
6570
|
-
return true;
|
|
6571
|
-
}
|
|
6572
|
-
|
|
6573
|
-
// Read exactly `size` bytes from `file` into `dst`, advancing `*offset` by
// the number of bytes actually consumed (which may be short on EOF/error).
// Returns true only when the full read succeeded.
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
    const size_t n_read = fread(dst, 1, size, file);
    *offset += n_read;
    return n_read == size;
}
|
|
6578
|
-
|
|
6579
|
-
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|
6580
|
-
p->n = 0;
|
|
6581
|
-
p->data = NULL;
|
|
6582
|
-
|
|
6583
|
-
bool ok = true;
|
|
6584
|
-
|
|
6585
|
-
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
|
|
6586
|
-
|
|
6587
|
-
// early exit if string length is invalid, prevents from integer overflow
|
|
6588
|
-
if (p->n == SIZE_MAX) {
|
|
6589
|
-
fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
|
|
6590
|
-
return false;
|
|
6591
|
-
}
|
|
6592
|
-
|
|
6593
|
-
p->data = calloc(p->n + 1, 1);
|
|
6594
|
-
if (!p->data) {
|
|
6595
|
-
fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n);
|
|
6596
|
-
return false;
|
|
6597
|
-
}
|
|
6598
|
-
|
|
6599
|
-
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
|
6600
|
-
|
|
6601
|
-
return ok;
|
|
6602
|
-
}
|
|
6603
|
-
|
|
6604
|
-
static void gguf_free_kv(struct gguf_kv * kv) {
|
|
6605
|
-
if (kv->key.data) {
|
|
6606
|
-
GGML_FREE(kv->key.data);
|
|
6607
|
-
}
|
|
6608
|
-
|
|
6609
|
-
if (kv->type == GGUF_TYPE_STRING) {
|
|
6610
|
-
if (kv->value.str.data) {
|
|
6611
|
-
GGML_FREE(kv->value.str.data);
|
|
6612
|
-
}
|
|
6613
|
-
}
|
|
6614
|
-
|
|
6615
|
-
if (kv->type == GGUF_TYPE_ARRAY) {
|
|
6616
|
-
if (kv->value.arr.data) {
|
|
6617
|
-
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
|
6618
|
-
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
|
6619
|
-
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
|
6620
|
-
if (str->data) {
|
|
6621
|
-
GGML_FREE(str->data);
|
|
6622
|
-
}
|
|
6623
|
-
}
|
|
6624
|
-
}
|
|
6625
|
-
GGML_FREE(kv->value.arr.data);
|
|
6626
|
-
}
|
|
6627
|
-
}
|
|
6628
|
-
}
|
|
6629
|
-
|
|
6630
|
-
struct gguf_context * gguf_init_empty(void) {
|
|
6631
|
-
struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
|
|
6632
|
-
if (!ctx) {
|
|
6633
|
-
fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
|
|
6634
|
-
return NULL;
|
|
6635
|
-
}
|
|
6636
|
-
|
|
6637
|
-
memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
|
|
6638
|
-
ctx->header.version = GGUF_VERSION;
|
|
6639
|
-
ctx->header.n_tensors = 0;
|
|
6640
|
-
ctx->header.n_kv = 0;
|
|
6641
|
-
|
|
6642
|
-
ctx->kv = NULL;
|
|
6643
|
-
ctx->infos = NULL;
|
|
6644
|
-
|
|
6645
|
-
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
|
6646
|
-
ctx->offset = 0;
|
|
6647
|
-
ctx->size = 0;
|
|
6648
|
-
|
|
6649
|
-
ctx->data = NULL;
|
|
6650
|
-
|
|
6651
|
-
return ctx;
|
|
6652
|
-
}
|
|
6653
|
-
|
|
6654
|
-
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
|
|
6655
|
-
// offset from start of file
|
|
6656
|
-
size_t offset = 0;
|
|
6657
|
-
|
|
6658
|
-
char magic[4];
|
|
6659
|
-
|
|
6660
|
-
// check the magic before making allocations
|
|
6661
|
-
{
|
|
6662
|
-
gguf_fread_el(file, &magic, sizeof(magic), &offset);
|
|
6663
|
-
|
|
6664
|
-
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
|
6665
|
-
if (magic[i] != GGUF_MAGIC[i]) {
|
|
6666
|
-
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
|
6667
|
-
return NULL;
|
|
6668
|
-
}
|
|
6669
|
-
}
|
|
6670
|
-
}
|
|
6671
|
-
|
|
6672
|
-
bool ok = true;
|
|
6673
|
-
|
|
6674
|
-
struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
|
|
6675
|
-
if (!ctx) {
|
|
6676
|
-
fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
|
|
6677
|
-
return NULL;
|
|
6678
|
-
}
|
|
6679
|
-
|
|
6680
|
-
// read the header
|
|
6681
|
-
{
|
|
6682
|
-
strncpy(ctx->header.magic, magic, 4);
|
|
6683
|
-
|
|
6684
|
-
ctx->kv = NULL;
|
|
6685
|
-
ctx->infos = NULL;
|
|
6686
|
-
ctx->data = NULL;
|
|
6687
|
-
|
|
6688
|
-
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
|
6689
|
-
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
|
6690
|
-
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
|
6691
|
-
|
|
6692
|
-
if (ctx->header.version == 1) {
|
|
6693
|
-
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
|
6694
|
-
gguf_free(ctx);
|
|
6695
|
-
return NULL;
|
|
6696
|
-
}
|
|
6697
|
-
|
|
6698
|
-
// sanity-checks to prevent from integer/buffer overflows
|
|
6699
|
-
|
|
6700
|
-
ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
|
|
6701
|
-
ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
|
|
6702
|
-
ok = ok && (ctx->header.n_kv < (SIZE_MAX/2)/sizeof(struct gguf_kv));
|
|
6703
|
-
|
|
6704
|
-
if (!ok) {
|
|
6705
|
-
fprintf(stderr, "%s: failed to read header\n", __func__);
|
|
6706
|
-
gguf_free(ctx);
|
|
6707
|
-
return NULL;
|
|
6708
|
-
}
|
|
6709
|
-
}
|
|
6710
|
-
|
|
6711
|
-
// read the kv pairs
|
|
6712
|
-
{
|
|
6713
|
-
const uint64_t n_kv = ctx->header.n_kv;
|
|
6714
|
-
|
|
6715
|
-
if (n_kv > 0) {
|
|
6716
|
-
ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
|
|
6717
|
-
if (!ctx->kv) {
|
|
6718
|
-
fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
|
|
6719
|
-
gguf_free(ctx);
|
|
6720
|
-
return NULL;
|
|
6721
|
-
}
|
|
6722
|
-
}
|
|
6723
|
-
|
|
6724
|
-
for (uint64_t i = 0; i < n_kv; ++i) {
|
|
6725
|
-
struct gguf_kv * kv = &ctx->kv[i];
|
|
6726
|
-
|
|
6727
|
-
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
|
6728
|
-
|
|
6729
|
-
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
|
6730
|
-
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
|
6731
|
-
|
|
6732
|
-
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
|
6733
|
-
|
|
6734
|
-
switch (kv->type) {
|
|
6735
|
-
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
|
|
6736
|
-
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
|
|
6737
|
-
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
|
|
6738
|
-
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
|
|
6739
|
-
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
|
6740
|
-
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
|
6741
|
-
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
|
6742
|
-
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
|
6743
|
-
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
|
6744
|
-
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
|
6745
|
-
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
|
6746
|
-
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
|
|
6747
|
-
case GGUF_TYPE_ARRAY:
|
|
6748
|
-
{
|
|
6749
|
-
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
|
6750
|
-
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
|
6751
|
-
|
|
6752
|
-
switch (kv->value.arr.type) {
|
|
6753
|
-
case GGUF_TYPE_UINT8:
|
|
6754
|
-
case GGUF_TYPE_INT8:
|
|
6755
|
-
case GGUF_TYPE_UINT16:
|
|
6756
|
-
case GGUF_TYPE_INT16:
|
|
6757
|
-
case GGUF_TYPE_UINT32:
|
|
6758
|
-
case GGUF_TYPE_INT32:
|
|
6759
|
-
case GGUF_TYPE_FLOAT32:
|
|
6760
|
-
case GGUF_TYPE_UINT64:
|
|
6761
|
-
case GGUF_TYPE_INT64:
|
|
6762
|
-
case GGUF_TYPE_FLOAT64:
|
|
6763
|
-
case GGUF_TYPE_BOOL:
|
|
6764
|
-
{
|
|
6765
|
-
// prevent from integer overflow in the malloc below
|
|
6766
|
-
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
|
|
6767
|
-
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
|
6768
|
-
gguf_free(ctx);
|
|
6769
|
-
return NULL;
|
|
6770
|
-
}
|
|
6771
|
-
|
|
6772
|
-
kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
|
|
6773
|
-
if (!kv->value.arr.data) {
|
|
6774
|
-
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
|
6775
|
-
gguf_free(ctx);
|
|
6776
|
-
return NULL;
|
|
6777
|
-
}
|
|
6778
|
-
|
|
6779
|
-
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
|
6780
|
-
} break;
|
|
6781
|
-
case GGUF_TYPE_STRING:
|
|
6782
|
-
{
|
|
6783
|
-
// prevent from integer overflow in the malloc below
|
|
6784
|
-
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
|
|
6785
|
-
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
|
6786
|
-
gguf_free(ctx);
|
|
6787
|
-
return NULL;
|
|
6788
|
-
}
|
|
6789
|
-
|
|
6790
|
-
kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
|
|
6791
|
-
if (!kv->value.arr.data) {
|
|
6792
|
-
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
|
6793
|
-
gguf_free(ctx);
|
|
6794
|
-
return NULL;
|
|
6795
|
-
}
|
|
6796
|
-
|
|
6797
|
-
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
|
6798
|
-
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
|
6799
|
-
}
|
|
6800
|
-
} break;
|
|
6801
|
-
case GGUF_TYPE_ARRAY:
|
|
6802
|
-
default:
|
|
6803
|
-
{
|
|
6804
|
-
fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type);
|
|
6805
|
-
ok = false;
|
|
6806
|
-
} break;
|
|
6807
|
-
}
|
|
6808
|
-
} break;
|
|
6809
|
-
default:
|
|
6810
|
-
{
|
|
6811
|
-
fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type);
|
|
6812
|
-
ok = false;
|
|
6813
|
-
} break;
|
|
6814
|
-
}
|
|
6815
|
-
|
|
6816
|
-
if (!ok) {
|
|
6817
|
-
break;
|
|
6818
|
-
}
|
|
6819
|
-
}
|
|
6820
|
-
|
|
6821
|
-
if (!ok) {
|
|
6822
|
-
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
|
6823
|
-
gguf_free(ctx);
|
|
6824
|
-
return NULL;
|
|
6825
|
-
}
|
|
6826
|
-
}
|
|
6827
|
-
|
|
6828
|
-
// read the tensor infos
|
|
6829
|
-
if (ctx->header.n_tensors > 0) {
|
|
6830
|
-
ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
|
|
6831
|
-
if (!ctx->infos) {
|
|
6832
|
-
fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
|
|
6833
|
-
gguf_free(ctx);
|
|
6834
|
-
return NULL;
|
|
6835
|
-
}
|
|
6836
|
-
|
|
6837
|
-
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
6838
|
-
struct gguf_tensor_info * info = &ctx->infos[i];
|
|
6839
|
-
|
|
6840
|
-
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
|
6841
|
-
info->ne[j] = 1;
|
|
6842
|
-
}
|
|
6843
|
-
|
|
6844
|
-
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
|
6845
|
-
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
|
6846
|
-
|
|
6847
|
-
ok = ok && (info->n_dims <= GGML_MAX_DIMS);
|
|
6848
|
-
|
|
6849
|
-
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
|
6850
|
-
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
|
6851
|
-
}
|
|
6852
|
-
|
|
6853
|
-
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
|
6854
|
-
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
|
6855
|
-
|
|
6856
|
-
ok = ok && gguf_tensor_info_sanitize(info);
|
|
6857
|
-
|
|
6858
|
-
// make sure there is no duplicated tensor names
|
|
6859
|
-
for (uint64_t j = 0; j < i && ok; ++j) {
|
|
6860
|
-
if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
|
|
6861
|
-
fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
|
|
6862
|
-
ok = false;
|
|
6863
|
-
}
|
|
6864
|
-
}
|
|
6865
|
-
|
|
6866
|
-
if (!ok) {
|
|
6867
|
-
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
|
6868
|
-
gguf_free(ctx);
|
|
6869
|
-
return NULL;
|
|
6870
|
-
}
|
|
6871
|
-
}
|
|
6872
|
-
}
|
|
6873
|
-
|
|
6874
|
-
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
|
6875
|
-
|
|
6876
|
-
int alignment_idx = gguf_find_key(ctx, "general.alignment");
|
|
6877
|
-
if (alignment_idx != -1) {
|
|
6878
|
-
ctx->alignment = gguf_get_val_u32(ctx, alignment_idx);
|
|
6879
|
-
}
|
|
6880
|
-
|
|
6881
|
-
// we require the data section to be aligned, so take into account any padding
|
|
6882
|
-
{
|
|
6883
|
-
const size_t offset_pad = offset % ctx->alignment;
|
|
6884
|
-
|
|
6885
|
-
if (offset_pad != 0) {
|
|
6886
|
-
offset += ctx->alignment - offset_pad;
|
|
6887
|
-
fseek(file, offset, SEEK_SET);
|
|
6888
|
-
}
|
|
6889
|
-
}
|
|
6890
|
-
|
|
6891
|
-
// store the current file offset - this is where the data section starts
|
|
6892
|
-
ctx->offset = offset;
|
|
6893
|
-
|
|
6894
|
-
// compute the total size of the data section, taking into account the alignment
|
|
6895
|
-
{
|
|
6896
|
-
ctx->size = 0;
|
|
6897
|
-
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
6898
|
-
struct gguf_tensor_info * info = &ctx->infos[i];
|
|
6899
|
-
|
|
6900
|
-
const int64_t ne =
|
|
6901
|
-
(int64_t) info->ne[0] *
|
|
6902
|
-
(int64_t) info->ne[1] *
|
|
6903
|
-
(int64_t) info->ne[2] *
|
|
6904
|
-
(int64_t) info->ne[3];
|
|
6905
|
-
|
|
6906
|
-
if (ggml_blck_size(info->type) == 0 ) {
|
|
6907
|
-
// this tensor type support have been removed:
|
|
6908
|
-
fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
|
|
6909
|
-
__func__, info->name.data, (int) info->type, ggml_type_name(info->type));
|
|
6910
|
-
gguf_free(ctx);
|
|
6911
|
-
return NULL;
|
|
6912
|
-
}
|
|
6913
|
-
|
|
6914
|
-
if (ne % ggml_blck_size(info->type) != 0) {
|
|
6915
|
-
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
|
6916
|
-
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
|
|
6917
|
-
gguf_free(ctx);
|
|
6918
|
-
return NULL;
|
|
6919
|
-
}
|
|
6920
|
-
|
|
6921
|
-
const size_t size_cur = ggml_row_size(info->type, ne);
|
|
6922
|
-
|
|
6923
|
-
ctx->size += GGML_PAD(size_cur, ctx->alignment);
|
|
6924
|
-
}
|
|
6925
|
-
}
|
|
6926
|
-
|
|
6927
|
-
// load the tensor data only if requested
|
|
6928
|
-
if (params.ctx != NULL) {
|
|
6929
|
-
// if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
|
|
6930
|
-
// otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
|
|
6931
|
-
// the ggml_tensor structs to the appropriate locations in the binary blob
|
|
6932
|
-
|
|
6933
|
-
// compute the exact size needed for the new ggml_context
|
|
6934
|
-
const size_t mem_size =
|
|
6935
|
-
params.no_alloc ?
|
|
6936
|
-
(ctx->header.n_tensors )*ggml_tensor_overhead() :
|
|
6937
|
-
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
|
|
6938
|
-
|
|
6939
|
-
struct ggml_init_params pdata = {
|
|
6940
|
-
.mem_size = mem_size,
|
|
6941
|
-
.mem_buffer = NULL,
|
|
6942
|
-
.no_alloc = params.no_alloc,
|
|
6943
|
-
};
|
|
6944
|
-
|
|
6945
|
-
*params.ctx = ggml_init(pdata);
|
|
6946
|
-
if (*params.ctx == NULL) {
|
|
6947
|
-
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
|
6948
|
-
gguf_free(ctx);
|
|
6949
|
-
return NULL;
|
|
6950
|
-
}
|
|
6951
|
-
|
|
6952
|
-
struct ggml_context * ctx_data = *params.ctx;
|
|
6953
|
-
|
|
6954
|
-
struct ggml_tensor * data = NULL;
|
|
6955
|
-
|
|
6956
|
-
if (!params.no_alloc) {
|
|
6957
|
-
data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
|
|
6958
|
-
|
|
6959
|
-
ok = ok && data != NULL;
|
|
6960
|
-
|
|
6961
|
-
// read the binary blob with the tensor data
|
|
6962
|
-
ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
|
|
6963
|
-
|
|
6964
|
-
if (!ok) {
|
|
6965
|
-
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
|
6966
|
-
ggml_free(ctx_data);
|
|
6967
|
-
gguf_free(ctx);
|
|
6968
|
-
return NULL;
|
|
6969
|
-
}
|
|
6970
|
-
|
|
6971
|
-
ctx->data = data->data;
|
|
6972
|
-
}
|
|
6973
|
-
|
|
6974
|
-
ggml_set_no_alloc(ctx_data, true);
|
|
6975
|
-
|
|
6976
|
-
// create the tensors
|
|
6977
|
-
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
6978
|
-
const int64_t ne[GGML_MAX_DIMS] = {
|
|
6979
|
-
ctx->infos[i].ne[0],
|
|
6980
|
-
ctx->infos[i].ne[1],
|
|
6981
|
-
ctx->infos[i].ne[2],
|
|
6982
|
-
ctx->infos[i].ne[3],
|
|
6983
|
-
};
|
|
6984
|
-
|
|
6985
|
-
struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
|
|
6986
|
-
|
|
6987
|
-
ok = ok && cur != NULL;
|
|
6988
|
-
|
|
6989
|
-
if (!ok) {
|
|
6990
|
-
break;
|
|
6991
|
-
}
|
|
6992
|
-
|
|
6993
|
-
ggml_set_name(cur, ctx->infos[i].name.data);
|
|
6994
|
-
|
|
6995
|
-
// point the data member to the appropriate location in the binary blob using the tensor infos
|
|
6996
|
-
if (!params.no_alloc) {
|
|
6997
|
-
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
|
|
6998
|
-
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
|
|
6999
|
-
}
|
|
7000
|
-
}
|
|
7001
|
-
|
|
7002
|
-
if (!ok) {
|
|
7003
|
-
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
|
7004
|
-
ggml_free(ctx_data);
|
|
7005
|
-
gguf_free(ctx);
|
|
7006
|
-
return NULL;
|
|
7007
|
-
}
|
|
7008
|
-
|
|
7009
|
-
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
|
7010
|
-
}
|
|
7011
|
-
|
|
7012
|
-
return ctx;
|
|
7013
|
-
}
|
|
7014
|
-
|
|
7015
|
-
// Convenience wrapper: open `fname` in binary mode, parse it with
// gguf_init_from_file_impl, and close the stream. Returns NULL if the file
// cannot be opened or is malformed.
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
    FILE * file = ggml_fopen(fname, "rb");
    if (file == NULL) {
        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
        return NULL;
    }

    struct gguf_context * result = gguf_init_from_file_impl(file, params);
    fclose(file);

    return result;
}
|
|
7026
|
-
|
|
7027
|
-
void gguf_free(struct gguf_context * ctx) {
|
|
7028
|
-
if (ctx == NULL) {
|
|
7029
|
-
return;
|
|
7030
|
-
}
|
|
7031
|
-
|
|
7032
|
-
if (ctx->kv) {
|
|
7033
|
-
// free string memory - not great..
|
|
7034
|
-
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
|
7035
|
-
gguf_free_kv(&ctx->kv[i]);
|
|
7036
|
-
}
|
|
7037
|
-
|
|
7038
|
-
GGML_FREE(ctx->kv);
|
|
7039
|
-
}
|
|
7040
|
-
|
|
7041
|
-
if (ctx->infos) {
|
|
7042
|
-
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
7043
|
-
struct gguf_tensor_info * info = &ctx->infos[i];
|
|
7044
|
-
|
|
7045
|
-
if (info->name.data) {
|
|
7046
|
-
GGML_FREE(info->name.data);
|
|
7047
|
-
}
|
|
7048
|
-
}
|
|
7049
|
-
|
|
7050
|
-
GGML_FREE(ctx->infos);
|
|
7051
|
-
}
|
|
7052
|
-
|
|
7053
|
-
GGML_FREE(ctx);
|
|
7054
|
-
}
|
|
7055
|
-
|
|
7056
|
-
const char * gguf_type_name(enum gguf_type type) {
|
|
7057
|
-
return GGUF_TYPE_NAME[type];
|
|
7058
|
-
}
|
|
7059
|
-
|
|
7060
|
-
int gguf_get_version(const struct gguf_context * ctx) {
|
|
7061
|
-
return ctx->header.version;
|
|
7062
|
-
}
|
|
7063
|
-
|
|
7064
|
-
size_t gguf_get_alignment(const struct gguf_context * ctx) {
|
|
7065
|
-
return ctx->alignment;
|
|
7066
|
-
}
|
|
7067
|
-
|
|
7068
|
-
size_t gguf_get_data_offset(const struct gguf_context * ctx) {
|
|
7069
|
-
return ctx->offset;
|
|
7070
|
-
}
|
|
7071
|
-
|
|
7072
|
-
void * gguf_get_data(const struct gguf_context * ctx) {
|
|
7073
|
-
return ctx->data;
|
|
7074
|
-
}
|
|
7075
|
-
|
|
7076
|
-
int gguf_get_n_kv(const struct gguf_context * ctx) {
|
|
7077
|
-
return ctx->header.n_kv;
|
|
7078
|
-
}
|
|
7079
|
-
|
|
7080
|
-
// Linear search for `key` among the context's KV pairs.
// Returns the key's index, or -1 if not present.
int gguf_find_key(const struct gguf_context * ctx, const char * key) {
    const int n_kv = gguf_get_n_kv(ctx);

    for (int i = 0; i < n_kv; ++i) {
        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
            return i;
        }
    }

    return -1;
}
|
|
7095
|
-
|
|
7096
|
-
const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
|
|
7097
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7098
|
-
return ctx->kv[key_id].key.data;
|
|
7099
|
-
}
|
|
7100
|
-
|
|
7101
|
-
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
|
|
7102
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7103
|
-
return ctx->kv[key_id].type;
|
|
7104
|
-
}
|
|
7105
|
-
|
|
7106
|
-
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
|
|
7107
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7108
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
|
7109
|
-
return ctx->kv[key_id].value.arr.type;
|
|
7110
|
-
}
|
|
7111
|
-
|
|
7112
|
-
const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
|
|
7113
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7114
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
|
7115
|
-
return ctx->kv[key_id].value.arr.data;
|
|
7116
|
-
}
|
|
7117
|
-
|
|
7118
|
-
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
|
|
7119
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7120
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
|
7121
|
-
struct gguf_kv * kv = &ctx->kv[key_id];
|
|
7122
|
-
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
|
|
7123
|
-
return str->data;
|
|
7124
|
-
}
|
|
7125
|
-
|
|
7126
|
-
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
|
|
7127
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7128
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
|
7129
|
-
return ctx->kv[key_id].value.arr.n;
|
|
7130
|
-
}
|
|
7131
|
-
|
|
7132
|
-
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
|
|
7133
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7134
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
|
|
7135
|
-
return ctx->kv[key_id].value.uint8;
|
|
7136
|
-
}
|
|
7137
|
-
|
|
7138
|
-
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
|
|
7139
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7140
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
|
|
7141
|
-
return ctx->kv[key_id].value.int8;
|
|
7142
|
-
}
|
|
7143
|
-
|
|
7144
|
-
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
|
|
7145
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7146
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
|
|
7147
|
-
return ctx->kv[key_id].value.uint16;
|
|
7148
|
-
}
|
|
7149
|
-
|
|
7150
|
-
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
|
|
7151
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7152
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
|
|
7153
|
-
return ctx->kv[key_id].value.int16;
|
|
7154
|
-
}
|
|
7155
|
-
|
|
7156
|
-
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
|
|
7157
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7158
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
|
|
7159
|
-
return ctx->kv[key_id].value.uint32;
|
|
7160
|
-
}
|
|
7161
|
-
|
|
7162
|
-
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
|
|
7163
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7164
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
|
|
7165
|
-
return ctx->kv[key_id].value.int32;
|
|
7166
|
-
}
|
|
7167
|
-
|
|
7168
|
-
float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
|
|
7169
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7170
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
|
|
7171
|
-
return ctx->kv[key_id].value.float32;
|
|
7172
|
-
}
|
|
7173
|
-
|
|
7174
|
-
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
|
|
7175
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7176
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
|
|
7177
|
-
return ctx->kv[key_id].value.uint64;
|
|
7178
|
-
}
|
|
7179
|
-
|
|
7180
|
-
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
|
|
7181
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7182
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
|
|
7183
|
-
return ctx->kv[key_id].value.int64;
|
|
7184
|
-
}
|
|
7185
|
-
|
|
7186
|
-
double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
|
|
7187
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7188
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
|
|
7189
|
-
return ctx->kv[key_id].value.float64;
|
|
7190
|
-
}
|
|
7191
|
-
|
|
7192
|
-
bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
|
|
7193
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7194
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
|
|
7195
|
-
return ctx->kv[key_id].value.bool_;
|
|
7196
|
-
}
|
|
7197
|
-
|
|
7198
|
-
const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
|
|
7199
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7200
|
-
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
|
|
7201
|
-
return ctx->kv[key_id].value.str.data;
|
|
7202
|
-
}
|
|
7203
|
-
|
|
7204
|
-
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
|
|
7205
|
-
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
|
7206
|
-
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
|
|
7207
|
-
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
|
|
7208
|
-
return &ctx->kv[key_id].value;
|
|
7209
|
-
}
|
|
7210
|
-
|
|
7211
|
-
int gguf_get_n_tensors(const struct gguf_context * ctx) {
|
|
7212
|
-
return ctx->header.n_tensors;
|
|
7213
|
-
}
|
|
7214
|
-
|
|
7215
|
-
// Linear search for a tensor by name.
// Returns the tensor's index, or -1 if not present.
int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
    const int n_tensors = gguf_get_n_tensors(ctx);

    for (int i = 0; i < n_tensors; ++i) {
        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
            return i;
        }
    }

    return -1;
}
|
|
7230
|
-
|
|
7231
|
-
size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
|
|
7232
|
-
return ctx->infos[i].offset;
|
|
7233
|
-
}
|
|
7234
|
-
|
|
7235
|
-
char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
|
|
7236
|
-
return ctx->infos[i].name.data;
|
|
7237
|
-
}
|
|
7238
|
-
|
|
7239
|
-
enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) {
|
|
7240
|
-
return ctx->infos[i].type;
|
|
7241
|
-
}
|
|
7242
|
-
|
|
7243
|
-
// returns the index
|
|
7244
|
-
static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
|
|
7245
|
-
const int idx = gguf_find_key(ctx, key);
|
|
7246
|
-
if (idx >= 0) {
|
|
7247
|
-
return idx;
|
|
7248
|
-
}
|
|
7249
|
-
|
|
7250
|
-
const int n_kv = gguf_get_n_kv(ctx);
|
|
7251
|
-
|
|
7252
|
-
ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
|
|
7253
|
-
ctx->kv[n_kv].key.n = strlen(key);
|
|
7254
|
-
ctx->kv[n_kv].key.data = strdup(key);
|
|
7255
|
-
ctx->header.n_kv++;
|
|
7256
|
-
|
|
7257
|
-
return n_kv;
|
|
7258
|
-
}
|
|
7259
|
-
|
|
7260
|
-
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
|
|
7261
|
-
const int idx = gguf_find_key(ctx, key);
|
|
7262
|
-
if (idx >= 0) {
|
|
7263
|
-
const int n_kv = gguf_get_n_kv(ctx);
|
|
7264
|
-
gguf_free_kv(&ctx->kv[idx]);
|
|
7265
|
-
for (int i = idx; i < n_kv-1; ++i) {
|
|
7266
|
-
ctx->kv[i] = ctx->kv[i+1];
|
|
7267
|
-
}
|
|
7268
|
-
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
|
|
7269
|
-
ctx->header.n_kv--;
|
|
7270
|
-
}
|
|
7271
|
-
}
|
|
7272
|
-
|
|
7273
|
-
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
|
7274
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7275
|
-
|
|
7276
|
-
ctx->kv[idx].type = GGUF_TYPE_UINT8;
|
|
7277
|
-
ctx->kv[idx].value.uint8 = val;
|
|
7278
|
-
}
|
|
7279
|
-
|
|
7280
|
-
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
|
|
7281
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7282
|
-
|
|
7283
|
-
ctx->kv[idx].type = GGUF_TYPE_INT8;
|
|
7284
|
-
ctx->kv[idx].value.int8 = val;
|
|
7285
|
-
}
|
|
7286
|
-
|
|
7287
|
-
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
|
|
7288
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7289
|
-
|
|
7290
|
-
ctx->kv[idx].type = GGUF_TYPE_UINT16;
|
|
7291
|
-
ctx->kv[idx].value.uint16 = val;
|
|
7292
|
-
}
|
|
7293
|
-
|
|
7294
|
-
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
|
|
7295
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7296
|
-
|
|
7297
|
-
ctx->kv[idx].type = GGUF_TYPE_INT16;
|
|
7298
|
-
ctx->kv[idx].value.int16 = val;
|
|
7299
|
-
}
|
|
7300
|
-
|
|
7301
|
-
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
|
|
7302
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7303
|
-
|
|
7304
|
-
ctx->kv[idx].type = GGUF_TYPE_UINT32;
|
|
7305
|
-
ctx->kv[idx].value.uint32 = val;
|
|
7306
|
-
}
|
|
7307
|
-
|
|
7308
|
-
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
|
|
7309
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7310
|
-
|
|
7311
|
-
ctx->kv[idx].type = GGUF_TYPE_INT32;
|
|
7312
|
-
ctx->kv[idx].value.int32 = val;
|
|
7313
|
-
}
|
|
7314
|
-
|
|
7315
|
-
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
|
|
7316
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7317
|
-
|
|
7318
|
-
ctx->kv[idx].type = GGUF_TYPE_FLOAT32;
|
|
7319
|
-
ctx->kv[idx].value.float32 = val;
|
|
7320
|
-
}
|
|
7321
|
-
|
|
7322
|
-
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
|
|
7323
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7324
|
-
|
|
7325
|
-
ctx->kv[idx].type = GGUF_TYPE_UINT64;
|
|
7326
|
-
ctx->kv[idx].value.uint64 = val;
|
|
7327
|
-
}
|
|
7328
|
-
|
|
7329
|
-
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
|
|
7330
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7331
|
-
|
|
7332
|
-
ctx->kv[idx].type = GGUF_TYPE_INT64;
|
|
7333
|
-
ctx->kv[idx].value.int64 = val;
|
|
7334
|
-
}
|
|
7335
|
-
|
|
7336
|
-
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
|
|
7337
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7338
|
-
|
|
7339
|
-
ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
|
|
7340
|
-
ctx->kv[idx].value.float64 = val;
|
|
7341
|
-
}
|
|
7342
|
-
|
|
7343
|
-
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
|
|
7344
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7345
|
-
|
|
7346
|
-
ctx->kv[idx].type = GGUF_TYPE_BOOL;
|
|
7347
|
-
ctx->kv[idx].value.bool_ = val;
|
|
7348
|
-
}
|
|
7349
|
-
|
|
7350
|
-
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
|
|
7351
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7352
|
-
|
|
7353
|
-
ctx->kv[idx].type = GGUF_TYPE_STRING;
|
|
7354
|
-
ctx->kv[idx].value.str.n = strlen(val);
|
|
7355
|
-
ctx->kv[idx].value.str.data = strdup(val);
|
|
7356
|
-
}
|
|
7357
|
-
|
|
7358
|
-
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
|
|
7359
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7360
|
-
|
|
7361
|
-
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
|
7362
|
-
ctx->kv[idx].value.arr.type = type;
|
|
7363
|
-
ctx->kv[idx].value.arr.n = n;
|
|
7364
|
-
ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type));
|
|
7365
|
-
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
|
|
7366
|
-
}
|
|
7367
|
-
|
|
7368
|
-
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
|
|
7369
|
-
const int idx = gguf_get_or_add_key(ctx, key);
|
|
7370
|
-
|
|
7371
|
-
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
|
7372
|
-
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
|
|
7373
|
-
ctx->kv[idx].value.arr.n = n;
|
|
7374
|
-
ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str));
|
|
7375
|
-
for (int i = 0; i < n; i++) {
|
|
7376
|
-
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
|
|
7377
|
-
str->n = strlen(data[i]);
|
|
7378
|
-
str->data = strdup(data[i]);
|
|
7379
|
-
}
|
|
7380
|
-
}
|
|
7381
|
-
|
|
7382
|
-
// set or add KV pairs from another context
|
|
7383
|
-
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|
7384
|
-
for (uint32_t i = 0; i < src->header.n_kv; i++) {
|
|
7385
|
-
switch (src->kv[i].type) {
|
|
7386
|
-
case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
|
|
7387
|
-
case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
|
|
7388
|
-
case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
|
|
7389
|
-
case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
|
|
7390
|
-
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
|
|
7391
|
-
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
|
|
7392
|
-
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
|
|
7393
|
-
case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
|
|
7394
|
-
case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
|
|
7395
|
-
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
|
|
7396
|
-
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
|
|
7397
|
-
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
|
|
7398
|
-
case GGUF_TYPE_ARRAY:
|
|
7399
|
-
{
|
|
7400
|
-
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
|
|
7401
|
-
const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
|
|
7402
|
-
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
|
|
7403
|
-
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
|
|
7404
|
-
}
|
|
7405
|
-
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
|
|
7406
|
-
GGML_FREE((void *)data);
|
|
7407
|
-
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
|
|
7408
|
-
GGML_ABORT("nested arrays not supported");
|
|
7409
|
-
} else {
|
|
7410
|
-
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
|
|
7411
|
-
}
|
|
7412
|
-
} break;
|
|
7413
|
-
default: GGML_ABORT("invalid type");
|
|
7414
|
-
}
|
|
7415
|
-
}
|
|
7416
|
-
}
|
|
7417
|
-
|
|
7418
|
-
void gguf_add_tensor(
|
|
7419
|
-
struct gguf_context * ctx,
|
|
7420
|
-
const struct ggml_tensor * tensor) {
|
|
7421
|
-
GGML_ASSERT(tensor);
|
|
7422
|
-
if (gguf_find_tensor(ctx, tensor->name) != -1) {
|
|
7423
|
-
GGML_ABORT("duplicated tensor name");
|
|
7424
|
-
}
|
|
7425
|
-
|
|
7426
|
-
const int idx = ctx->header.n_tensors;
|
|
7427
|
-
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
|
7428
|
-
|
|
7429
|
-
ctx->infos[idx].name.n = strlen(tensor->name);
|
|
7430
|
-
ctx->infos[idx].name.data = strdup(tensor->name);
|
|
7431
|
-
|
|
7432
|
-
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
|
|
7433
|
-
ctx->infos[idx].ne[i] = 1;
|
|
7434
|
-
}
|
|
7435
|
-
|
|
7436
|
-
ctx->infos[idx].n_dims = ggml_n_dims(tensor);
|
|
7437
|
-
for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
|
|
7438
|
-
ctx->infos[idx].ne[i] = tensor->ne[i];
|
|
7439
|
-
}
|
|
7440
|
-
|
|
7441
|
-
ctx->infos[idx].type = tensor->type;
|
|
7442
|
-
ctx->infos[idx].offset = 0;
|
|
7443
|
-
ctx->infos[idx].data = tensor->data;
|
|
7444
|
-
ctx->infos[idx].size = ggml_nbytes(tensor);
|
|
7445
|
-
|
|
7446
|
-
if (ctx->header.n_tensors > 0) {
|
|
7447
|
-
ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
|
|
7448
|
-
}
|
|
7449
|
-
|
|
7450
|
-
ctx->header.n_tensors++;
|
|
7451
|
-
}
|
|
7452
|
-
|
|
7453
|
-
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
|
|
7454
|
-
const int idx = gguf_find_tensor(ctx, name);
|
|
7455
|
-
if (idx < 0) {
|
|
7456
|
-
GGML_ABORT("tensor not found");
|
|
7457
|
-
}
|
|
7458
|
-
|
|
7459
|
-
ctx->infos[idx].type = type;
|
|
7460
|
-
}
|
|
7461
|
-
|
|
7462
|
-
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
|
|
7463
|
-
const int idx = gguf_find_tensor(ctx, name);
|
|
7464
|
-
if (idx < 0) {
|
|
7465
|
-
GGML_ABORT("tensor not found");
|
|
7466
|
-
}
|
|
7467
|
-
|
|
7468
|
-
ctx->infos[idx].data = data;
|
|
7469
|
-
ctx->infos[idx].size = size;
|
|
7470
|
-
|
|
7471
|
-
// update offsets
|
|
7472
|
-
for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
|
|
7473
|
-
ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
|
|
7474
|
-
}
|
|
7475
|
-
}
|
|
7476
|
-
|
|
7477
|
-
//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
|
|
7478
|
-
// fwrite(&val->n, sizeof(val->n), 1, file);
|
|
7479
|
-
// fwrite(val->data, sizeof(char), val->n, file);
|
|
7480
|
-
//}
|
|
7481
|
-
//
|
|
7482
|
-
//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
|
|
7483
|
-
// fwrite(val, sizeof(char), size, file);
|
|
7484
|
-
//}
|
|
7485
|
-
|
|
7486
|
-
struct gguf_buf gguf_buf_init(size_t size) {
|
|
7487
|
-
struct gguf_buf buf = {
|
|
7488
|
-
/*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
|
|
7489
|
-
/*buf.size =*/ size,
|
|
7490
|
-
/*buf.offset =*/ 0,
|
|
7491
|
-
};
|
|
7492
|
-
|
|
7493
|
-
return buf;
|
|
7494
|
-
}
|
|
7495
|
-
|
|
7496
|
-
void gguf_buf_free(struct gguf_buf buf) {
|
|
7497
|
-
if (buf.data) {
|
|
7498
|
-
GGML_FREE(buf.data);
|
|
7499
|
-
}
|
|
7500
|
-
}
|
|
7501
|
-
|
|
7502
|
-
static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
|
|
7503
|
-
if (buf->offset + size > buf->size) {
|
|
7504
|
-
buf->size = 1.5*(buf->offset + size);
|
|
7505
|
-
if (buf->data) {
|
|
7506
|
-
buf->data = realloc(buf->data, buf->size);
|
|
7507
|
-
}
|
|
7508
|
-
}
|
|
7509
|
-
}
|
|
7510
|
-
|
|
7511
|
-
static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
|
|
7512
|
-
gguf_buf_grow(buf, sizeof(val->n) + val->n);
|
|
7513
|
-
|
|
7514
|
-
if (buf->data) {
|
|
7515
|
-
memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
|
|
7516
|
-
}
|
|
7517
|
-
buf->offset += sizeof(val->n);
|
|
7518
|
-
|
|
7519
|
-
if (buf->data) {
|
|
7520
|
-
memcpy((char *) buf->data + buf->offset, val->data, val->n);
|
|
7521
|
-
}
|
|
7522
|
-
buf->offset += val->n;
|
|
7523
|
-
}
|
|
7524
|
-
|
|
7525
|
-
static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
|
|
7526
|
-
gguf_buf_grow(buf, el_size);
|
|
7527
|
-
|
|
7528
|
-
if (buf->data) {
|
|
7529
|
-
memcpy((char *) buf->data + buf->offset, val, el_size);
|
|
7530
|
-
}
|
|
7531
|
-
buf->offset += el_size;
|
|
7532
|
-
}
|
|
7533
|
-
|
|
7534
|
-
void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
|
|
7535
|
-
// write header
|
|
7536
|
-
gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
|
|
7537
|
-
gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
|
|
7538
|
-
gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
|
|
7539
|
-
gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
|
|
7540
|
-
|
|
7541
|
-
// write key-value pairs
|
|
7542
|
-
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
|
|
7543
|
-
struct gguf_kv * kv = &ctx->kv[i];
|
|
7544
|
-
|
|
7545
|
-
gguf_bwrite_str(buf, &kv->key);
|
|
7546
|
-
gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
|
|
7547
|
-
|
|
7548
|
-
switch (kv->type) {
|
|
7549
|
-
case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
|
|
7550
|
-
case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
|
|
7551
|
-
case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
|
|
7552
|
-
case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;
|
|
7553
|
-
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
|
|
7554
|
-
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
|
|
7555
|
-
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
|
|
7556
|
-
case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
|
|
7557
|
-
case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
|
|
7558
|
-
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
|
|
7559
|
-
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
|
|
7560
|
-
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
|
|
7561
|
-
case GGUF_TYPE_ARRAY:
|
|
7562
|
-
{
|
|
7563
|
-
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
|
|
7564
|
-
gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) );
|
|
7565
|
-
|
|
7566
|
-
switch (kv->value.arr.type) {
|
|
7567
|
-
case GGUF_TYPE_UINT8:
|
|
7568
|
-
case GGUF_TYPE_INT8:
|
|
7569
|
-
case GGUF_TYPE_UINT16:
|
|
7570
|
-
case GGUF_TYPE_INT16:
|
|
7571
|
-
case GGUF_TYPE_UINT32:
|
|
7572
|
-
case GGUF_TYPE_INT32:
|
|
7573
|
-
case GGUF_TYPE_FLOAT32:
|
|
7574
|
-
case GGUF_TYPE_UINT64:
|
|
7575
|
-
case GGUF_TYPE_INT64:
|
|
7576
|
-
case GGUF_TYPE_FLOAT64:
|
|
7577
|
-
case GGUF_TYPE_BOOL:
|
|
7578
|
-
{
|
|
7579
|
-
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
|
|
7580
|
-
} break;
|
|
7581
|
-
case GGUF_TYPE_STRING:
|
|
7582
|
-
{
|
|
7583
|
-
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
|
7584
|
-
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
|
|
7585
|
-
}
|
|
7586
|
-
} break;
|
|
7587
|
-
case GGUF_TYPE_ARRAY:
|
|
7588
|
-
default: GGML_ABORT("invalid type");
|
|
7589
|
-
}
|
|
7590
|
-
} break;
|
|
7591
|
-
default: GGML_ABORT("invalid type");
|
|
7592
|
-
}
|
|
7593
|
-
}
|
|
7594
|
-
|
|
7595
|
-
// write tensor infos
|
|
7596
|
-
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
7597
|
-
struct gguf_tensor_info * info = &ctx->infos[i];
|
|
7598
|
-
|
|
7599
|
-
gguf_bwrite_str(buf, &info->name);
|
|
7600
|
-
gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
|
|
7601
|
-
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
|
7602
|
-
gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
|
|
7603
|
-
}
|
|
7604
|
-
gguf_bwrite_el(buf, &info->type, sizeof(info->type));
|
|
7605
|
-
gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
|
|
7606
|
-
}
|
|
7607
|
-
|
|
7608
|
-
// we require the data section to be aligned, so take into account any padding
|
|
7609
|
-
{
|
|
7610
|
-
const size_t offset = buf->offset;
|
|
7611
|
-
const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
|
|
7612
|
-
|
|
7613
|
-
if (offset_pad != offset) {
|
|
7614
|
-
uint8_t pad = 0;
|
|
7615
|
-
for (size_t i = 0; i < offset_pad - offset; ++i) {
|
|
7616
|
-
gguf_bwrite_el(buf, &pad, sizeof(pad));
|
|
7617
|
-
}
|
|
7618
|
-
}
|
|
7619
|
-
}
|
|
7620
|
-
|
|
7621
|
-
if (only_meta) {
|
|
7622
|
-
return;
|
|
7623
|
-
}
|
|
7624
|
-
|
|
7625
|
-
size_t offset = 0;
|
|
7626
|
-
|
|
7627
|
-
// write tensor data
|
|
7628
|
-
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
|
7629
|
-
struct gguf_tensor_info * info = &ctx->infos[i];
|
|
7630
|
-
|
|
7631
|
-
const size_t size = info->size;
|
|
7632
|
-
const size_t size_pad = GGML_PAD(size, ctx->alignment);
|
|
7633
|
-
|
|
7634
|
-
gguf_bwrite_el(buf, info->data, size);
|
|
7635
|
-
|
|
7636
|
-
if (size_pad != size) {
|
|
7637
|
-
uint8_t pad = 0;
|
|
7638
|
-
for (size_t j = 0; j < size_pad - size; ++j) {
|
|
7639
|
-
gguf_bwrite_el(buf, &pad, sizeof(pad));
|
|
7640
|
-
}
|
|
7641
|
-
}
|
|
7642
|
-
|
|
7643
|
-
GGML_ASSERT(offset == info->offset);
|
|
7644
|
-
|
|
7645
|
-
offset += size_pad;
|
|
7646
|
-
}
|
|
7647
|
-
}
|
|
7648
|
-
|
|
7649
|
-
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
|
|
7650
|
-
FILE * file = ggml_fopen(fname, "wb");
|
|
7651
|
-
if (!file) {
|
|
7652
|
-
GGML_ABORT("failed to open file for writing");
|
|
7653
|
-
}
|
|
7654
|
-
|
|
7655
|
-
struct gguf_buf buf = gguf_buf_init(16*1024);
|
|
7656
|
-
|
|
7657
|
-
gguf_write_to_buf(ctx, &buf, only_meta);
|
|
7658
|
-
|
|
7659
|
-
fwrite(buf.data, 1, buf.offset, file);
|
|
7660
|
-
|
|
7661
|
-
gguf_buf_free(buf);
|
|
7662
|
-
|
|
7663
|
-
fclose(file);
|
|
7664
|
-
}
|
|
7665
|
-
|
|
7666
|
-
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
|
|
7667
|
-
// no allocs - only compute size
|
|
7668
|
-
struct gguf_buf buf = gguf_buf_init(0);
|
|
7669
|
-
|
|
7670
|
-
gguf_write_to_buf(ctx, &buf, true);
|
|
7671
|
-
|
|
7672
|
-
return buf.offset;
|
|
7673
|
-
}
|
|
7674
|
-
|
|
7675
|
-
void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
|
|
7676
|
-
struct gguf_buf buf = gguf_buf_init(16*1024);
|
|
7677
|
-
|
|
7678
|
-
gguf_write_to_buf(ctx, &buf, true);
|
|
7679
|
-
|
|
7680
|
-
memcpy(data, buf.data, buf.offset);
|
|
7681
|
-
|
|
7682
|
-
gguf_buf_free(buf);
|
|
7683
|
-
}
|
|
7684
|
-
|
|
7685
6475
|
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
|
|
7686
6476
|
g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
|
|
7687
6477
|
g_logger_state.log_callback_user_data = user_data;
|