@fugood/llama.node 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
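The bundled llama.cpp sources in this release move to the split model/vocab/context C API and to the standalone gguf.h header; the hunks below show representative changes from the vendored examples and headers. As a minimal sketch of the call pattern the examples migrate to, assembled only from functions that appear in the hunks below (the model path and error handling are illustrative, not from the diff):

```cpp
// Minimal sketch of the post-refactor llama.cpp call pattern seen in the hunks
// below; "model.gguf" is an illustrative path.
#include <cstdio>
#include "llama.h"

int main() {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (!model) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // vocab queries now go through a separate llama_vocab handle
    const llama_vocab * vocab = llama_model_get_vocab(model);
    const bool add_bos = llama_vocab_get_add_bos(vocab);
    (void) add_bos;

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_init_from_model(model, cparams);

    llama_free(ctx);
    llama_model_free(model); // replaces the removed llama_free_model() calls
    return 0;
}
```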
package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp

```diff
@@ -34,7 +34,7 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);
 
     llama_model * model_tgt = NULL;
-    llama_model * model_dft = NULL;
+    //llama_model * model_dft = NULL;
 
     llama_context * ctx_tgt = NULL;
     llama_context * ctx_dft = NULL;
@@ -42,8 +42,10 @@ int main(int argc, char ** argv) {
     // load the target model
     common_init_result llama_init_tgt = common_init_from_params(params);
 
-    model_tgt = llama_init_tgt.model;
-    ctx_tgt = llama_init_tgt.context;
+    model_tgt = llama_init_tgt.model.get();
+    ctx_tgt = llama_init_tgt.context.get();
+
+    const llama_vocab * vocab = llama_model_get_vocab(model_tgt);
 
     // load the draft model
     params.devices = params.speculative.devices;
@@ -59,8 +61,8 @@ int main(int argc, char ** argv) {
     params.cpuparams_batch.n_threads = params.speculative.cpuparams_batch.n_threads;
     common_init_result llama_init_dft = common_init_from_params(params);
 
-    model_dft = llama_init_dft.model;
-    ctx_dft = llama_init_dft.context;
+    //model_dft = llama_init_dft.model.get();
+    ctx_dft = llama_init_dft.context.get();
 
     if (!common_speculative_are_compatible(ctx_tgt, ctx_dft)) {
         return 1;
@@ -196,7 +198,7 @@ int main(int argc, char ** argv) {
 
             id_last = ids[i];
 
-            if (
+            if (llama_vocab_is_eog(vocab, id_last)) {
                 has_eos = true;
                 break;
             }
@@ -251,12 +253,6 @@ int main(int argc, char ** argv) {
     common_sampler_free(smpl);
    common_speculative_free(spec);
 
-    llama_free(ctx_tgt);
-    llama_free_model(model_tgt);
-
-    llama_free(ctx_dft);
-    llama_free_model(model_dft);
-
     llama_backend_free();
 
     LOG("\n\n");
```
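The dropped llama_free()/llama_free_model() calls and the new .get() accessors suggest that common_init_result now owns the model and context through smart pointers (the package also updates include/llama-cpp.h). A hedged sketch of that ownership pattern; the wrapper and struct names here are illustrative, not the exact llama.cpp definitions:

```cpp
// Hedged sketch of why the explicit llama_free()/llama_free_model() calls can be
// dropped: if common_init_result holds unique_ptr-style wrappers (as the .get()
// calls above suggest), the context and model are released automatically.
#include <memory>
#include "llama.h"

struct llama_context_deleter { void operator()(llama_context * c) const { llama_free(c); } };
struct llama_model_deleter   { void operator()(llama_model * m)  const { llama_model_free(m); } };

struct init_result_sketch {
    std::unique_ptr<llama_model,   llama_model_deleter>   model;
    std::unique_ptr<llama_context, llama_context_deleter> context;
};

void use(init_result_sketch & r) {
    llama_model *   model = r.model.get();   // raw, non-owning view, as in the hunk above
    llama_context * ctx   = r.context.get();
    (void) model; (void) ctx;
}   // no manual frees needed; the deleters run when the owner goes out of scope
```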
package/src/llama.cpp/examples/tokenize/tokenize.cpp

```diff
@@ -31,6 +31,7 @@ static void print_usage_information(const char * argv0) {
     printf(" -p PROMPT, --prompt PROMPT read prompt from the argument.\n");
     printf(" --stdin read prompt from standard input.\n");
     printf(" --no-bos do not ever add a BOS token to the prompt, even if normally the model uses a BOS token.\n");
+    printf(" --no-escape do not escape input (such as \\n, \\t, etc.).\n");
     printf(" --no-parse-special do not parse control tokens.\n");
     printf(" --log-disable disable logs. Makes stderr quiet when loading the model.\n");
     printf(" --show-count print the total number of tokens.\n");
@@ -198,6 +199,7 @@ int main(int raw_argc, char ** raw_argv) {
     // variables where to put any arguments we see.
     bool printing_ids = false;
     bool no_bos = false;
+    bool no_escape = false;
     bool no_parse_special = false;
     bool disable_logging = false;
     bool show_token_count = false;
@@ -233,6 +235,9 @@ int main(int raw_argc, char ** raw_argv) {
         else if (arg == "--no-bos") {
             no_bos = true;
         }
+        else if (arg == "--no-escape") {
+            no_escape = true;
+        }
         else if (arg == "--no-parse-special") {
             no_parse_special = true;
         }
@@ -333,14 +338,16 @@ int main(int raw_argc, char ** raw_argv) {
 
     llama_model_params model_params = llama_model_default_params();
     model_params.vocab_only = true;
-    llama_model * model =
+    llama_model * model = llama_model_load_from_file(model_path, model_params);
     if (!model) {
         fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path);
         return 1;
     }
 
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     llama_context_params ctx_params = llama_context_default_params();
-    llama_context * ctx =
+    llama_context * ctx = llama_init_from_model(model, ctx_params);
     if (!ctx) {
         fprintf(stderr, "Error: could not create context.\n");
         return 1;
@@ -360,12 +367,17 @@ int main(int raw_argc, char ** raw_argv) {
         prompt = stdin_buffer.str();
     }
 
-    const bool model_wants_add_bos =
+    const bool model_wants_add_bos = llama_vocab_get_add_bos(vocab);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
+    const bool escape = !no_escape;
+
+    if (escape) {
+        string_process_escapes(prompt);
+    }
 
     std::vector<llama_token> tokens;
-    tokens = common_tokenize(
+    tokens = common_tokenize(vocab, prompt, add_bos, parse_special);
 
     if (printing_ids) {
         printf("[");
@@ -398,7 +410,7 @@ int main(int raw_argc, char ** raw_argv) {
     }
     // silence valgrind
     llama_free(ctx);
-
+    llama_model_free(model);
 
     return 0;
 }
```
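The new --no-escape flag gates a call to string_process_escapes() (a helper from llama.cpp's common library) before tokenization: by default, escape sequences typed literally in the prompt (such as \n or \t) are converted to real characters, and --no-escape tokenizes the prompt verbatim. A small sketch of that control flow, with the surrounding tokenizer driver elided:

```cpp
// Sketch of the escape handling added to llama-tokenize above;
// string_process_escapes comes from common.h.
#include <string>
#include "common.h"

void tokenize_prompt_sketch(std::string prompt, bool no_escape) {
    const bool escape = !no_escape;
    if (escape) {
        string_process_escapes(prompt); // e.g. the two characters '\' 'n' become a newline
    }
    // ... then common_tokenize(vocab, prompt, add_bos, parse_special) as in the hunk above
}
```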
package/src/llama.cpp/examples/tts/tts.cpp

```diff
@@ -414,15 +414,42 @@ static void prompt_add(llama_tokens & prompt, const llama_tokens & tokens) {
     prompt.insert(prompt.end(), tokens.begin(), tokens.end());
 }
 
-static void prompt_add(llama_tokens & prompt, const
-    auto tmp = common_tokenize(
+static void prompt_add(llama_tokens & prompt, const llama_vocab * vocab, const std::string & txt, bool add_special, bool parse_special) {
+    auto tmp = common_tokenize(vocab, txt, add_special, parse_special);
     prompt_add(prompt, tmp);
 }
 
-static void prompt_init(llama_tokens & prompt, const
+static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) {
     prompt.clear();
 
-    prompt_add(prompt,
+    prompt_add(prompt, vocab, "<|im_start|>\n", true, true);
+}
+
+static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) {
+    const std::string& delimiter = "<|text_sep|>";
+
+    std::vector<llama_token> result;
+    size_t start = 0;
+    size_t end = str.find(delimiter);
+
+    //first token is always a newline, as it was not previously added
+    result.push_back(common_tokenize(vocab, "\n", false, true)[0]);
+
+    while (end != std::string::npos) {
+        std::string current_word = str.substr(start, end - start);
+        auto tmp = common_tokenize(vocab, current_word, false, true);
+        result.push_back(tmp[0]);
+        start = end + delimiter.length();
+        end = str.find(delimiter, start);
+    }
+
+    // Add the last part
+    std::string current_word = str.substr(start);
+    auto tmp = common_tokenize(vocab, current_word, false, true);
+    if (tmp.size() > 0) {
+        result.push_back(tmp[0]);
+    }
+    return result;
 }
 
 int main(int argc, char ** argv) {
@@ -458,8 +485,11 @@ int main(int argc, char ** argv) {
     llama_context * ctx_cts = NULL;
 
     common_init_result llama_init_ttc = common_init_from_params(params);
-
-
+
+    model_ttc = llama_init_ttc.model.get();
+    ctx_ttc = llama_init_ttc.context.get();
+
+    const llama_vocab * vocab = llama_model_get_vocab(model_ttc);
 
     // TODO: refactor in a common struct
     params.model = params.vocoder.model;
@@ -470,8 +500,9 @@ int main(int argc, char ** argv) {
     params.embedding = true;
 
     common_init_result llama_init_cts = common_init_from_params(params);
-
-
+
+    model_cts = llama_init_cts.model.get();
+    ctx_cts = llama_init_cts.context.get();
 
     std::vector<common_sampler *> smpl(n_parallel);
     for (int i = 0; i < n_parallel; ++i) {
@@ -490,6 +521,7 @@ int main(int argc, char ** argv) {
     const auto t_main_start = ggml_time_us();
 
     std::vector<llama_token> codes;
+    std::vector<llama_token> guide_tokens;
 
     // process prompt and generate voice codes
     {
@@ -497,20 +529,23 @@ int main(int argc, char ** argv) {
 
         std::vector<llama_token> prompt_inp;
 
-        prompt_init(prompt_inp,
+        prompt_init(prompt_inp, vocab);
 
-        prompt_add(prompt_inp,
+        prompt_add(prompt_inp, vocab, "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>", false, true);
 
         // convert the input text into the necessary format expected by OuteTTS
         {
             std::string prompt_clean = process_text(params.prompt);
+            if (params.vocoder.use_guide_tokens) {
+                guide_tokens = prepare_guide_tokens(vocab, prompt_clean);
+            }
 
             LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
 
-            prompt_add(prompt_inp,
+            prompt_add(prompt_inp, vocab, prompt_clean, false, true);
         }
 
-        prompt_add(prompt_inp,
+        prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
 
         // disabled to save time on tokenizing each time
         // TODO: load voices from the json files
@@ -547,7 +582,7 @@ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><
 looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
 lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
 
-        auto tmp = common_tokenize(
+        auto tmp = common_tokenize(vocab, voice_data, false, true);
         printf("\n\n");
         for (int i = 0; i < tmp.size(); ++i) {
             printf("%d, ", tmp[i]);
@@ -713,6 +748,8 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
     int n_past = batch.n_tokens;
     int n_decode = 0;
 
+    bool next_token_uses_guide_token = true;
+
     while (n_decode <= n_predict) {
         // prepare the next batch
         common_batch_clear(batch);
@@ -724,7 +761,17 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
                 continue;
             }
 
-
+            llama_token new_token_id = common_sampler_sample(smpl[i], ctx_ttc, i_batch[i]);
+
+            //guide tokens help prevent hallucinations by forcing the TTS to use the correct word
+            if (!guide_tokens.empty() && next_token_uses_guide_token && !llama_vocab_is_control(vocab, new_token_id) && !llama_vocab_is_eog(vocab, new_token_id)) {
+                llama_token guide_token = guide_tokens[0];
+                guide_tokens.erase(guide_tokens.begin());
+                new_token_id = guide_token; //ensure correct word fragment is used
+            }
+
+            //this is the token id that always precedes a new word
+            next_token_uses_guide_token = (new_token_id == 198);
 
             common_sampler_accept(smpl[i], new_token_id, true);
 
@@ -733,9 +780,9 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
             const auto * cands = common_sampler_get_candidates(smpl[i]);
 
             // is it an end of generation? -> mark the stream as finished
-            if (
+            if (llama_vocab_is_eog(vocab, new_token_id) || n_decode == n_predict) {
                 std::string reason;
-                if (
+                if (llama_vocab_is_eog(vocab, new_token_id)) {
                     reason = "eos";
                 } else {
                     reason = "n_predict";
@@ -871,7 +918,7 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
 
 #if 1
     // spectral operations
-    const int n_embd =
+    const int n_embd = llama_model_n_embd(model_cts);
     const float * embd = llama_get_embeddings(ctx_cts);
 
     auto audio = embd_to_audio(embd, n_codes, n_embd, params.cpuparams.n_threads);
@@ -920,12 +967,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
 
     LOG_INF("%s: audio written to file '%s'\n", __func__, fname.c_str());
 
-    llama_free(ctx_ttc);
-    llama_free_model(model_ttc);
-
-    llama_free(ctx_cts);
-    llama_free_model(model_cts);
-
     llama_backend_free();
 
     return 0;
```
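The guide-token changes above split the cleaned OuteTTS prompt on the "<|text_sep|>" delimiter and keep the first token of each word fragment, so sampling can be steered back to the expected word whenever token 198 (the newline that precedes a new word) was just emitted. A standalone illustration of only the splitting step, using plain strings instead of llama tokens:

```cpp
// Standalone illustration of the splitting done by prepare_guide_tokens() above:
// the cleaned prompt is cut on "<|text_sep|>"; in tts.cpp each fragment is then
// passed through common_tokenize() and its first token becomes a guide token.
#include <cstdio>
#include <string>
#include <vector>

std::vector<std::string> split_on_text_sep(const std::string & str) {
    const std::string delimiter = "<|text_sep|>";
    std::vector<std::string> words;
    size_t start = 0;
    size_t end = str.find(delimiter);
    while (end != std::string::npos) {
        words.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }
    words.push_back(str.substr(start)); // last part
    return words;
}

int main() {
    for (const auto & w : split_on_text_sep("the<|text_sep|>overall<|text_sep|>package")) {
        printf("'%s'\n", w.c_str()); // prints 'the', 'overall', 'package'
    }
    return 0;
}
```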
package/src/llama.cpp/ggml/CMakeLists.txt

```diff
@@ -185,6 +185,9 @@ option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increas
 option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
 option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
 
+# toolchain for vulkan-shaders-gen
+set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
+
 # extra artifacts
 option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
 option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
@@ -243,7 +246,8 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-metal.h
     include/ggml-rpc.h
     include/ggml-sycl.h
-    include/ggml-vulkan.h
+    include/ggml-vulkan.h
+    include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 #if (GGML_METAL)
@@ -252,26 +256,6 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 install(TARGETS ggml LIBRARY PUBLIC_HEADER)
 install(TARGETS ggml-base LIBRARY)
 
-# FIXME: this should be done in the backend cmake files
-if (GGML_METAL)
-    # FIXME: does this need to be installed with GGML_METAL_EMBED_LIBRARY?
-    install(
-        FILES src/ggml-metal/ggml-metal.metal
-        PERMISSIONS
-            OWNER_READ
-            OWNER_WRITE
-            GROUP_READ
-            WORLD_READ
-        DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    if (NOT GGML_METAL_EMBED_LIBRARY)
-        install(
-            FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
-            DESTINATION ${CMAKE_INSTALL_BINDIR}
-        )
-    endif()
-endif()
-
 if (GGML_STANDALONE)
     configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
         ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
```
package/src/llama.cpp/ggml/include/ggml-backend.h

```diff
@@ -203,6 +203,8 @@ extern "C" {
     // Backend registry
     //
 
+    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
+
     // Backend (reg) enumeration
     GGML_API size_t ggml_backend_reg_count(void);
     GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
```
package/src/llama.cpp/ggml/include/ggml.h

```diff
@@ -241,12 +241,6 @@
 #define GGML_ROPE_TYPE_MROPE 8
 #define GGML_ROPE_TYPE_VISION 24
 
-#define GGUF_MAGIC "GGUF"
-
-#define GGUF_VERSION 3
-
-#define GGUF_DEFAULT_ALIGNMENT 32
-
 #define GGML_UNUSED(x) (void)(x)
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -403,12 +397,6 @@ extern "C" {
         GGML_PREC_F32,
     };
 
-    enum ggml_backend_type {
-        GGML_BACKEND_TYPE_CPU = 0,
-        GGML_BACKEND_TYPE_GPU = 10,
-        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
-    };
-
     // model file types
     enum ggml_ftype {
         GGML_FTYPE_UNKNOWN = -1,
@@ -513,6 +501,7 @@ extern "C" {
         GGML_OP_GET_REL_POS,
         GGML_OP_ADD_REL_POS,
         GGML_OP_RWKV_WKV6,
+        GGML_OP_GATED_LINEAR_ATTN,
 
         GGML_OP_UNARY,
 
@@ -587,8 +576,6 @@ extern "C" {
     struct ggml_tensor {
         enum ggml_type type;
 
-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
-
         struct ggml_backend_buffer * buffer;
 
         int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -1397,16 +1384,20 @@ extern "C" {
            float scale,
            float max_bias);
 
-    GGML_API struct ggml_tensor *
+    GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
            struct ggml_context * ctx,
            struct ggml_tensor * a,
-            struct ggml_tensor * b
+            struct ggml_tensor * b,
+            float scale,
+            float max_bias);
 
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor *
+    GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor * a,
-            struct ggml_tensor * b
+            struct ggml_tensor * b,
+            float scale,
+            float max_bias);
 
     // rotary position embedding
     // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
@@ -1513,7 +1504,7 @@ extern "C" {
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
-    GGML_API struct ggml_tensor *
+    GGML_API struct ggml_tensor * ggml_rope_ext_back(
            struct ggml_context * ctx,
            struct ggml_tensor * a, // gradients of ggml_rope result
            struct ggml_tensor * b, // positions
@@ -1528,6 +1519,23 @@ extern "C" {
            float beta_fast,
            float beta_slow);
 
+    GGML_API struct ggml_tensor * ggml_rope_multi_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c,
+            int n_dims,
+            int sections[4],
+            int mode,
+            int n_ctx_orig,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow);
+
+
     // clamp
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_clamp(
@@ -1873,6 +1881,15 @@ extern "C" {
            struct ggml_tensor * td,
            struct ggml_tensor * state);
 
+    GGML_API struct ggml_tensor * ggml_gated_linear_attn(
+            struct ggml_context * ctx,
+            struct ggml_tensor * k,
+            struct ggml_tensor * v,
+            struct ggml_tensor * q,
+            struct ggml_tensor * g,
+            struct ggml_tensor * state,
+            float scale);
+
     // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -2111,132 +2128,6 @@ extern "C" {
            int64_t n_per_row,
            const float * imatrix);
 
-    //
-    // gguf
-    //
-
-    enum gguf_type {
-        GGUF_TYPE_UINT8 = 0,
-        GGUF_TYPE_INT8 = 1,
-        GGUF_TYPE_UINT16 = 2,
-        GGUF_TYPE_INT16 = 3,
-        GGUF_TYPE_UINT32 = 4,
-        GGUF_TYPE_INT32 = 5,
-        GGUF_TYPE_FLOAT32 = 6,
-        GGUF_TYPE_BOOL = 7,
-        GGUF_TYPE_STRING = 8,
-        GGUF_TYPE_ARRAY = 9,
-        GGUF_TYPE_UINT64 = 10,
-        GGUF_TYPE_INT64 = 11,
-        GGUF_TYPE_FLOAT64 = 12,
-        GGUF_TYPE_COUNT, // marks the end of the enum
-    };
-
-    struct gguf_context;
-
-    struct gguf_init_params {
-        bool no_alloc;
-
-        // if not NULL, create a ggml_context and allocate the tensor data in it
-        struct ggml_context ** ctx;
-    };
-
-    GGML_API struct gguf_context * gguf_init_empty(void);
-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
-
-    GGML_API void gguf_free(struct gguf_context * ctx);
-
-    GGML_API const char * gguf_type_name(enum gguf_type type);
-
-    GGML_API int gguf_get_version (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
-    GGML_API void * gguf_get_data (const struct gguf_context * ctx);
-
-    GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
-    GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
-    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
-
-    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
-    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
-
-    // will abort if the wrong type is used for the key
-    GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
-    GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
-    GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
-    GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
-    GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
-    GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
-
-    // removes key if it exists
-    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
-
-    // overrides existing values or adds a new one
-    GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
-    GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
-    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
-    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
-    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
-    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
-    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
-    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
-    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
-    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
-    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
-    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
-    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
-
-    // set or add KV pairs from another context
-    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
-
-    // manage tensor info
-    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
-    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
-    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
-
-    // writing gguf files can be done in 2 ways:
-    //
-    // - write the entire gguf_context to a binary file in a single pass:
-    //
-    // gguf_write_to_file(ctx, fname);
-    //
-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
-    //
-    // FILE * f = fopen(fname, "wb");
-    // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
-    // fwrite(f, ...);
-    // void * data = gguf_meta_get_meta_data(ctx);
-    // fseek(f, 0, SEEK_SET);
-    // fwrite(f, data, gguf_get_meta_size(ctx));
-    // free(data);
-    // fclose(f);
-    //
-
-    // write the entire context to a binary file
-    GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
-
-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
-    GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
-
 #ifdef __cplusplus
 // restrict not standard in C++
 # if defined(__GNUC__)
```