@fugood/llama.node 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +89 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +132 -13
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +33 -174
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +10 -9
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +79 -34
- package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +196 -108
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +113 -101
- package/src/llama.cpp/examples/server/utils.hpp +94 -105
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +263 -151
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
- package/src/llama.cpp/ggml/src/ggml.c +93 -5
- package/src/llama.cpp/include/llama.h +105 -27
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +123 -16
- package/src/llama.cpp/src/llama-arch.h +19 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -109
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +8230 -122
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +51 -9837
- package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
- /package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
#define _USE_MATH_DEFINES // For M_PI on MSVC
|
|
2
|
+
|
|
1
3
|
#include "arg.h"
|
|
2
4
|
#include "common.h"
|
|
3
5
|
#include "sampling.h"
|
|
4
6
|
#include "log.h"
|
|
5
7
|
#include "llama.h"
|
|
6
|
-
|
|
7
|
-
#define _USE_MATH_DEFINES // For M_PI on MSVC
|
|
8
|
+
#include "json.hpp"
|
|
8
9
|
|
|
9
10
|
#include <algorithm>
|
|
10
11
|
#include <cmath>
|
|
@@ -16,6 +17,13 @@
|
|
|
16
17
|
#include <thread>
|
|
17
18
|
#include <vector>
|
|
18
19
|
|
|
20
|
+
using json = nlohmann::ordered_json;
|
|
21
|
+
|
|
22
|
+
enum outetts_version {
|
|
23
|
+
OUTETTS_V0_2,
|
|
24
|
+
OUTETTS_V0_3,
|
|
25
|
+
};
|
|
26
|
+
|
|
19
27
|
//
|
|
20
28
|
// Terminal utils
|
|
21
29
|
//
|
|
@@ -79,11 +87,11 @@ struct wav_header {
|
|
|
79
87
|
uint32_t data_size;
|
|
80
88
|
};
|
|
81
89
|
|
|
82
|
-
static void save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) {
|
|
90
|
+
static bool save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) {
|
|
83
91
|
std::ofstream file(fname, std::ios::binary);
|
|
84
92
|
if (!file) {
|
|
85
|
-
LOG_ERR("%s: Failed to open file '%s' for writing", __func__, fname.c_str());
|
|
86
|
-
return;
|
|
93
|
+
LOG_ERR("%s: Failed to open file '%s' for writing.\n", __func__, fname.c_str());
|
|
94
|
+
return false;
|
|
87
95
|
}
|
|
88
96
|
|
|
89
97
|
wav_header header;
|
|
@@ -100,7 +108,7 @@ static void save_wav16(const std::string & fname, const std::vector<float> & dat
|
|
|
100
108
|
file.write(reinterpret_cast<const char*>(&pcm_sample), sizeof(pcm_sample));
|
|
101
109
|
}
|
|
102
110
|
|
|
103
|
-
file.close();
|
|
111
|
+
return file.good();
|
|
104
112
|
}
|
|
105
113
|
|
|
106
114
|
static void fill_hann_window(int length, bool periodic, float * output) {
|
|
@@ -371,7 +379,7 @@ static std::string replace_numbers_with_words(const std::string & input_text) {
|
|
|
371
379
|
}
|
|
372
380
|
|
|
373
381
|
// Based on: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/version/v1/prompt_processor.py#L39
|
|
374
|
-
static std::string process_text(const std::string & text) {
|
|
382
|
+
static std::string process_text(const std::string & text, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
375
383
|
|
|
376
384
|
// For now I skipped text romanization as I am unsure how to handle
|
|
377
385
|
// uroman and MeCab implementations in C++
|
|
@@ -401,7 +409,8 @@ static std::string process_text(const std::string & text) {
|
|
|
401
409
|
if (c == ' ') {
|
|
402
410
|
prompt_clean += "<|text_sep|>";
|
|
403
411
|
*/
|
|
404
|
-
|
|
412
|
+
std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
|
|
413
|
+
processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), separator);
|
|
405
414
|
|
|
406
415
|
return processed_text;
|
|
407
416
|
}
|
|
@@ -425,8 +434,8 @@ static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) {
|
|
|
425
434
|
prompt_add(prompt, vocab, "<|im_start|>\n", true, true);
|
|
426
435
|
}
|
|
427
436
|
|
|
428
|
-
static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) {
|
|
429
|
-
const std::string& delimiter = "<|text_sep|>";
|
|
437
|
+
static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
438
|
+
const std::string& delimiter = (tts_version == OUTETTS_V0_3 ? "<|space|>" : "<|text_sep|>");
|
|
430
439
|
|
|
431
440
|
std::vector<llama_token> result;
|
|
432
441
|
size_t start = 0;
|
|
@@ -452,9 +461,82 @@ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab,
|
|
|
452
461
|
return result;
|
|
453
462
|
}
|
|
454
463
|
|
|
464
|
+
static json speaker_from_file(const std::string & speaker_file) {
|
|
465
|
+
std::ifstream file(speaker_file);
|
|
466
|
+
if (!file) {
|
|
467
|
+
LOG_ERR("%s: Failed to open file '%s' for reading\n", __func__, speaker_file.c_str());
|
|
468
|
+
return json();
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
json speaker = json::parse(file);
|
|
472
|
+
return speaker;
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
static outetts_version get_tts_version(llama_model *model, json speaker = json::object()) {
|
|
476
|
+
if (speaker.contains("version")) {
|
|
477
|
+
std::string version = speaker["version"].get<std::string>();
|
|
478
|
+
if (version == "0.2") {
|
|
479
|
+
return OUTETTS_V0_2;
|
|
480
|
+
} else if (version == "0.3") {
|
|
481
|
+
return OUTETTS_V0_3;
|
|
482
|
+
} else {
|
|
483
|
+
LOG_ERR("%s: Unsupported speaker version '%s'\n", __func__, version.c_str());
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
// Also could get version from model itself
|
|
488
|
+
const char *chat_template = llama_model_chat_template(model, nullptr);
|
|
489
|
+
if (chat_template && std::string(chat_template) == "outetts-0.3") {
|
|
490
|
+
return OUTETTS_V0_3;
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
// Use 0.2 as the default version
|
|
494
|
+
return OUTETTS_V0_2;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
static std::string audio_text_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
498
|
+
std::string audio_text = "<|text_start|>";
|
|
499
|
+
|
|
500
|
+
if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
|
|
501
|
+
std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
|
|
502
|
+
for (const auto &word : speaker["words"]) {
|
|
503
|
+
audio_text += word["word"].get<std::string>() + separator;
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
return audio_text;
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
static std::string audio_data_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
511
|
+
std::string audio_data = "<|audio_start|>\n";
|
|
512
|
+
|
|
513
|
+
if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
|
|
514
|
+
std::string code_start = (tts_version == OUTETTS_V0_3) ? "" : "<|code_start|>";
|
|
515
|
+
std::string code_end = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|code_end|>";
|
|
516
|
+
for (const auto &word : speaker["words"]) {
|
|
517
|
+
std::string word_text = word["word"].get<std::string>();
|
|
518
|
+
double duration = word["duration"].get<double>();
|
|
519
|
+
std::vector<int> codes = word["codes"].get<std::vector<int>>();
|
|
520
|
+
|
|
521
|
+
// Create the audio output entry
|
|
522
|
+
std::ostringstream word_entry;
|
|
523
|
+
word_entry << word_text << "<|t_" << std::fixed << std::setprecision(2)
|
|
524
|
+
<< duration << "|>" + code_start;
|
|
525
|
+
for (const auto &Code : codes) {
|
|
526
|
+
word_entry << "<|" << Code << "|>";
|
|
527
|
+
}
|
|
528
|
+
word_entry << code_end << "\n";
|
|
529
|
+
audio_data += word_entry.str();
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
return audio_data;
|
|
534
|
+
}
|
|
535
|
+
|
|
455
536
|
int main(int argc, char ** argv) {
|
|
456
537
|
common_params params;
|
|
457
538
|
|
|
539
|
+
params.out_file = "output.wav";
|
|
458
540
|
params.prompt = "";
|
|
459
541
|
|
|
460
542
|
params.n_predict = 4096;
|
|
@@ -523,34 +605,9 @@ int main(int argc, char ** argv) {
|
|
|
523
605
|
std::vector<llama_token> codes;
|
|
524
606
|
std::vector<llama_token> guide_tokens;
|
|
525
607
|
|
|
526
|
-
//
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
std::vector<llama_token> prompt_inp;
|
|
531
|
-
|
|
532
|
-
prompt_init(prompt_inp, vocab);
|
|
533
|
-
|
|
534
|
-
prompt_add(prompt_inp, vocab, "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>", false, true);
|
|
535
|
-
|
|
536
|
-
// convert the input text into the necessary format expected by OuteTTS
|
|
537
|
-
{
|
|
538
|
-
std::string prompt_clean = process_text(params.prompt);
|
|
539
|
-
if (params.vocoder.use_guide_tokens) {
|
|
540
|
-
guide_tokens = prepare_guide_tokens(vocab, prompt_clean);
|
|
541
|
-
}
|
|
542
|
-
|
|
543
|
-
LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
|
|
544
|
-
|
|
545
|
-
prompt_add(prompt_inp, vocab, prompt_clean, false, true);
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
|
|
549
|
-
|
|
550
|
-
// disabled to save time on tokenizing each time
|
|
551
|
-
// TODO: load voices from the json files
|
|
552
|
-
#if 0
|
|
553
|
-
const std::string voice_data = R"(<|audio_start|>
|
|
608
|
+
// the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
|
|
609
|
+
std::string audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
|
|
610
|
+
std::string audio_data = R"(<|audio_start|>
|
|
554
611
|
the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
|
|
555
612
|
overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
|
|
556
613
|
package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
|
|
@@ -582,117 +639,170 @@ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><
|
|
|
582
639
|
looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
|
|
583
640
|
lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
|
|
584
641
|
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
642
|
+
// audio data for 0.3 version
|
|
643
|
+
outetts_version tts_version = get_tts_version(model_ttc);
|
|
644
|
+
if (tts_version == OUTETTS_V0_3) {
|
|
645
|
+
audio_text = std::regex_replace(audio_text, std::regex(R"(<\|text_sep\|>)"), "<|space|>");
|
|
646
|
+
audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_start\|>)"), "");
|
|
647
|
+
audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_end\|>)"), "<|space|>");
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
// load speaker if given
|
|
651
|
+
if (!params.vocoder.speaker_file.empty()) {
|
|
652
|
+
LOG_INF("%s: loading speaker ..\n", __func__);
|
|
653
|
+
json speaker = speaker_from_file(params.vocoder.speaker_file);
|
|
654
|
+
if (speaker.empty()) {
|
|
655
|
+
LOG_ERR("%s: Failed to load speaker file '%s'\n", __func__, params.vocoder.speaker_file.c_str());
|
|
656
|
+
return 1;
|
|
589
657
|
}
|
|
590
|
-
|
|
658
|
+
audio_text = audio_text_from_speaker(speaker, tts_version);
|
|
659
|
+
audio_data = audio_data_from_speaker(speaker, tts_version);
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
// process prompt and generate voice codes
|
|
663
|
+
{
|
|
664
|
+
LOG_INF("%s: constructing prompt ..\n", __func__);
|
|
665
|
+
|
|
666
|
+
std::vector<llama_token> prompt_inp;
|
|
667
|
+
|
|
668
|
+
prompt_init(prompt_inp, vocab);
|
|
669
|
+
|
|
670
|
+
prompt_add(prompt_inp, vocab, audio_text, false, true);
|
|
671
|
+
|
|
672
|
+
// convert the input text into the necessary format expected by OuteTTS
|
|
673
|
+
{
|
|
674
|
+
std::string prompt_clean = process_text(params.prompt, tts_version);
|
|
675
|
+
if (params.vocoder.use_guide_tokens) {
|
|
676
|
+
guide_tokens = prepare_guide_tokens(vocab, prompt_clean, tts_version);
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
|
|
680
|
+
|
|
681
|
+
prompt_add(prompt_inp, vocab, prompt_clean, false, true);
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
|
|
685
|
+
|
|
686
|
+
if (!params.vocoder.speaker_file.empty()) {
|
|
687
|
+
prompt_add(prompt_inp, vocab, audio_data, false, true);
|
|
688
|
+
} else {
|
|
689
|
+
// disabled to save time on tokenizing each time
|
|
690
|
+
#if 1
|
|
691
|
+
const std::string voice_data = audio_data;
|
|
692
|
+
|
|
693
|
+
auto tmp = common_tokenize(vocab, voice_data, false, true);
|
|
694
|
+
printf("\n\n");
|
|
695
|
+
for (size_t i = 0; i < tmp.size(); ++i) {
|
|
696
|
+
printf("%d, ", tmp[i]);
|
|
697
|
+
}
|
|
698
|
+
printf("\n\n");
|
|
699
|
+
prompt_add(prompt_inp, tmp);
|
|
591
700
|
#else
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
701
|
+
prompt_add(prompt_inp, llama_tokens {
|
|
702
|
+
151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
|
|
703
|
+
152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
|
|
704
|
+
151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
|
|
705
|
+
151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
|
|
706
|
+
153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
|
|
707
|
+
153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
|
|
708
|
+
152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
|
|
709
|
+
152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
|
|
710
|
+
152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
|
|
711
|
+
153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
|
|
712
|
+
153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
|
|
713
|
+
152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
|
|
714
|
+
153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
|
|
715
|
+
153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
|
|
716
|
+
151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
|
|
717
|
+
152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
|
|
718
|
+
152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
|
|
719
|
+
151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
|
|
720
|
+
152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
|
|
721
|
+
152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
|
|
722
|
+
152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
|
|
723
|
+
152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
|
|
724
|
+
152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
|
|
725
|
+
153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
|
|
726
|
+
155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
|
|
727
|
+
152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
|
|
728
|
+
32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
|
|
729
|
+
152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
|
|
730
|
+
153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
|
|
731
|
+
152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
|
|
732
|
+
151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
|
|
733
|
+
153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
|
|
734
|
+
152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
|
|
735
|
+
152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
|
|
736
|
+
152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
|
|
737
|
+
153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
|
|
738
|
+
152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
|
|
739
|
+
152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
|
|
740
|
+
153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
|
|
741
|
+
152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
|
|
742
|
+
152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
|
|
743
|
+
152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
|
|
744
|
+
155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
|
|
745
|
+
152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
|
|
746
|
+
14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
|
|
747
|
+
153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
|
|
748
|
+
198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
|
|
749
|
+
152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
|
|
750
|
+
151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
|
|
751
|
+
153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
|
|
752
|
+
152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
|
|
753
|
+
152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
|
|
754
|
+
152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
|
|
755
|
+
153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
|
|
756
|
+
152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
|
|
757
|
+
152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
|
|
758
|
+
155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
|
|
759
|
+
151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
|
|
760
|
+
153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
|
|
761
|
+
151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
|
|
762
|
+
155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
|
|
763
|
+
151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
|
|
764
|
+
153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
|
|
765
|
+
152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
|
|
766
|
+
152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
|
|
767
|
+
152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
|
|
768
|
+
151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
|
|
769
|
+
152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
|
|
770
|
+
151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
|
|
771
|
+
152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
|
|
772
|
+
151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
|
|
773
|
+
153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
|
|
774
|
+
151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
|
|
775
|
+
152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
|
|
776
|
+
153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
|
|
777
|
+
151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
|
|
778
|
+
151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
|
|
779
|
+
152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
|
|
780
|
+
151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
|
|
781
|
+
151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
|
|
782
|
+
152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
|
|
783
|
+
151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
|
|
784
|
+
152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
|
|
785
|
+
153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
|
|
786
|
+
152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
|
|
787
|
+
153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
|
|
788
|
+
152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
|
|
789
|
+
152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
|
|
790
|
+
151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
|
|
791
|
+
151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
|
|
792
|
+
151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
|
|
793
|
+
151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
|
|
794
|
+
151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
|
|
795
|
+
152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
|
|
796
|
+
152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
|
|
797
|
+
152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
|
|
798
|
+
153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
|
|
799
|
+
152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
|
|
800
|
+
152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
|
|
801
|
+
152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
|
|
802
|
+
152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
|
|
803
|
+
151670,});
|
|
695
804
|
#endif
|
|
805
|
+
}
|
|
696
806
|
|
|
697
807
|
// print the prompt token-by-token
|
|
698
808
|
|
|
@@ -951,8 +1061,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
|
|
|
951
1061
|
}
|
|
952
1062
|
#endif
|
|
953
1063
|
|
|
954
|
-
const std::string fname = "output.wav";
|
|
955
|
-
|
|
956
1064
|
const int n_sr = 24000; // sampling rate
|
|
957
1065
|
|
|
958
1066
|
// zero out first 0.25 seconds
|
|
@@ -963,11 +1071,15 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
|
|
|
963
1071
|
LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f);
|
|
964
1072
|
LOG_INF("%s: total time: %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f);
|
|
965
1073
|
|
|
966
|
-
|
|
1074
|
+
int retval = 0;
|
|
967
1075
|
|
|
968
|
-
|
|
1076
|
+
if (save_wav16(params.out_file, audio, n_sr)) {
|
|
1077
|
+
LOG_INF("%s: audio written to file '%s'\n", __func__, params.out_file.c_str());
|
|
1078
|
+
} else {
|
|
1079
|
+
retval = ENOENT;
|
|
1080
|
+
}
|
|
969
1081
|
|
|
970
1082
|
llama_backend_free();
|
|
971
1083
|
|
|
972
|
-
return
|
|
1084
|
+
return retval;
|
|
973
1085
|
}
|
|
@@ -102,9 +102,11 @@ endif()
|
|
|
102
102
|
|
|
103
103
|
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
|
104
104
|
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
|
105
|
+
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
|
|
105
106
|
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
|
106
107
|
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
|
|
107
108
|
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
|
109
|
+
option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
|
|
108
110
|
option(GGML_AVX512 "ggml: enable AVX512F" OFF)
|
|
109
111
|
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
|
|
110
112
|
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
|
|
@@ -121,6 +123,7 @@ endif()
|
|
|
121
123
|
option(GGML_LASX "ggml: enable lasx" ON)
|
|
122
124
|
option(GGML_LSX "ggml: enable lsx" ON)
|
|
123
125
|
option(GGML_RVV "ggml: enable rvv" ON)
|
|
126
|
+
option(GGML_VXE "ggml: enable vxe" ON)
|
|
124
127
|
|
|
125
128
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
126
129
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
|
@@ -150,12 +153,17 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
|
|
150
153
|
"ggml: max. batch size for using peer access")
|
|
151
154
|
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
|
152
155
|
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
|
156
|
+
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
|
|
153
157
|
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
|
154
158
|
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
|
159
|
+
set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
|
|
160
|
+
"ggml: cuda link binary compression mode; requires cuda 12.8+")
|
|
161
|
+
set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
|
|
155
162
|
|
|
156
163
|
option(GGML_HIP "ggml: use HIP" OFF)
|
|
157
164
|
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
|
158
165
|
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
|
166
|
+
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
|
159
167
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
|
160
168
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
|
161
169
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
|
@@ -178,6 +186,7 @@ option(GGML_OPENMP "ggml: use OpenMP"
|
|
|
178
186
|
option(GGML_RPC "ggml: use RPC" OFF)
|
|
179
187
|
option(GGML_SYCL "ggml: use SYCL" OFF)
|
|
180
188
|
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
|
189
|
+
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
|
|
181
190
|
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
|
182
191
|
"ggml: sycl target device")
|
|
183
192
|
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
|
@@ -187,6 +196,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
|
|
|
187
196
|
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
|
|
188
197
|
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
|
|
189
198
|
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
|
|
199
|
+
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
|
|
200
|
+
"ggml: OpenCL API version to target")
|
|
190
201
|
|
|
191
202
|
# toolchain for vulkan-shaders-gen
|
|
192
203
|
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
|
|
@@ -209,6 +220,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
|
|
209
220
|
|
|
210
221
|
find_package(Threads REQUIRED)
|
|
211
222
|
|
|
223
|
+
include(GNUInstallDirs)
|
|
224
|
+
|
|
212
225
|
#
|
|
213
226
|
# build the library
|
|
214
227
|
#
|
|
@@ -232,7 +245,6 @@ endif ()
|
|
|
232
245
|
# install
|
|
233
246
|
#
|
|
234
247
|
|
|
235
|
-
include(GNUInstallDirs)
|
|
236
248
|
include(CMakePackageConfigHelpers)
|
|
237
249
|
|
|
238
250
|
# all public headers
|
|
@@ -243,6 +255,7 @@ set(GGML_PUBLIC_HEADERS
|
|
|
243
255
|
include/ggml-backend.h
|
|
244
256
|
include/ggml-blas.h
|
|
245
257
|
include/ggml-cann.h
|
|
258
|
+
include/ggml-cpp.h
|
|
246
259
|
include/ggml-cuda.h
|
|
247
260
|
include/ggml-kompute.h
|
|
248
261
|
include/ggml-opt.h
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
function(ggml_get_flags CCID CCVER)
|
|
2
|
+
set(C_FLAGS "")
|
|
3
|
+
set(CXX_FLAGS "")
|
|
4
|
+
|
|
5
|
+
if (CCID MATCHES "Clang")
|
|
6
|
+
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
|
|
7
|
+
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
|
|
8
|
+
|
|
9
|
+
if (
|
|
10
|
+
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
|
11
|
+
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
|
12
|
+
)
|
|
13
|
+
list(APPEND C_FLAGS -Wdouble-promotion)
|
|
14
|
+
endif()
|
|
15
|
+
elseif (CCID STREQUAL "GNU")
|
|
16
|
+
set(C_FLAGS -Wdouble-promotion)
|
|
17
|
+
set(CXX_FLAGS -Wno-array-bounds)
|
|
18
|
+
|
|
19
|
+
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
|
20
|
+
list(APPEND CXX_FLAGS -Wextra-semi)
|
|
21
|
+
endif()
|
|
22
|
+
endif()
|
|
23
|
+
|
|
24
|
+
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
|
25
|
+
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
|
26
|
+
endfunction()
|
|
@@ -19,7 +19,7 @@ struct ggml_tallocr {
|
|
|
19
19
|
};
|
|
20
20
|
|
|
21
21
|
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
|
22
|
-
GGML_API
|
|
22
|
+
GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
|
23
23
|
|
|
24
24
|
// Graph allocator
|
|
25
25
|
/*
|