@fugood/llama.node 0.3.12 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -1
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +110 -79
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +95 -13
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +23 -6
- package/src/llama.cpp/common/arg.cpp +292 -14
- package/src/llama.cpp/common/chat.cpp +1128 -315
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +41 -73
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +93 -49
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +47 -9
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +115 -79
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +134 -128
- package/src/llama.cpp/examples/server/utils.hpp +95 -106
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +6 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
- package/src/llama.cpp/ggml/src/ggml.c +9 -4
- package/src/llama.cpp/include/llama.h +32 -14
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +183 -183
- package/src/llama.cpp/src/llama-grammar.h +13 -4
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +2 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +70 -6
- package/src/llama.cpp/src/llama-sampling.cpp +174 -67
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +154 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +691 -325
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -52
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
#define _USE_MATH_DEFINES // For M_PI on MSVC
|
|
2
|
+
|
|
1
3
|
#include "arg.h"
|
|
2
4
|
#include "common.h"
|
|
3
5
|
#include "sampling.h"
|
|
4
6
|
#include "log.h"
|
|
5
7
|
#include "llama.h"
|
|
6
|
-
|
|
7
|
-
#define _USE_MATH_DEFINES // For M_PI on MSVC
|
|
8
|
+
#include "json.hpp"
|
|
8
9
|
|
|
9
10
|
#include <algorithm>
|
|
10
11
|
#include <cmath>
|
|
@@ -16,6 +17,13 @@
|
|
|
16
17
|
#include <thread>
|
|
17
18
|
#include <vector>
|
|
18
19
|
|
|
20
|
+
using json = nlohmann::ordered_json;
|
|
21
|
+
|
|
22
|
+
// OuteTTS prompt-format versions handled by this example.
// The version selects which special tokens are emitted when building the prompt.
enum outetts_version {
    OUTETTS_V0_2 = 0, // words separated by <|text_sep|>, codes wrapped in <|code_start|> ... <|code_end|>
    OUTETTS_V0_3 = 1, // words separated by <|space|>, no code-start marker, codes terminated by <|space|>
};
|
|
26
|
+
|
|
19
27
|
//
|
|
20
28
|
// Terminal utils
|
|
21
29
|
//
|
|
@@ -371,7 +379,7 @@ static std::string replace_numbers_with_words(const std::string & input_text) {
|
|
|
371
379
|
}
|
|
372
380
|
|
|
373
381
|
// Based on: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/version/v1/prompt_processor.py#L39
|
|
374
|
-
static std::string process_text(const std::string & text) {
|
|
382
|
+
static std::string process_text(const std::string & text, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
375
383
|
|
|
376
384
|
// For now I skipped text romanization as I am unsure how to handle
|
|
377
385
|
// uroman and MeCab implementations in C++
|
|
@@ -401,7 +409,8 @@ static std::string process_text(const std::string & text) {
|
|
|
401
409
|
if (c == ' ') {
|
|
402
410
|
prompt_clean += "<|text_sep|>";
|
|
403
411
|
*/
|
|
404
|
-
|
|
412
|
+
std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
|
|
413
|
+
processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), separator);
|
|
405
414
|
|
|
406
415
|
return processed_text;
|
|
407
416
|
}
|
|
@@ -425,8 +434,8 @@ static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) {
|
|
|
425
434
|
prompt_add(prompt, vocab, "<|im_start|>\n", true, true);
|
|
426
435
|
}
|
|
427
436
|
|
|
428
|
-
static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) {
|
|
429
|
-
const std::string& delimiter = "<|text_sep|>";
|
|
437
|
+
static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str, const outetts_version tts_version = OUTETTS_V0_2) {
|
|
438
|
+
const std::string& delimiter = (tts_version == OUTETTS_V0_3 ? "<|space|>" : "<|text_sep|>");
|
|
430
439
|
|
|
431
440
|
std::vector<llama_token> result;
|
|
432
441
|
size_t start = 0;
|
|
@@ -452,6 +461,78 @@ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab,
|
|
|
452
461
|
return result;
|
|
453
462
|
}
|
|
454
463
|
|
|
464
|
+
// Loads a speaker profile from a JSON file.
//
// Returns the parsed document on success. Returns an empty (null) json value when the
// file cannot be opened or does not contain valid JSON, so the caller can detect every
// failure with a single json::empty() check.
static json speaker_from_file(const std::string & speaker_file) {
    std::ifstream file(speaker_file);
    if (!file) {
        LOG_ERR("%s: Failed to open file '%s' for reading\n", __func__, speaker_file.c_str());
        return json();
    }

    // Parse with exceptions disabled: malformed input produces a "discarded" value instead
    // of throwing, which lets us report it through the same empty-json path as an
    // unreadable file rather than terminating on an uncaught parse_error.
    json speaker = json::parse(file, /* cb */ nullptr, /* allow_exceptions */ false);
    if (speaker.is_discarded()) {
        LOG_ERR("%s: Failed to parse file '%s' as JSON\n", __func__, speaker_file.c_str());
        return json();
    }

    return speaker;
}
|
|
474
|
+
|
|
475
|
+
// Determines which OuteTTS prompt format to use.
//
// Precedence:
//   1. the "version" field of the speaker profile, when it is the string "0.2" or "0.3"
//   2. the model's chat template, when it is exactly "outetts-0.3"
//   3. OUTETTS_V0_2 as the default
//
// An unrecognized or non-string speaker version is logged and then ignored, so detection
// continues with the model and finally the default.
static outetts_version get_tts_version(llama_model *model, json speaker = json::object()) {
    if (speaker.contains("version")) {
        const json & version_field = speaker.at("version");
        if (version_field.is_string()) {
            const std::string version = version_field.get<std::string>();
            if (version == "0.2") {
                return OUTETTS_V0_2;
            }
            if (version == "0.3") {
                return OUTETTS_V0_3;
            }
            LOG_ERR("%s: Unsupported speaker version '%s'\n", __func__, version.c_str());
        } else {
            // get<std::string>() throws on non-string values (e.g. a numeric 0.3);
            // report the problem and fall back instead of aborting
            LOG_ERR("%s: Unsupported speaker version '%s'\n", __func__, version_field.dump().c_str());
        }
    }

    // Also could get version from model itself
    const char *chat_template = llama_model_chat_template(model, nullptr);
    if (chat_template && std::string(chat_template) == "outetts-0.3") {
        return OUTETTS_V0_3;
    }

    // Use 0.2 as the default version
    return OUTETTS_V0_2;
}
|
|
496
|
+
|
|
497
|
+
// Builds the text section of the prompt from a speaker profile.
//
// The result starts with "<|text_start|>" followed by every entry of speaker["words"],
// each one being its "word" string plus the version-specific separator
// ("<|space|>" for v0.3, "<|text_sep|>" otherwise).
static std::string audio_text_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
    std::string result = "<|text_start|>";

    const bool known_version = (tts_version == OUTETTS_V0_2) || (tts_version == OUTETTS_V0_3);
    if (!known_version) {
        return result;
    }

    const std::string word_sep = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
    for (const auto & entry : speaker["words"]) {
        result += entry["word"].get<std::string>() + word_sep;
    }

    return result;
}
|
|
509
|
+
|
|
510
|
+
// Builds the audio section of the prompt from a speaker profile.
//
// The result starts with "<|audio_start|>\n" and then contains one line per entry of
// speaker["words"]:
//   <word><|t_D.DD|>[<|code_start|>]<|c1|><|c2|>...<terminator>\n
// where D.DD is the entry's "duration" printed with two decimals and c1, c2, ... are
// its "codes". For v0.3 there is no code-start marker and the terminator is
// "<|space|>"; otherwise the markers are "<|code_start|>" and "<|code_end|>".
static std::string audio_data_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
    std::string result = "<|audio_start|>\n";

    if (tts_version != OUTETTS_V0_2 && tts_version != OUTETTS_V0_3) {
        return result;
    }

    const bool is_v0_3 = (tts_version == OUTETTS_V0_3);
    const std::string codes_prefix = is_v0_3 ? "" : "<|code_start|>";
    const std::string codes_suffix = is_v0_3 ? "<|space|>" : "<|code_end|>";

    for (const auto & entry : speaker["words"]) {
        // read all three fields up front, in the same order as they are emitted
        const std::string      text     = entry["word"].get<std::string>();
        const double           duration = entry["duration"].get<double>();
        const std::vector<int> code_ids = entry["codes"].get<std::vector<int>>();

        // assemble one line for this word
        std::ostringstream line;
        line << text << "<|t_" << std::fixed << std::setprecision(2) << duration << "|>" << codes_prefix;
        for (const int id : code_ids) {
            line << "<|" << id << "|>";
        }
        line << codes_suffix << "\n";

        result += line.str();
    }

    return result;
}
|
|
535
|
+
|
|
455
536
|
int main(int argc, char ** argv) {
|
|
456
537
|
common_params params;
|
|
457
538
|
|
|
@@ -523,34 +604,9 @@ int main(int argc, char ** argv) {
|
|
|
523
604
|
std::vector<llama_token> codes;
|
|
524
605
|
std::vector<llama_token> guide_tokens;
|
|
525
606
|
|
|
526
|
-
//
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
std::vector<llama_token> prompt_inp;
|
|
531
|
-
|
|
532
|
-
prompt_init(prompt_inp, vocab);
|
|
533
|
-
|
|
534
|
-
prompt_add(prompt_inp, vocab, "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>", false, true);
|
|
535
|
-
|
|
536
|
-
// convert the input text into the necessary format expected by OuteTTS
|
|
537
|
-
{
|
|
538
|
-
std::string prompt_clean = process_text(params.prompt);
|
|
539
|
-
if (params.vocoder.use_guide_tokens) {
|
|
540
|
-
guide_tokens = prepare_guide_tokens(vocab, prompt_clean);
|
|
541
|
-
}
|
|
542
|
-
|
|
543
|
-
LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
|
|
544
|
-
|
|
545
|
-
prompt_add(prompt_inp, vocab, prompt_clean, false, true);
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
|
|
549
|
-
|
|
550
|
-
// disabled to save time on tokenizing each time
|
|
551
|
-
// TODO: load voices from the json files
|
|
552
|
-
#if 0
|
|
553
|
-
const std::string voice_data = R"(<|audio_start|>
|
|
607
|
+
// the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
|
|
608
|
+
std::string audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
|
|
609
|
+
std::string audio_data = R"(<|audio_start|>
|
|
554
610
|
the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
|
|
555
611
|
overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
|
|
556
612
|
package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
|
|
@@ -582,117 +638,170 @@ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><
|
|
|
582
638
|
looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
|
|
583
639
|
lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
|
|
584
640
|
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
641
|
+
// audio data for 0.3 version
|
|
642
|
+
outetts_version tts_version = get_tts_version(model_ttc);
|
|
643
|
+
if (tts_version == OUTETTS_V0_3) {
|
|
644
|
+
audio_text = std::regex_replace(audio_text, std::regex(R"(<\|text_sep\|>)"), "<|space|>");
|
|
645
|
+
audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_start\|>)"), "");
|
|
646
|
+
audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_end\|>)"), "<|space|>");
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
// load speaker if given
|
|
650
|
+
if (!params.vocoder.speaker_file.empty()) {
|
|
651
|
+
LOG_INF("%s: loading speaker ..\n", __func__);
|
|
652
|
+
json speaker = speaker_from_file(params.vocoder.speaker_file);
|
|
653
|
+
if (speaker.empty()) {
|
|
654
|
+
LOG_ERR("%s: Failed to load speaker file '%s'\n", __func__, params.vocoder.speaker_file.c_str());
|
|
655
|
+
return 1;
|
|
656
|
+
}
|
|
657
|
+
audio_text = audio_text_from_speaker(speaker, tts_version);
|
|
658
|
+
audio_data = audio_data_from_speaker(speaker, tts_version);
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
// process prompt and generate voice codes
|
|
662
|
+
{
|
|
663
|
+
LOG_INF("%s: constructing prompt ..\n", __func__);
|
|
664
|
+
|
|
665
|
+
std::vector<llama_token> prompt_inp;
|
|
666
|
+
|
|
667
|
+
prompt_init(prompt_inp, vocab);
|
|
668
|
+
|
|
669
|
+
prompt_add(prompt_inp, vocab, audio_text, false, true);
|
|
670
|
+
|
|
671
|
+
// convert the input text into the necessary format expected by OuteTTS
|
|
672
|
+
{
|
|
673
|
+
std::string prompt_clean = process_text(params.prompt, tts_version);
|
|
674
|
+
if (params.vocoder.use_guide_tokens) {
|
|
675
|
+
guide_tokens = prepare_guide_tokens(vocab, prompt_clean, tts_version);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
|
|
679
|
+
|
|
680
|
+
prompt_add(prompt_inp, vocab, prompt_clean, false, true);
|
|
589
681
|
}
|
|
590
|
-
|
|
682
|
+
|
|
683
|
+
prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
|
|
684
|
+
|
|
685
|
+
if (!params.vocoder.speaker_file.empty()) {
|
|
686
|
+
prompt_add(prompt_inp, vocab, audio_data, false, true);
|
|
687
|
+
} else {
|
|
688
|
+
// tokenize the voice data on every run; the pre-tokenized list in the #else branch is currently disabled
|
|
689
|
+
#if 1
|
|
690
|
+
const std::string voice_data = audio_data;
|
|
691
|
+
|
|
692
|
+
auto tmp = common_tokenize(vocab, voice_data, false, true);
|
|
693
|
+
printf("\n\n");
|
|
694
|
+
for (size_t i = 0; i < tmp.size(); ++i) {
|
|
695
|
+
printf("%d, ", tmp[i]);
|
|
696
|
+
}
|
|
697
|
+
printf("\n\n");
|
|
698
|
+
prompt_add(prompt_inp, tmp);
|
|
591
699
|
#else
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
700
|
+
prompt_add(prompt_inp, llama_tokens {
|
|
701
|
+
151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
|
|
702
|
+
152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
|
|
703
|
+
151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
|
|
704
|
+
151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
|
|
705
|
+
153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
|
|
706
|
+
153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
|
|
707
|
+
152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
|
|
708
|
+
152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
|
|
709
|
+
152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
|
|
710
|
+
153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
|
|
711
|
+
153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
|
|
712
|
+
152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
|
|
713
|
+
153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
|
|
714
|
+
153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
|
|
715
|
+
151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
|
|
716
|
+
152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
|
|
717
|
+
152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
|
|
718
|
+
151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
|
|
719
|
+
152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
|
|
720
|
+
152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
|
|
721
|
+
152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
|
|
722
|
+
152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
|
|
723
|
+
152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
|
|
724
|
+
153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
|
|
725
|
+
155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
|
|
726
|
+
152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
|
|
727
|
+
32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
|
|
728
|
+
152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
|
|
729
|
+
153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
|
|
730
|
+
152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
|
|
731
|
+
151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
|
|
732
|
+
153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
|
|
733
|
+
152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
|
|
734
|
+
152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
|
|
735
|
+
152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
|
|
736
|
+
153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
|
|
737
|
+
152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
|
|
738
|
+
152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
|
|
739
|
+
153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
|
|
740
|
+
152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
|
|
741
|
+
152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
|
|
742
|
+
152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
|
|
743
|
+
155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
|
|
744
|
+
152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
|
|
745
|
+
14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
|
|
746
|
+
153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
|
|
747
|
+
198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
|
|
748
|
+
152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
|
|
749
|
+
151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
|
|
750
|
+
153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
|
|
751
|
+
152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
|
|
752
|
+
152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
|
|
753
|
+
152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
|
|
754
|
+
153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
|
|
755
|
+
152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
|
|
756
|
+
152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
|
|
757
|
+
155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
|
|
758
|
+
151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
|
|
759
|
+
153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
|
|
760
|
+
151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
|
|
761
|
+
155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
|
|
762
|
+
151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
|
|
763
|
+
153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
|
|
764
|
+
152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
|
|
765
|
+
152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
|
|
766
|
+
152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
|
|
767
|
+
151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
|
|
768
|
+
152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
|
|
769
|
+
151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
|
|
770
|
+
152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
|
|
771
|
+
151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
|
|
772
|
+
153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
|
|
773
|
+
151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
|
|
774
|
+
152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
|
|
775
|
+
153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
|
|
776
|
+
151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
|
|
777
|
+
151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
|
|
778
|
+
152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
|
|
779
|
+
151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
|
|
780
|
+
151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
|
|
781
|
+
152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
|
|
782
|
+
151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
|
|
783
|
+
152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
|
|
784
|
+
153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
|
|
785
|
+
152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
|
|
786
|
+
153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
|
|
787
|
+
152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
|
|
788
|
+
152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
|
|
789
|
+
151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
|
|
790
|
+
151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
|
|
791
|
+
151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
|
|
792
|
+
151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
|
|
793
|
+
151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
|
|
794
|
+
152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
|
|
795
|
+
152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
|
|
796
|
+
152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
|
|
797
|
+
153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
|
|
798
|
+
152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
|
|
799
|
+
152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
|
|
800
|
+
152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
|
|
801
|
+
152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
|
|
802
|
+
151670,});
|
|
695
803
|
#endif
|
|
804
|
+
}
|
|
696
805
|
|
|
697
806
|
// print the prompt token-by-token
|
|
698
807
|
|
|
@@ -102,9 +102,11 @@ endif()
|
|
|
102
102
|
|
|
103
103
|
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
|
104
104
|
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
|
105
|
+
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
|
|
105
106
|
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
|
106
107
|
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
|
|
107
108
|
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
|
109
|
+
option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
|
|
108
110
|
option(GGML_AVX512 "ggml: enable AVX512F" OFF)
|
|
109
111
|
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
|
|
110
112
|
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
|
|
@@ -121,6 +123,7 @@ endif()
|
|
|
121
123
|
option(GGML_LASX "ggml: enable lasx" ON)
|
|
122
124
|
option(GGML_LSX "ggml: enable lsx" ON)
|
|
123
125
|
option(GGML_RVV "ggml: enable rvv" ON)
|
|
126
|
+
option(GGML_VXE "ggml: enable vxe" ON)
|
|
124
127
|
|
|
125
128
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
126
129
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
|
@@ -150,12 +153,17 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
|
|
150
153
|
"ggml: max. batch size for using peer access")
|
|
151
154
|
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
|
152
155
|
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
|
156
|
+
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
|
|
153
157
|
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
|
154
158
|
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
|
159
|
+
set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
|
|
160
|
+
"ggml: cuda link binary compression mode; requires cuda 12.8+")
|
|
161
|
+
set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
|
|
155
162
|
|
|
156
163
|
option(GGML_HIP "ggml: use HIP" OFF)
|
|
157
164
|
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
|
158
165
|
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
|
166
|
+
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
|
159
167
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
|
160
168
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
|
161
169
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
|
@@ -187,6 +195,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
|
|
|
187
195
|
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
|
|
188
196
|
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
|
|
189
197
|
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
|
|
198
|
+
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
|
|
199
|
+
"gmml: OpenCL API version to target")
|
|
190
200
|
|
|
191
201
|
# toolchain for vulkan-shaders-gen
|
|
192
202
|
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
|
|
@@ -209,6 +219,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
|
|
209
219
|
|
|
210
220
|
find_package(Threads REQUIRED)
|
|
211
221
|
|
|
222
|
+
include(GNUInstallDirs)
|
|
223
|
+
|
|
212
224
|
#
|
|
213
225
|
# build the library
|
|
214
226
|
#
|
|
@@ -232,7 +244,6 @@ endif ()
|
|
|
232
244
|
# install
|
|
233
245
|
#
|
|
234
246
|
|
|
235
|
-
include(GNUInstallDirs)
|
|
236
247
|
include(CMakePackageConfigHelpers)
|
|
237
248
|
|
|
238
249
|
# all public headers
|
|
@@ -243,6 +254,7 @@ set(GGML_PUBLIC_HEADERS
|
|
|
243
254
|
include/ggml-backend.h
|
|
244
255
|
include/ggml-blas.h
|
|
245
256
|
include/ggml-cann.h
|
|
257
|
+
include/ggml-cpp.h
|
|
246
258
|
include/ggml-cuda.h
|
|
247
259
|
include/ggml-kompute.h
|
|
248
260
|
include/ggml-opt.h
|
|
@@ -19,7 +19,7 @@ struct ggml_tallocr {
|
|
|
19
19
|
};
|
|
20
20
|
|
|
21
21
|
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
|
22
|
-
GGML_API
|
|
22
|
+
GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
|
23
23
|
|
|
24
24
|
// Graph allocator
|
|
25
25
|
/*
|
|
@@ -56,7 +56,7 @@ extern "C" {
|
|
|
56
56
|
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
|
57
57
|
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
|
58
58
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
|
59
|
-
GGML_API
|
|
59
|
+
GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
60
60
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
|
61
61
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
|
62
62
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
@@ -342,8 +342,8 @@ extern "C" {
|
|
|
342
342
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
|
-
GGML_API
|
|
346
|
-
GGML_API
|
|
345
|
+
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
346
|
+
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
|
|
347
347
|
|
|
348
348
|
// CPU buffer types are always available
|
|
349
349
|
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
|
@@ -8,7 +8,7 @@ extern "C" {
|
|
|
8
8
|
#endif
|
|
9
9
|
|
|
10
10
|
// the compute plan that needs to be prepared for ggml_graph_compute()
|
|
11
|
-
// since https://github.com/
|
|
11
|
+
// since https://github.com/ggml-org/ggml/issues/287
|
|
12
12
|
struct ggml_cplan {
|
|
13
13
|
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
|
14
14
|
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
|
@@ -80,6 +80,7 @@ extern "C" {
|
|
|
80
80
|
GGML_BACKEND_API int ggml_cpu_has_avx (void);
|
|
81
81
|
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
|
|
82
82
|
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
|
|
83
|
+
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
|
|
83
84
|
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
|
|
84
85
|
GGML_BACKEND_API int ggml_cpu_has_fma (void);
|
|
85
86
|
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
|
|
@@ -95,9 +96,11 @@ extern "C" {
|
|
|
95
96
|
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
|
|
96
97
|
GGML_BACKEND_API int ggml_cpu_has_sve (void);
|
|
97
98
|
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
|
99
|
+
GGML_BACKEND_API int ggml_cpu_has_sme (void);
|
|
98
100
|
// other
|
|
99
101
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
|
100
102
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
|
103
|
+
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
|
101
104
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
|
102
105
|
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
|
103
106
|
|
|
@@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
|
|
45
45
|
|
|
46
46
|
GGML_DEPRECATED(
|
|
47
47
|
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
|
48
|
-
"obsoleted by the new device interface - https://github.com/
|
|
48
|
+
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
|
|
49
49
|
|
|
50
50
|
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
|
51
51
|
|
|
@@ -198,7 +198,7 @@
|
|
|
198
198
|
|
|
199
199
|
#ifndef __GNUC__
|
|
200
200
|
# define GGML_ATTRIBUTE_FORMAT(...)
|
|
201
|
-
#elif defined(__MINGW32__)
|
|
201
|
+
#elif defined(__MINGW32__) && !defined(__clang__)
|
|
202
202
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
|
203
203
|
#else
|
|
204
204
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
@@ -2140,7 +2140,11 @@ extern "C" {
|
|
|
2140
2140
|
# define GGML_RESTRICT
|
|
2141
2141
|
# endif
|
|
2142
2142
|
#else
|
|
2143
|
-
#
|
|
2143
|
+
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
|
|
2144
|
+
# define GGML_RESTRICT __restrict
|
|
2145
|
+
# else
|
|
2146
|
+
# define GGML_RESTRICT restrict
|
|
2147
|
+
# endif
|
|
2144
2148
|
#endif
|
|
2145
2149
|
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
|
2146
2150
|
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|