@fugood/llama.node 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +2 -1
  18. package/package.json +1 -1
  19. package/src/LlamaCompletionWorker.cpp +14 -0
  20. package/src/LlamaContext.cpp +110 -79
  21. package/src/LlamaContext.h +1 -1
  22. package/src/common.hpp +1 -2
  23. package/src/llama.cpp/.github/workflows/build.yml +95 -13
  24. package/src/llama.cpp/.github/workflows/docker.yml +2 -0
  25. package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  27. package/src/llama.cpp/common/CMakeLists.txt +23 -6
  28. package/src/llama.cpp/common/arg.cpp +292 -14
  29. package/src/llama.cpp/common/chat.cpp +1128 -315
  30. package/src/llama.cpp/common/chat.h +135 -0
  31. package/src/llama.cpp/common/common.cpp +27 -171
  32. package/src/llama.cpp/common/common.h +41 -73
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  34. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  35. package/src/llama.cpp/common/llguidance.cpp +3 -3
  36. package/src/llama.cpp/common/log.cpp +1 -0
  37. package/src/llama.cpp/common/log.h +2 -1
  38. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
  39. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
  40. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  41. package/src/llama.cpp/common/sampling.cpp +93 -49
  42. package/src/llama.cpp/common/speculative.cpp +6 -5
  43. package/src/llama.cpp/common/speculative.h +1 -1
  44. package/src/llama.cpp/docs/build.md +47 -9
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
  47. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  48. package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
  49. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
  50. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
  52. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  54. package/src/llama.cpp/examples/llava/clip.h +19 -3
  55. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  56. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  57. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  58. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
  59. package/src/llama.cpp/examples/main/main.cpp +73 -28
  60. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
  61. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
  62. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
  63. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  64. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  65. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  66. package/src/llama.cpp/examples/run/run.cpp +115 -79
  67. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/server/httplib.h +381 -292
  69. package/src/llama.cpp/examples/server/server.cpp +134 -128
  70. package/src/llama.cpp/examples/server/utils.hpp +95 -106
  71. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  72. package/src/llama.cpp/examples/tts/tts.cpp +251 -142
  73. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  74. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  75. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  76. package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
  77. package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
  78. package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
  79. package/src/llama.cpp/ggml/include/ggml.h +6 -2
  80. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  81. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  82. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  83. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  84. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  85. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
  86. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  87. package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
  88. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
  89. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  90. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
  93. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
  94. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
  96. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  97. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
  101. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
  102. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  103. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  104. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  105. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
  106. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
  107. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
  109. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  110. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  111. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
  112. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  113. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  114. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
  115. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  116. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  117. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  120. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
  121. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  123. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
  124. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
  125. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
  126. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  128. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
  129. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
  130. package/src/llama.cpp/ggml/src/ggml.c +9 -4
  131. package/src/llama.cpp/include/llama.h +32 -14
  132. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  133. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  134. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  135. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  136. package/src/llama.cpp/requirements.txt +1 -0
  137. package/src/llama.cpp/src/llama-arch.cpp +21 -0
  138. package/src/llama.cpp/src/llama-arch.h +1 -0
  139. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  140. package/src/llama.cpp/src/llama-grammar.cpp +183 -183
  141. package/src/llama.cpp/src/llama-grammar.h +13 -4
  142. package/src/llama.cpp/src/llama-impl.h +6 -6
  143. package/src/llama.cpp/src/llama-kv-cache.h +2 -1
  144. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  145. package/src/llama.cpp/src/llama-mmap.h +1 -0
  146. package/src/llama.cpp/src/llama-model.cpp +70 -6
  147. package/src/llama.cpp/src/llama-sampling.cpp +174 -67
  148. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  149. package/src/llama.cpp/src/llama.cpp +154 -5
  150. package/src/llama.cpp/src/unicode.cpp +9 -2
  151. package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
  152. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  153. package/src/llama.cpp/tests/test-chat.cpp +691 -325
  154. package/src/llama.cpp/tests/test-gguf.cpp +4 -4
  155. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  156. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  157. package/src/llama.cpp/tests/test-sampling.cpp +15 -0
  158. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  159. package/src/llama.cpp/common/chat.hpp +0 -52
package/src/llama.cpp/examples/tts/tts.cpp

@@ -1,10 +1,11 @@
+ #define _USE_MATH_DEFINES // For M_PI on MSVC
+
  #include "arg.h"
  #include "common.h"
  #include "sampling.h"
  #include "log.h"
  #include "llama.h"
-
- #define _USE_MATH_DEFINES // For M_PI on MSVC
+ #include "json.hpp"

  #include <algorithm>
  #include <cmath>
@@ -16,6 +17,13 @@
  #include <thread>
  #include <vector>

+ using json = nlohmann::ordered_json;
+
+ enum outetts_version {
+     OUTETTS_V0_2,
+     OUTETTS_V0_3,
+ };
+
  //
  // Terminal utils
  //
@@ -371,7 +379,7 @@ static std::string replace_numbers_with_words(const std::string & input_text) {
  }

  // Based on: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/version/v1/prompt_processor.py#L39
- static std::string process_text(const std::string & text) {
+ static std::string process_text(const std::string & text, const outetts_version tts_version = OUTETTS_V0_2) {

      // For now I skipped text romanization as I am unsure how to handle
      // uroman and MeCab implementations in C++
@@ -401,7 +409,8 @@ static std::string process_text(const std::string & text) {
          if (c == ' ') {
              prompt_clean += "<|text_sep|>";
      */
-     processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), "<|text_sep|>");
+     std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
+     processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), separator);

      return processed_text;
  }
@@ -425,8 +434,8 @@ static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) {
      prompt_add(prompt, vocab, "<|im_start|>\n", true, true);
  }

- static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) {
-     const std::string& delimiter = "<|text_sep|>";
+ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str, const outetts_version tts_version = OUTETTS_V0_2) {
+     const std::string& delimiter = (tts_version == OUTETTS_V0_3 ? "<|space|>" : "<|text_sep|>");

      std::vector<llama_token> result;
      size_t start = 0;
@@ -452,6 +461,78 @@ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab,
      return result;
  }

+ static json speaker_from_file(const std::string & speaker_file) {
+     std::ifstream file(speaker_file);
+     if (!file) {
+         LOG_ERR("%s: Failed to open file '%s' for reading\n", __func__, speaker_file.c_str());
+         return json();
+     }
+
+     json speaker = json::parse(file);
+     return speaker;
+ }
+
+ static outetts_version get_tts_version(llama_model *model, json speaker = json::object()) {
+     if (speaker.contains("version")) {
+         std::string version = speaker["version"].get<std::string>();
+         if (version == "0.2") {
+             return OUTETTS_V0_2;
+         } else if (version == "0.3") {
+             return OUTETTS_V0_3;
+         } else {
+             LOG_ERR("%s: Unsupported speaker version '%s'\n", __func__, version.c_str());
+         }
+     }
+
+     // Also could get version from model itself
+     const char *chat_template = llama_model_chat_template(model, nullptr);
+     if (chat_template && std::string(chat_template) == "outetts-0.3") {
+         return OUTETTS_V0_3;
+     }
+
+     // Use 0.2 as the default version
+     return OUTETTS_V0_2;
+ }
+
+ static std::string audio_text_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
+     std::string audio_text = "<|text_start|>";
+
+     if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
+         std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
+         for (const auto &word : speaker["words"]) {
+             audio_text += word["word"].get<std::string>() + separator;
+         }
+     }
+
+     return audio_text;
+ }
+
+ static std::string audio_data_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
+     std::string audio_data = "<|audio_start|>\n";
+
+     if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
+         std::string code_start = (tts_version == OUTETTS_V0_3) ? "" : "<|code_start|>";
+         std::string code_end = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|code_end|>";
+         for (const auto &word : speaker["words"]) {
+             std::string word_text = word["word"].get<std::string>();
+             double duration = word["duration"].get<double>();
+             std::vector<int> codes = word["codes"].get<std::vector<int>>();
+
+             // Create the audio output entry
+             std::ostringstream word_entry;
+             word_entry << word_text << "<|t_" << std::fixed << std::setprecision(2)
+                        << duration << "|>" + code_start;
+             for (const auto &Code : codes) {
+                 word_entry << "<|" << Code << "|>";
+             }
+             word_entry << code_end << "\n";
+             audio_data += word_entry.str();
+         }
+     }
+
+     return audio_data;
+ }
+
  int main(int argc, char ** argv) {
      common_params params;

@@ -523,34 +604,9 @@ int main(int argc, char ** argv) {
      std::vector<llama_token> codes;
      std::vector<llama_token> guide_tokens;

-     // process prompt and generate voice codes
-     {
-         LOG_INF("%s: constructing prompt ..\n", __func__);
-
-         std::vector<llama_token> prompt_inp;
-
-         prompt_init(prompt_inp, vocab);
-
-         prompt_add(prompt_inp, vocab, "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>", false, true);
-
-         // convert the input text into the necessary format expected by OuteTTS
-         {
-             std::string prompt_clean = process_text(params.prompt);
-             if (params.vocoder.use_guide_tokens) {
-                 guide_tokens = prepare_guide_tokens(vocab, prompt_clean);
-             }
-
-             LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
-
-             prompt_add(prompt_inp, vocab, prompt_clean, false, true);
-         }
-
-         prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
-
-         // disabled to save time on tokenizing each time
-         // TODO: load voices from the json files
- #if 0
-         const std::string voice_data = R"(<|audio_start|>
+     // the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
+     std::string audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
+     std::string audio_data = R"(<|audio_start|>
  the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
  overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
  package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
@@ -582,117 +638,170 @@ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><
  looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
  lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";

-         auto tmp = common_tokenize(vocab, voice_data, false, true);
-         printf("\n\n");
-         for (int i = 0; i < tmp.size(); ++i) {
-             printf("%d, ", tmp[i]);
+     // audio data for 0.3 version
+     outetts_version tts_version = get_tts_version(model_ttc);
+     if (tts_version == OUTETTS_V0_3) {
+         audio_text = std::regex_replace(audio_text, std::regex(R"(<\|text_sep\|>)"), "<|space|>");
+         audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_start\|>)"), "");
+         audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_end\|>)"), "<|space|>");
+     }
+
+     // load speaker if given
+     if (!params.vocoder.speaker_file.empty()) {
+         LOG_INF("%s: loading speaker ..\n", __func__);
+         json speaker = speaker_from_file(params.vocoder.speaker_file);
+         if (speaker.empty()) {
+             LOG_ERR("%s: Failed to load speaker file '%s'\n", __func__, params.vocoder.speaker_file.c_str());
+             return 1;
+         }
+         audio_text = audio_text_from_speaker(speaker, tts_version);
+         audio_data = audio_data_from_speaker(speaker, tts_version);
+     }
+
+     // process prompt and generate voice codes
+     {
+         LOG_INF("%s: constructing prompt ..\n", __func__);
+
+         std::vector<llama_token> prompt_inp;
+
+         prompt_init(prompt_inp, vocab);
+
+         prompt_add(prompt_inp, vocab, audio_text, false, true);
+
+         // convert the input text into the necessary format expected by OuteTTS
+         {
+             std::string prompt_clean = process_text(params.prompt, tts_version);
+             if (params.vocoder.use_guide_tokens) {
+                 guide_tokens = prepare_guide_tokens(vocab, prompt_clean, tts_version);
+             }
+
+             LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
+
+             prompt_add(prompt_inp, vocab, prompt_clean, false, true);
          }
-         printf("\n\n");
+
+         prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
+
+         if (!params.vocoder.speaker_file.empty()) {
+             prompt_add(prompt_inp, vocab, audio_data, false, true);
+         } else {
+             // disabled to save time on tokenizing each time
+ #if 1
+             const std::string voice_data = audio_data;
+
+             auto tmp = common_tokenize(vocab, voice_data, false, true);
+             printf("\n\n");
+             for (size_t i = 0; i < tmp.size(); ++i) {
+                 printf("%d, ", tmp[i]);
+             }
+             printf("\n\n");
+             prompt_add(prompt_inp, tmp);
  #else
-         prompt_add(prompt_inp, llama_tokens {
-             151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
-             152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
-             151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
-             151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
-             153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
-             153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
-             152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
-             152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
-             152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
-             153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
-             153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
-             152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
-             153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
-             153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
-             151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
-             152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
-             152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
-             151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
-             152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
-             152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
-             152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
-             152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
-             152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
-             153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
-             155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
-             152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
-             32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
-             152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
-             153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
-             152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
-             151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
-             153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
-             152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
-             152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
-             152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
-             153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
-             152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
-             152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
-             153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
-             152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
-             152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
-             152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
-             155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
-             152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
-             14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
-             153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
-             198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
-             152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
-             151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
-             153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
-             152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
-             152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
-             152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
-             153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
-             152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
-             152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
-             155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
-             151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
-             153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
-             151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
-             155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
-             151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
-             153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
-             152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
-             152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
-             152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
-             151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
-             152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
-             151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
-             152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
-             151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
-             153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
-             151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
-             152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
-             153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
-             151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
-             151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
-             152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
-             151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
-             151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
-             152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
-             151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
-             152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
-             153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
-             152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
-             153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
-             152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
-             152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
-             151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
-             151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
-             151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
-             151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
-             151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
-             152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
-             152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
-             152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
-             153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
-             152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
-             152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
-             152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
-             152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
-             151670,});
+             prompt_add(prompt_inp, llama_tokens {
+                 151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
+                 152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
+                 151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
+                 151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
+                 153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
+                 153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
+                 152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
+                 152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
+                 152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
+                 153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
+                 153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
+                 152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
+                 153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
+                 153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
+                 151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
+                 152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
+                 152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
+                 151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
+                 152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
+                 152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
+                 152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
+                 152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
+                 152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
+                 153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
+                 155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
+                 152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
+                 32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
+                 152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
+                 153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
+                 152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
+                 151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
+                 153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
+                 152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
+                 152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
+                 152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
+                 153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
+                 152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
+                 152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
+                 153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
+                 152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
+                 152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
+                 152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
+                 155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
+                 152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
+                 14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
+                 153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
+                 198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
+                 152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
+                 151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
+                 153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
+                 152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
+                 152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
+                 152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
+                 153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
+                 152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
+                 152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
+                 155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
+                 151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
+                 153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
+                 151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
+                 155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
+                 151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
+                 153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
+                 152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
+                 152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
+                 152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
+                 151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
+                 152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
+                 151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
+                 152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
+                 151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
+                 153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
+                 151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
+                 152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
+                 153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
+                 151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
+                 151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
+                 152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
+                 151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
+                 151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
+                 152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
+                 151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
+                 152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
+                 153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
+                 152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
+                 153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
+                 152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
+                 152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
+                 151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
+                 151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
+                 151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
+                 151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
+                 151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
+                 152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
+                 152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
+                 152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
+                 153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
+                 152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
+                 152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
+                 152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
+                 152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
+                 151670,});
  #endif
+         }

          // print the prompt token-by-token

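Note (added for context, not part of the upstream diff): the new speaker-file path only relies on the JSON fields read by the helpers above, namely an optional "version" string plus a "words" array whose entries carry "word", "duration" and "codes". A minimal sketch of such a profile, built with the same nlohmann::ordered_json alias the file now uses; the field values here are invented for illustration:

    #include <cstdio>
    #include "json.hpp"

    using json = nlohmann::ordered_json;

    int main() {
        // Hypothetical speaker profile; real profiles ship with OuteTTS (see the URL in the diff above).
        json speaker = {
            {"version", "0.3"},
            {"words", json::array({
                { {"word", "hello"}, {"duration", 0.21}, {"codes", {257, 740, 636}} }
            })}
        };
        // With the helpers from this diff, audio_text_from_speaker(speaker, OUTETTS_V0_3)
        // would produce "<|text_start|>hello<|space|>".
        std::printf("%s\n", speaker.dump(2).c_str());
        return 0;
    }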
package/src/llama.cpp/ggml/CMakeLists.txt

@@ -102,9 +102,11 @@ endif()

  option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
  option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
+ option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
  option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
  option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
  option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
+ option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
  option(GGML_AVX512 "ggml: enable AVX512F" OFF)
  option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
  option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
@@ -121,6 +123,7 @@ endif()
  option(GGML_LASX "ggml: enable lasx" ON)
  option(GGML_LSX "ggml: enable lsx" ON)
  option(GGML_RVV "ggml: enable rvv" ON)
+ option(GGML_VXE "ggml: enable vxe" ON)

  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
  set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
@@ -150,12 +153,17 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
      "ggml: max. batch size for using peer access")
  option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
  option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
+ option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
  option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
  option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
+ set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
+     "ggml: cuda link binary compression mode; requires cuda 12.8+")
+ set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")

  option(GGML_HIP "ggml: use HIP" OFF)
  option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
  option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
+ option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
  option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
  option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
@@ -187,6 +195,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
  option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
  option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
  option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
+ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
+     "gmml: OpenCL API version to target")

  # toolchain for vulkan-shaders-gen
  set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -209,6 +219,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)

  find_package(Threads REQUIRED)

+ include(GNUInstallDirs)
+
  #
  # build the library
  #
@@ -232,7 +244,6 @@ endif ()
  # install
  #

- include(GNUInstallDirs)
  include(CMakePackageConfigHelpers)

  # all public headers
@@ -243,6 +254,7 @@ set(GGML_PUBLIC_HEADERS
      include/ggml-backend.h
      include/ggml-blas.h
      include/ggml-cann.h
+     include/ggml-cpp.h
      include/ggml-cuda.h
      include/ggml-kompute.h
      include/ggml-opt.h
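For reference (not part of the diff): the new switches are ordinary CMake cache options, so a build that wants to try them would be configured along the lines of `cmake -B build -DGGML_CPU_KLEIDIAI=ON -DGGML_HIP_ROCWMMA_FATTN=ON` (illustrative command; when the flags are omitted, the defaults listed above apply, e.g. KleidiAI off, CUDA FlashAttention kernels on, OpenCL target version 300).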
package/src/llama.cpp/ggml/include/ggml-alloc.h

@@ -19,7 +19,7 @@ struct ggml_tallocr {
  };

  GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
- GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+ GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

  // Graph allocator
  /*
package/src/llama.cpp/ggml/include/ggml-backend.h

@@ -56,7 +56,7 @@ extern "C" {
      GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
      GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
      GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
-     GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
      GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
      GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
      GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -342,8 +342,8 @@ extern "C" {
      GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

      // Tensor initialization
-     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
-     GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+     GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+     GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);

      // CPU buffer types are always available
      GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
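Note (added for context, not part of the diff): ggml_tallocr_alloc, ggml_backend_buffer_init_tensor, ggml_backend_tensor_alloc and ggml_backend_view_init now report an enum ggml_status instead of returning void, so callers can propagate allocation failures. A minimal sketch, assuming the GGML_STATUS_SUCCESS value that ggml.h defines:

    #include <cstdio>
    #include "ggml.h"
    #include "ggml-alloc.h"

    // Returns true only if the tensor was actually placed in the buffer.
    static bool alloc_or_report(struct ggml_tallocr * talloc, struct ggml_tensor * t) {
        enum ggml_status st = ggml_tallocr_alloc(talloc, t);
        if (st != GGML_STATUS_SUCCESS) {
            std::fprintf(stderr, "tensor allocation failed (status %d)\n", (int) st);
            return false;
        }
        return true;
    }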
package/src/llama.cpp/ggml/include/ggml-cpu.h

@@ -8,7 +8,7 @@ extern "C" {
  #endif

      // the compute plan that needs to be prepared for ggml_graph_compute()
-     // since https://github.com/ggerganov/ggml/issues/287
+     // since https://github.com/ggml-org/ggml/issues/287
      struct ggml_cplan {
          size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
          uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@@ -80,6 +80,7 @@ extern "C" {
      GGML_BACKEND_API int ggml_cpu_has_avx (void);
      GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
      GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
+     GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
      GGML_BACKEND_API int ggml_cpu_has_f16c (void);
      GGML_BACKEND_API int ggml_cpu_has_fma (void);
      GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
@@ -95,9 +96,11 @@ extern "C" {
      GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
      GGML_BACKEND_API int ggml_cpu_has_sve (void);
      GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
+     GGML_BACKEND_API int ggml_cpu_has_sme (void);
      // other
      GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
      GGML_BACKEND_API int ggml_cpu_has_vsx (void);
+     GGML_BACKEND_API int ggml_cpu_has_vxe (void);
      GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
      GGML_BACKEND_API int ggml_cpu_has_llamafile (void);

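A quick way to probe the newly exposed feature flags at runtime (sketch added for illustration, not part of the diff; it uses only the declarations above):

    #include <cstdio>
    #include "ggml-cpu.h"

    int main() {
        // New probes in this version: BMI2 (x86), SME (Arm), VXE (s390x vector extensions).
        std::printf("bmi2=%d sme=%d vxe=%d\n",
                    ggml_cpu_has_bmi2(), ggml_cpu_has_sme(), ggml_cpu_has_vxe());
        return 0;
    }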
package/src/llama.cpp/ggml/include/ggml-metal.h

@@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);

  GGML_DEPRECATED(
          GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
-         "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
+         "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");

  GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);

package/src/llama.cpp/ggml/include/ggml-vulkan.h

@@ -10,8 +10,6 @@ extern "C" {
  #define GGML_VK_NAME "Vulkan"
  #define GGML_VK_MAX_DEVICES 16

- GGML_BACKEND_API void ggml_vk_instance_init(void);
-
  // backend API
  GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);

package/src/llama.cpp/ggml/include/ggml.h

@@ -198,7 +198,7 @@

  #ifndef __GNUC__
  #    define GGML_ATTRIBUTE_FORMAT(...)
- #elif defined(__MINGW32__)
+ #elif defined(__MINGW32__) && !defined(__clang__)
  #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
  #else
  #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@@ -2140,7 +2140,11 @@ extern "C" {
  #    define GGML_RESTRICT
  #  endif
  #else
- #  define GGML_RESTRICT restrict
+ #  if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
+ #    define GGML_RESTRICT __restrict
+ #  else
+ #    define GGML_RESTRICT restrict
+ #  endif
  #endif
      typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
      typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);