@fugood/llama.node 0.3.13 → 0.3.14

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (139)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -1
  18. package/package.json +1 -1
  19. package/src/LlamaContext.cpp +98 -76
  20. package/src/LlamaContext.h +1 -1
  21. package/src/common.hpp +1 -2
  22. package/src/llama.cpp/.github/workflows/build.yml +60 -10
  23. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  24. package/src/llama.cpp/common/CMakeLists.txt +3 -3
  25. package/src/llama.cpp/common/arg.cpp +112 -11
  26. package/src/llama.cpp/common/chat.cpp +960 -266
  27. package/src/llama.cpp/common/chat.h +135 -0
  28. package/src/llama.cpp/common/common.cpp +27 -171
  29. package/src/llama.cpp/common/common.h +27 -67
  30. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  31. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  32. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
  33. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  34. package/src/llama.cpp/common/sampling.cpp +45 -7
  35. package/src/llama.cpp/common/speculative.cpp +6 -5
  36. package/src/llama.cpp/common/speculative.h +1 -1
  37. package/src/llama.cpp/docs/build.md +45 -7
  38. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
  39. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
  40. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  41. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
  42. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
  43. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  44. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  45. package/src/llama.cpp/examples/llava/clip.h +19 -3
  46. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  47. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  48. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  49. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
  50. package/src/llama.cpp/examples/main/main.cpp +73 -28
  51. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
  52. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
  53. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  54. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  55. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  56. package/src/llama.cpp/examples/run/run.cpp +110 -67
  57. package/src/llama.cpp/examples/server/server.cpp +82 -87
  58. package/src/llama.cpp/examples/server/utils.hpp +94 -107
  59. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  60. package/src/llama.cpp/examples/tts/tts.cpp +251 -142
  61. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  62. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  63. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  64. package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
  65. package/src/llama.cpp/ggml/include/ggml.h +5 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  68. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  69. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  70. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  71. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
  72. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  73. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
  74. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  75. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  76. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  77. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
  78. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
  79. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
  80. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
  81. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  82. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  83. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  84. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  85. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
  86. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  87. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  89. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
  90. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
  91. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  92. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
  93. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  94. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  95. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
  96. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  97. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  98. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
  99. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  100. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  101. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  102. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  103. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  104. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
  105. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
  106. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  107. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
  108. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
  109. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  111. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  112. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
  113. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  114. package/src/llama.cpp/ggml/src/ggml.c +8 -3
  115. package/src/llama.cpp/include/llama.h +19 -5
  116. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  117. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  118. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  119. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  120. package/src/llama.cpp/requirements.txt +1 -0
  121. package/src/llama.cpp/src/llama-arch.cpp +21 -0
  122. package/src/llama.cpp/src/llama-arch.h +1 -0
  123. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  124. package/src/llama.cpp/src/llama-grammar.cpp +182 -182
  125. package/src/llama.cpp/src/llama-grammar.h +12 -3
  126. package/src/llama.cpp/src/llama-kv-cache.h +1 -0
  127. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  128. package/src/llama.cpp/src/llama-model.cpp +69 -5
  129. package/src/llama.cpp/src/llama-sampling.cpp +43 -10
  130. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  131. package/src/llama.cpp/src/llama.cpp +147 -0
  132. package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
  133. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  134. package/src/llama.cpp/tests/test-chat.cpp +593 -395
  135. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  136. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  137. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  138. package/src/llama.cpp/common/chat.hpp +0 -55
  139. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
@@ -1,10 +1,11 @@
1
+ #define _USE_MATH_DEFINES // For M_PI on MSVC
2
+
1
3
  #include "arg.h"
2
4
  #include "common.h"
3
5
  #include "sampling.h"
4
6
  #include "log.h"
5
7
  #include "llama.h"
6
-
7
- #define _USE_MATH_DEFINES // For M_PI on MSVC
8
+ #include "json.hpp"
8
9
 
9
10
  #include <algorithm>
10
11
  #include <cmath>
@@ -16,6 +17,13 @@
16
17
  #include <thread>
17
18
  #include <vector>
18
19
 
20
+ using json = nlohmann::ordered_json;
21
+
22
+ enum outetts_version {
23
+ OUTETTS_V0_2,
24
+ OUTETTS_V0_3,
25
+ };
26
+
19
27
  //
20
28
  // Terminal utils
21
29
  //
@@ -371,7 +379,7 @@ static std::string replace_numbers_with_words(const std::string & input_text) {
371
379
  }
372
380
 
373
381
  // Based on: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/version/v1/prompt_processor.py#L39
374
- static std::string process_text(const std::string & text) {
382
+ static std::string process_text(const std::string & text, const outetts_version tts_version = OUTETTS_V0_2) {
375
383
 
376
384
  // For now I skipped text romanization as I am unsure how to handle
377
385
  // uroman and MeCab implementations in C++
@@ -401,7 +409,8 @@ static std::string process_text(const std::string & text) {
401
409
  if (c == ' ') {
402
410
  prompt_clean += "<|text_sep|>";
403
411
  */
404
- processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), "<|text_sep|>");
412
+ std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
413
+ processed_text = std::regex_replace(processed_text, std::regex(R"(\s)"), separator);
405
414
 
406
415
  return processed_text;
407
416
  }
@@ -425,8 +434,8 @@ static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) {
425
434
  prompt_add(prompt, vocab, "<|im_start|>\n", true, true);
426
435
  }
427
436
 
428
- static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) {
429
- const std::string& delimiter = "<|text_sep|>";
437
+ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab, const std::string & str, const outetts_version tts_version = OUTETTS_V0_2) {
438
+ const std::string& delimiter = (tts_version == OUTETTS_V0_3 ? "<|space|>" : "<|text_sep|>");
430
439
 
431
440
  std::vector<llama_token> result;
432
441
  size_t start = 0;
@@ -452,6 +461,78 @@ static std::vector<llama_token> prepare_guide_tokens(const llama_vocab * vocab,
452
461
  return result;
453
462
  }
454
463
 
464
+ static json speaker_from_file(const std::string & speaker_file) {
465
+ std::ifstream file(speaker_file);
466
+ if (!file) {
467
+ LOG_ERR("%s: Failed to open file '%s' for reading\n", __func__, speaker_file.c_str());
468
+ return json();
469
+ }
470
+
471
+ json speaker = json::parse(file);
472
+ return speaker;
473
+ }
474
+
475
+ static outetts_version get_tts_version(llama_model *model, json speaker = json::object()) {
476
+ if (speaker.contains("version")) {
477
+ std::string version = speaker["version"].get<std::string>();
478
+ if (version == "0.2") {
479
+ return OUTETTS_V0_2;
480
+ } else if (version == "0.3") {
481
+ return OUTETTS_V0_3;
482
+ } else {
483
+ LOG_ERR("%s: Unsupported speaker version '%s'\n", __func__, version.c_str());
484
+ }
485
+ }
486
+
487
+ // Also could get version from model itself
488
+ const char *chat_template = llama_model_chat_template(model, nullptr);
489
+ if (chat_template && std::string(chat_template) == "outetts-0.3") {
490
+ return OUTETTS_V0_3;
491
+ }
492
+
493
+ // Use 0.2 as the default version
494
+ return OUTETTS_V0_2;
495
+ }
496
+
497
+ static std::string audio_text_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
498
+ std::string audio_text = "<|text_start|>";
499
+
500
+ if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
501
+ std::string separator = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
502
+ for (const auto &word : speaker["words"]) {
503
+ audio_text += word["word"].get<std::string>() + separator;
504
+ }
505
+ }
506
+
507
+ return audio_text;
508
+ }
509
+
510
+ static std::string audio_data_from_speaker(json speaker, const outetts_version tts_version = OUTETTS_V0_2) {
511
+ std::string audio_data = "<|audio_start|>\n";
512
+
513
+ if (tts_version == OUTETTS_V0_2 || tts_version == OUTETTS_V0_3) {
514
+ std::string code_start = (tts_version == OUTETTS_V0_3) ? "" : "<|code_start|>";
515
+ std::string code_end = (tts_version == OUTETTS_V0_3) ? "<|space|>" : "<|code_end|>";
516
+ for (const auto &word : speaker["words"]) {
517
+ std::string word_text = word["word"].get<std::string>();
518
+ double duration = word["duration"].get<double>();
519
+ std::vector<int> codes = word["codes"].get<std::vector<int>>();
520
+
521
+ // Create the audio output entry
522
+ std::ostringstream word_entry;
523
+ word_entry << word_text << "<|t_" << std::fixed << std::setprecision(2)
524
+ << duration << "|>" + code_start;
525
+ for (const auto &Code : codes) {
526
+ word_entry << "<|" << Code << "|>";
527
+ }
528
+ word_entry << code_end << "\n";
529
+ audio_data += word_entry.str();
530
+ }
531
+ }
532
+
533
+ return audio_data;
534
+ }
535
+
455
536
  int main(int argc, char ** argv) {
456
537
  common_params params;
457
538
 
@@ -523,34 +604,9 @@ int main(int argc, char ** argv) {
523
604
  std::vector<llama_token> codes;
524
605
  std::vector<llama_token> guide_tokens;
525
606
 
526
- // process prompt and generate voice codes
527
- {
528
- LOG_INF("%s: constructing prompt ..\n", __func__);
529
-
530
- std::vector<llama_token> prompt_inp;
531
-
532
- prompt_init(prompt_inp, vocab);
533
-
534
- prompt_add(prompt_inp, vocab, "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>", false, true);
535
-
536
- // convert the input text into the necessary format expected by OuteTTS
537
- {
538
- std::string prompt_clean = process_text(params.prompt);
539
- if (params.vocoder.use_guide_tokens) {
540
- guide_tokens = prepare_guide_tokens(vocab, prompt_clean);
541
- }
542
-
543
- LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
544
-
545
- prompt_add(prompt_inp, vocab, prompt_clean, false, true);
546
- }
547
-
548
- prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
549
-
550
- // disabled to save time on tokenizing each time
551
- // TODO: load voices from the json files
552
- #if 0
553
- const std::string voice_data = R"(<|audio_start|>
607
+ // the default speaker profile is from: https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
608
+ std::string audio_text = "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<|text_sep|>";
609
+ std::string audio_data = R"(<|audio_start|>
554
610
  the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
555
611
  overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
556
612
  package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
@@ -582,117 +638,170 @@ it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><
582
638
  looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
583
639
  lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
584
640
 
585
- auto tmp = common_tokenize(vocab, voice_data, false, true);
586
- printf("\n\n");
587
- for (int i = 0; i < tmp.size(); ++i) {
588
- printf("%d, ", tmp[i]);
641
+ // audio data for 0.3 version
642
+ outetts_version tts_version = get_tts_version(model_ttc);
643
+ if (tts_version == OUTETTS_V0_3) {
644
+ audio_text = std::regex_replace(audio_text, std::regex(R"(<\|text_sep\|>)"), "<|space|>");
645
+ audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_start\|>)"), "");
646
+ audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_end\|>)"), "<|space|>");
647
+ }
648
+
649
+ // load speaker if given
650
+ if (!params.vocoder.speaker_file.empty()) {
651
+ LOG_INF("%s: loading speaker ..\n", __func__);
652
+ json speaker = speaker_from_file(params.vocoder.speaker_file);
653
+ if (speaker.empty()) {
654
+ LOG_ERR("%s: Failed to load speaker file '%s'\n", __func__, params.vocoder.speaker_file.c_str());
655
+ return 1;
656
+ }
657
+ audio_text = audio_text_from_speaker(speaker, tts_version);
658
+ audio_data = audio_data_from_speaker(speaker, tts_version);
659
+ }
660
+
661
+ // process prompt and generate voice codes
662
+ {
663
+ LOG_INF("%s: constructing prompt ..\n", __func__);
664
+
665
+ std::vector<llama_token> prompt_inp;
666
+
667
+ prompt_init(prompt_inp, vocab);
668
+
669
+ prompt_add(prompt_inp, vocab, audio_text, false, true);
670
+
671
+ // convert the input text into the necessary format expected by OuteTTS
672
+ {
673
+ std::string prompt_clean = process_text(params.prompt, tts_version);
674
+ if (params.vocoder.use_guide_tokens) {
675
+ guide_tokens = prepare_guide_tokens(vocab, prompt_clean, tts_version);
676
+ }
677
+
678
+ LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str());
679
+
680
+ prompt_add(prompt_inp, vocab, prompt_clean, false, true);
589
681
  }
590
- printf("\n\n");
682
+
683
+ prompt_add(prompt_inp, vocab, "<|text_end|>\n", false, true);
684
+
685
+ if (!params.vocoder.speaker_file.empty()) {
686
+ prompt_add(prompt_inp, vocab, audio_data, false, true);
687
+ } else {
688
+ // disabled to save time on tokenizing each time
689
+ #if 1
690
+ const std::string voice_data = audio_data;
691
+
692
+ auto tmp = common_tokenize(vocab, voice_data, false, true);
693
+ printf("\n\n");
694
+ for (size_t i = 0; i < tmp.size(); ++i) {
695
+ printf("%d, ", tmp[i]);
696
+ }
697
+ printf("\n\n");
698
+ prompt_add(prompt_inp, tmp);
591
699
  #else
592
- prompt_add(prompt_inp, llama_tokens {
593
- 151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
594
- 152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
595
- 151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
596
- 151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
597
- 153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
598
- 153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
599
- 152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
600
- 152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
601
- 152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
602
- 153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
603
- 153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
604
- 152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
605
- 153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
606
- 153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
607
- 151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
608
- 152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
609
- 152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
610
- 151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
611
- 152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
612
- 152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
613
- 152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
614
- 152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
615
- 152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
616
- 153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
617
- 155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
618
- 152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
619
- 32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
620
- 152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
621
- 153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
622
- 152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
623
- 151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
624
- 153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
625
- 152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
626
- 152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
627
- 152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
628
- 153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
629
- 152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
630
- 152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
631
- 153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
632
- 152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
633
- 152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
634
- 152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
635
- 155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
636
- 152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
637
- 14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
638
- 153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
639
- 198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
640
- 152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
641
- 151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
642
- 153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
643
- 152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
644
- 152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
645
- 152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
646
- 153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
647
- 152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
648
- 152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
649
- 155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
650
- 151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
651
- 153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
652
- 151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
653
- 155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
654
- 151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
655
- 153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
656
- 152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
657
- 152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
658
- 152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
659
- 151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
660
- 152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
661
- 151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
662
- 152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
663
- 151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
664
- 153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
665
- 151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
666
- 152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
667
- 153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
668
- 151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
669
- 151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
670
- 152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
671
- 151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
672
- 151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
673
- 152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
674
- 151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
675
- 152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
676
- 153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
677
- 152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
678
- 153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
679
- 152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
680
- 152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
681
- 151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
682
- 151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
683
- 151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
684
- 151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
685
- 151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
686
- 152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
687
- 152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
688
- 152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
689
- 153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
690
- 152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
691
- 152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
692
- 152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
693
- 152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
694
- 151670,});
700
+ prompt_add(prompt_inp, llama_tokens {
701
+ 151667, 198, 1782, 155780, 151669, 151929, 152412, 152308, 152585,
702
+ 152460, 153375, 151670, 198, 74455, 155808, 151669, 151799,
703
+ 151873, 151863, 152446, 152372, 152204, 152728, 152229, 152470,
704
+ 151970, 153413, 152419, 153334, 153289, 153374, 153199, 152040,
705
+ 153260, 152721, 152680, 153297, 152419, 153248, 152400, 152691,
706
+ 153368, 153437, 151670, 198, 1722, 155828, 151669, 152607,
707
+ 152256, 152991, 152299, 152688, 153163, 153016, 152789, 153198,
708
+ 152712, 151911, 153107, 152623, 152170, 152395, 152852, 152207,
709
+ 152461, 153321, 153309, 151750, 152137, 153340, 152573, 152267,
710
+ 153347, 151789, 152681, 153339, 151992, 152512, 151751, 152179,
711
+ 153434, 153180, 152900, 153440, 152474, 153122, 153129, 151904,
712
+ 152311, 151670, 198, 1499, 155791, 151669, 152276, 152454,
713
+ 153354, 152544, 153204, 153272, 152708, 153433, 152319, 153226,
714
+ 153043, 152325, 153267, 152622, 151670, 198, 4250, 155797,
715
+ 151669, 153454, 153342, 151989, 152458, 153420, 152303, 152271,
716
+ 152827, 153036, 153196, 151708, 153263, 152561, 153207, 152213,
717
+ 152112, 153204, 151722, 152542, 151670, 198, 19789, 155796,
718
+ 151669, 153353, 153182, 152345, 152471, 152477, 153014, 152002,
719
+ 152191, 151734, 152312, 152810, 152237, 153224, 153169, 153224,
720
+ 152244, 153387, 153404, 151670, 198, 16069, 155811, 151669,
721
+ 152265, 151946, 151808, 152412, 152363, 152305, 153156, 152733,
722
+ 152810, 153157, 152016, 152100, 152069, 153234, 152317, 152589,
723
+ 152707, 153121, 153341, 152159, 152114, 153156, 153001, 153504,
724
+ 153376, 152272, 152433, 152325, 151941, 151670, 198, 285,
725
+ 155788, 151669, 152238, 152255, 153427, 152318, 153009, 152381,
726
+ 152474, 152680, 152157, 153255, 152324, 151682, 151670, 198,
727
+ 32955, 155804, 151669, 153490, 153419, 152364, 152405, 152682,
728
+ 152206, 152078, 153369, 152725, 153193, 153027, 152946, 152488,
729
+ 153070, 151883, 152890, 152489, 153144, 153375, 152358, 151685,
730
+ 152494, 152117, 152740, 151670, 198, 37448, 480, 155840, 151669,
731
+ 151902, 152720, 153377, 152027, 152378, 152821, 153207, 153459,
732
+ 153028, 153068, 152507, 153255, 152158, 152921, 151958, 152609,
733
+ 152748, 152822, 152286, 151714, 152730, 152377, 152353, 152470,
734
+ 152606, 152162, 152186, 153071, 152244, 153118, 153375, 153018,
735
+ 152712, 153098, 152976, 152336, 151843, 153202, 152297, 151736,
736
+ 153380, 153502, 152702, 152115, 153181, 152735, 153277, 153457,
737
+ 152393, 153112, 152595, 151670, 198, 19098, 155808, 151669,
738
+ 152464, 153452, 152595, 153312, 151937, 151933, 153197, 152239,
739
+ 153163, 152922, 153402, 152034, 152591, 153438, 152215, 151673,
740
+ 152005, 151785, 152642, 151924, 153278, 151805, 151974, 153482,
741
+ 152718, 152862, 153347, 151670, 198, 72, 155780, 151669, 151795,
742
+ 152111, 152746, 152377, 153471, 152309, 151670, 198, 19016,
743
+ 155788, 151669, 153181, 152271, 152190, 152842, 152224, 152701,
744
+ 152939, 152536, 152091, 151815, 152733, 151672, 151670, 198,
745
+ 14689, 155788, 151669, 152291, 152072, 152942, 151734, 153042,
746
+ 153504, 152589, 153333, 151839, 151941, 153038, 153180, 151670,
747
+ 198, 36996, 8303, 155832, 151669, 152231, 152256, 152835,
748
+ 152801, 152985, 153400, 152393, 152818, 152765, 152249, 152600,
749
+ 151699, 152302, 152752, 153018, 153009, 151992, 153054, 152847,
750
+ 153354, 153228, 152662, 153355, 152532, 153393, 151782, 152458,
751
+ 152048, 152757, 152428, 153195, 151906, 153006, 153178, 153250,
752
+ 152331, 152284, 152780, 153138, 153319, 151980, 153142, 152418,
753
+ 152228, 152733, 151670, 198, 9096, 155801, 151669, 151698,
754
+ 153321, 152217, 153039, 152935, 153400, 152122, 152531, 153106,
755
+ 152169, 152892, 152957, 151851, 152427, 152826, 152451, 151851,
756
+ 152901, 152885, 152594, 153446, 153080, 151670, 198, 14689,
757
+ 155795, 151669, 152658, 151700, 153321, 152450, 152530, 153191,
758
+ 151673, 151690, 151698, 152714, 152846, 152981, 153171, 153384,
759
+ 153364, 153188, 153246, 151670, 198, 1055, 155779, 151669,
760
+ 151869, 152388, 152711, 153334, 151736, 151670, 198, 1782,
761
+ 155780, 151669, 153483, 153240, 152241, 152558, 152697, 153046,
762
+ 151670, 198, 5804, 1363, 155820, 151669, 152941, 152764, 152605,
763
+ 153034, 153434, 153372, 153347, 151887, 152453, 152758, 152133,
764
+ 152510, 152694, 152431, 152321, 153088, 152676, 152223, 152581,
765
+ 152459, 152015, 152502, 153063, 152712, 153294, 153451, 153032,
766
+ 152903, 152859, 152989, 151748, 152669, 152661, 152650, 152409,
767
+ 151861, 151670, 198, 300, 7973, 155828, 151669, 153095, 152469,
768
+ 152988, 152894, 151819, 152391, 153019, 152058, 153062, 153230,
769
+ 151826, 152112, 152306, 152264, 152769, 153390, 152384, 152435,
770
+ 152790, 153393, 152983, 152540, 152252, 152034, 153107, 152540,
771
+ 151919, 151893, 152558, 152817, 152946, 152956, 152129, 152715,
772
+ 153131, 153490, 151734, 152271, 152707, 151734, 153321, 152450,
773
+ 151670, 198, 8088, 155792, 151669, 152452, 153497, 153353,
774
+ 152679, 152533, 152382, 152374, 152611, 153341, 153163, 152285,
775
+ 153411, 152495, 153141, 152320, 151670, 198, 1199, 155781,
776
+ 151669, 151764, 152360, 153295, 152634, 153342, 152199, 152271,
777
+ 151670, 198, 43366, 155799, 151669, 152308, 151682, 152889,
778
+ 152016, 152385, 152629, 152495, 151826, 153321, 152958, 152180,
779
+ 151886, 153432, 152922, 152128, 153024, 153040, 152593, 152287,
780
+ 151677, 151670, 198, 53660, 155808, 151669, 151727, 152092,
781
+ 152680, 153331, 151699, 152316, 152938, 152289, 152433, 153384,
782
+ 151781, 153137, 153259, 152175, 153213, 152291, 151869, 152691,
783
+ 152489, 151941, 152049, 152034, 153053, 152179, 153160, 151676,
784
+ 153367, 151670, 198, 268, 4123, 480, 155821, 151669, 152350,
785
+ 152173, 152536, 151991, 151960, 153144, 153013, 152358, 152234,
786
+ 153135, 152291, 153235, 152143, 152583, 152402, 153483, 152678,
787
+ 152192, 152533, 152946, 151797, 153103, 152310, 152293, 151825,
788
+ 152548, 153442, 152109, 152659, 153325, 152781, 152570, 152957,
789
+ 151752, 152265, 153381, 152515, 151670, 198, 437, 155787,
790
+ 151669, 152957, 152659, 151975, 152709, 152402, 152836, 152174,
791
+ 151792, 153409, 153327, 152990, 151670, 198, 275, 155781,
792
+ 151669, 152520, 153038, 152067, 153273, 153185, 152265, 152974,
793
+ 151670, 198, 94273, 155799, 151669, 152953, 152938, 153427,
794
+ 152244, 151920, 153423, 152929, 152367, 153052, 152129, 152331,
795
+ 152257, 152987, 152777, 153448, 152408, 151696, 152408, 152326,
796
+ 152699, 151670, 198, 385, 16239, 155828, 151669, 152306, 152268,
797
+ 153438, 153228, 152978, 152957, 153153, 153393, 152795, 152110,
798
+ 152918, 152923, 152467, 152331, 153053, 153330, 151889, 153444,
799
+ 152234, 152624, 151779, 152801, 152784, 152139, 152222, 152751,
800
+ 152512, 153287, 153141, 153052, 151840, 152589, 152508, 153499,
801
+ 152109, 152255, 151739, 152267, 152759, 153318, 153165, 153349,
802
+ 151670,});
695
803
  #endif
804
+ }
696
805
 
697
806
  // print the prompt token-by-token
698
807
 
@@ -102,9 +102,11 @@ endif()
102
102
 
103
103
  option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
104
104
  option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
105
+ option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
105
106
  option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
106
107
  option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
107
108
  option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
109
+ option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
108
110
  option(GGML_AVX512 "ggml: enable AVX512F" OFF)
109
111
  option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
110
112
  option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
@@ -121,6 +123,7 @@ endif()
121
123
  option(GGML_LASX "ggml: enable lasx" ON)
122
124
  option(GGML_LSX "ggml: enable lsx" ON)
123
125
  option(GGML_RVV "ggml: enable rvv" ON)
126
+ option(GGML_VXE "ggml: enable vxe" ON)
124
127
 
125
128
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
126
129
  set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
@@ -150,12 +153,17 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
150
153
  "ggml: max. batch size for using peer access")
151
154
  option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
152
155
  option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
156
+ option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
153
157
  option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
154
158
  option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
159
+ set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
160
+ "ggml: cuda link binary compression mode; requires cuda 12.8+")
161
+ set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
155
162
 
156
163
  option(GGML_HIP "ggml: use HIP" OFF)
157
164
  option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
158
165
  option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
166
+ option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
159
167
  option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
160
168
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
161
169
  option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
@@ -187,6 +195,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
187
195
  option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
188
196
  option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
189
197
  option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
198
+ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
199
+ "ggml: OpenCL API version to target")
190
200
 
191
201
  # toolchain for vulkan-shaders-gen
192
202
  set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -209,6 +219,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
209
219
 
210
220
  find_package(Threads REQUIRED)
211
221
 
222
+ include(GNUInstallDirs)
223
+
212
224
  #
213
225
  # build the library
214
226
  #
@@ -232,7 +244,6 @@ endif ()
232
244
  # install
233
245
  #
234
246
 
235
- include(GNUInstallDirs)
236
247
  include(CMakePackageConfigHelpers)
237
248
 
238
249
  # all public headers
@@ -243,6 +254,7 @@ set(GGML_PUBLIC_HEADERS
243
254
  include/ggml-backend.h
244
255
  include/ggml-blas.h
245
256
  include/ggml-cann.h
257
+ include/ggml-cpp.h
246
258
  include/ggml-cuda.h
247
259
  include/ggml-kompute.h
248
260
  include/ggml-opt.h
@@ -19,7 +19,7 @@ struct ggml_tallocr {
19
19
  };
20
20
 
21
21
  GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
22
- GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
22
+ GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
23
23
 
24
24
  // Graph allocator
25
25
  /*
@@ -56,7 +56,7 @@ extern "C" {
56
56
  GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
57
57
  GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
58
58
  GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
59
- GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
59
+ GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
60
60
  GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
61
61
  GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
62
62
  GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -342,8 +342,8 @@ extern "C" {
342
342
  GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
343
343
 
344
344
  // Tensor initialization
345
- GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
346
- GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
345
+ GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
346
+ GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
347
347
 
348
348
  // CPU buffer types are always available
349
349
  GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
@@ -80,6 +80,7 @@ extern "C" {
80
80
  GGML_BACKEND_API int ggml_cpu_has_avx (void);
81
81
  GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
82
82
  GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
83
+ GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
83
84
  GGML_BACKEND_API int ggml_cpu_has_f16c (void);
84
85
  GGML_BACKEND_API int ggml_cpu_has_fma (void);
85
86
  GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
@@ -95,9 +96,11 @@ extern "C" {
95
96
  GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
96
97
  GGML_BACKEND_API int ggml_cpu_has_sve (void);
97
98
  GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
99
+ GGML_BACKEND_API int ggml_cpu_has_sme (void);
98
100
  // other
99
101
  GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
100
102
  GGML_BACKEND_API int ggml_cpu_has_vsx (void);
103
+ GGML_BACKEND_API int ggml_cpu_has_vxe (void);
101
104
  GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
102
105
  GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
103
106
 
@@ -2140,7 +2140,11 @@ extern "C" {
2140
2140
  # define GGML_RESTRICT
2141
2141
  # endif
2142
2142
  #else
2143
- # define GGML_RESTRICT restrict
2143
+ # if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
2144
+ # define GGML_RESTRICT __restrict
2145
+ # else
2146
+ # define GGML_RESTRICT restrict
2147
+ # endif
2144
2148
  #endif
2145
2149
  typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2146
2150
  typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -226,6 +226,9 @@ add_library(ggml-base
226
226
  gguf.cpp)
227
227
 
228
228
  target_include_directories(ggml-base PRIVATE .)
229
+ if (GGML_BACKEND_DL)
230
+ target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
231
+ endif()
229
232
 
230
233
  add_library(ggml
231
234
  ggml-backend-reg.cpp)
@@ -233,7 +236,7 @@ add_library(ggml
233
236
  target_link_libraries(ggml PUBLIC ggml-base)
234
237
 
235
238
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
236
- target_link_libraries(ggml PRIVATE dl)
239
+ target_link_libraries(ggml PRIVATE dl stdc++fs)
237
240
  endif()
238
241
 
239
242
  function(ggml_add_backend_library backend)
@@ -286,7 +289,7 @@ function(ggml_add_cpu_backend_variant tag_name)
286
289
  set(GGML_CPU_TAG_NAME ${tag_name})
287
290
  # other: OPENMP LLAMAFILE CPU_HBM
288
291
  foreach (feat NATIVE
289
- AVX AVX2 AVX_VNNI FMA F16C
292
+ AVX AVX2 BMI2 AVX_VNNI FMA F16C
290
293
  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
291
294
  AMX_TILE AMX_INT8 AMX_BF16)
292
295
  set(GGML_${feat} OFF)
@@ -306,13 +309,13 @@ if (GGML_CPU_ALL_VARIANTS)
306
309
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
307
310
  endif()
308
311
  ggml_add_cpu_backend_variant(sandybridge AVX)
309
- ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
310
- ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
311
- ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
312
- ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
312
+ ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
313
+ ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
314
+ ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
315
+ ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
313
316
  if (NOT MSVC)
314
317
  # MSVC doesn't support AMX
315
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
318
+ ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
316
319
  endif()
317
320
  elseif (GGML_CPU)
318
321
  ggml_add_cpu_backend_variant_impl("")