@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/lib/binding.ts +3 -1
  2. package/lib/index.js +2 -0
  3. package/lib/index.ts +3 -1
  4. package/package.json +14 -14
  5. package/scripts/llama.cpp.patch +27 -26
  6. package/src/EmbeddingWorker.cpp +1 -1
  7. package/src/LlamaCompletionWorker.cpp +28 -7
  8. package/src/LlamaCompletionWorker.h +4 -0
  9. package/src/LlamaContext.cpp +14 -17
  10. package/src/common.hpp +7 -6
  11. package/src/llama.cpp/CMakeLists.txt +15 -4
  12. package/src/llama.cpp/common/CMakeLists.txt +15 -24
  13. package/src/llama.cpp/common/arg.cpp +172 -110
  14. package/src/llama.cpp/common/chat-parser.cpp +385 -0
  15. package/src/llama.cpp/common/chat-parser.h +120 -0
  16. package/src/llama.cpp/common/chat.cpp +726 -596
  17. package/src/llama.cpp/common/chat.h +74 -8
  18. package/src/llama.cpp/common/common.cpp +56 -38
  19. package/src/llama.cpp/common/common.h +9 -3
  20. package/src/llama.cpp/common/json-partial.cpp +256 -0
  21. package/src/llama.cpp/common/json-partial.h +38 -0
  22. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
  23. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
  24. package/src/llama.cpp/common/sampling.cpp +7 -8
  25. package/src/llama.cpp/common/speculative.cpp +6 -4
  26. package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
  27. package/src/llama.cpp/ggml/include/ggml.h +22 -3
  28. package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
  29. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
  30. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  31. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
  32. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  33. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  37. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  39. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  40. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  41. package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  42. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  43. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
  44. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  45. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
  46. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
  47. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
  49. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  50. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  51. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
  52. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  53. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
  54. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  55. package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  56. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  57. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  58. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  59. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
  60. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  61. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  62. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
  63. package/src/llama.cpp/include/llama.h +145 -40
  64. package/src/llama.cpp/src/CMakeLists.txt +5 -1
  65. package/src/llama.cpp/src/llama-arch.cpp +99 -3
  66. package/src/llama.cpp/src/llama-arch.h +10 -1
  67. package/src/llama.cpp/src/llama-batch.cpp +728 -272
  68. package/src/llama.cpp/src/llama-batch.h +112 -54
  69. package/src/llama.cpp/src/llama-chat.cpp +19 -2
  70. package/src/llama.cpp/src/llama-chat.h +1 -0
  71. package/src/llama.cpp/src/llama-context.cpp +525 -339
  72. package/src/llama.cpp/src/llama-context.h +38 -17
  73. package/src/llama.cpp/src/llama-cparams.cpp +4 -0
  74. package/src/llama.cpp/src/llama-cparams.h +2 -0
  75. package/src/llama.cpp/src/llama-grammar.cpp +12 -2
  76. package/src/llama.cpp/src/llama-graph.cpp +413 -353
  77. package/src/llama.cpp/src/llama-graph.h +112 -56
  78. package/src/llama.cpp/src/llama-hparams.cpp +10 -2
  79. package/src/llama.cpp/src/llama-hparams.h +13 -2
  80. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
  81. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
  82. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
  83. package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
  84. package/src/llama.cpp/src/llama-kv-cells.h +415 -0
  85. package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  86. package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
  87. package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
  88. package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
  89. package/src/llama.cpp/src/llama-memory.cpp +41 -0
  90. package/src/llama.cpp/src/llama-memory.h +86 -5
  91. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  92. package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
  93. package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
  94. package/src/llama.cpp/src/llama-model.cpp +1137 -528
  95. package/src/llama.cpp/src/llama-model.h +4 -0
  96. package/src/llama.cpp/src/llama-quant.cpp +2 -1
  97. package/src/llama.cpp/src/llama-sampling.cpp +2 -2
  98. package/src/llama.cpp/src/llama-vocab.cpp +69 -32
  99. package/src/llama.cpp/src/llama-vocab.h +1 -0
  100. package/src/llama.cpp/src/llama.cpp +11 -7
  101. package/src/llama.cpp/src/unicode.cpp +5 -0
  102. package/src/tts_utils.h +1 -1
  103. package/src/llama.cpp/common/json.hpp +0 -24766
  104. package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
  105. package/src/llama.cpp/common/minja/minja.hpp +0 -2974
  106. package/src/llama.cpp/common/stb_image.h +0 -7988
  107. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  108. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
  109. package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
  110. package/src/llama.cpp/src/llama-kv-cache.h +0 -515
  111. package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  112. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  113. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0

package/src/llama.cpp/common/json-partial.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <nlohmann/json.hpp>
+
+// Healing marker (empty if the JSON was fully parsed / wasn't healed).
+struct common_healing_marker {
+    // Raw marker.
+    std::string marker;
+
+    // Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
+    std::string json_dump_marker;
+};
+
+// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
+struct common_json {
+    nlohmann::ordered_json json;
+
+    common_healing_marker healing_marker;
+};
+
+// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
+//
+// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
+// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
+// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
+//
+// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out);
+
+// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out);
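
The healing-marker API above is what enables streaming tool-call parsing in the new chat parser: a truncated JSON fragment is closed with a throwaway marker, parsed, and the dump can later be cut back at that marker. A minimal usage sketch (the input and marker strings are illustrative; only the declarations above are assumed):

```cpp
#include "json-partial.h"

#include <iostream>

int main() {
    // A truncated model output: the string value and both objects are unclosed.
    std::string partial = R"({"name": "get_weather", "arguments": {"city": "Par)";

    common_json out;
    if (common_json_parse(partial, /* healing_marker = */ "$M$", out)) {
        std::string dump = out.json.dump(); // healed, fully parseable JSON
        // Cutting at json_dump_marker recovers the partial prefix (modulo spacing).
        std::cout << dump.substr(0, dump.find(out.healing_marker.json_dump_marker)) << "\n";
    }
}
```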

package/src/llama.cpp/common/json-schema-to-grammar.cpp
@@ -1,8 +1,9 @@
 #include "json-schema-to-grammar.h"
 #include "common.h"
 
+#include <nlohmann/json.hpp>
+
 #include <algorithm>
-#include <fstream>
 #include <map>
 #include <regex>
 #include <sstream>

package/src/llama.cpp/common/json-schema-to-grammar.h
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "ggml.h"
-// Change JSON_ASSERT from assert() to GGML_ASSERT:
-#define JSON_ASSERT GGML_ASSERT
-#include "json.hpp"
+#include <nlohmann/json_fwd.hpp>
+
+#include <functional>
+#include <string>
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
     bool force_gbnf = false);
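
With json.hpp no longer vendored in common/ (see the removed common/json.hpp in the file list), this header now compiles against nlohmann's forward declarations only; callers include the full <nlohmann/json.hpp> themselves, as the .cpp change above does. A minimal caller sketch (schema contents are illustrative):

```cpp
#include "json-schema-to-grammar.h"

#include <nlohmann/json.hpp>
#include <iostream>

int main() {
    auto schema = nlohmann::ordered_json::parse(R"({
        "type": "object",
        "properties": { "answer": { "type": "string" } },
        "required": ["answer"]
    })");

    // Produces a GBNF grammar string for constrained sampling.
    std::cout << json_schema_to_grammar(schema) << "\n";
}
```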

package/src/llama.cpp/common/sampling.cpp
@@ -161,7 +161,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
 #endif // LLAMA_USE_LLGUIDANCE
     } else {
-        std::vector<std::string> patterns_at_start;
+        std::vector<std::string> trigger_patterns;
         std::vector<std::string> patterns_anywhere;
         std::vector<llama_token> trigger_tokens;
         for (const auto & trigger : params.grammar_triggers) {
@@ -173,10 +173,13 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                     break;
                 }
             case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-            case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START:
                 {
-                    const auto & pattern = trigger.value;
-                    (trigger.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START ? patterns_at_start : patterns_anywhere).push_back(pattern);
+                    patterns_anywhere.push_back(trigger.value);
+                    break;
+                }
+            case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
+                {
+                    trigger_patterns.push_back(trigger.value);
                     break;
                 }
             case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN:
@@ -190,10 +193,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
             }
         }
 
-        std::vector<std::string> trigger_patterns;
-        if (!patterns_at_start.empty()) {
-            trigger_patterns.push_back("^(" + string_join(patterns_at_start, "|") + ")[\\s\\S]*");
-        }
         if (!patterns_anywhere.empty()) {
             trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*");
         }
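
Net effect of this hunk: the PATTERN_START trigger type is replaced by PATTERN_FULL, whose patterns are used verbatim as full-string regexes, while plain PATTERN triggers are still wrapped so they can fire anywhere in the text generated so far. A standalone sketch of that wrapping (the inline join stands in for common's string_join helper; the trigger strings are illustrative):

```cpp
#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> patterns_anywhere = { "<tool_call>", "\\{\\s*\"name\"" };

    // Join the alternatives (stand-in for the string_join helper in common).
    std::string joined;
    for (size_t i = 0; i < patterns_anywhere.size(); i++) {
        joined += (i ? "|" : "") + patterns_anywhere[i];
    }

    // Same shape as the pattern built above: the lazy [\s\S]*? prefix lets the
    // trigger match at any position in the output so far.
    std::regex trigger("^[\\s\\S]*?(" + joined + ")[\\s\\S]*");

    std::cout << std::regex_match("thinking... <tool_call>{\"name\"", trigger) << "\n"; // 1
}
```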

package/src/llama.cpp/common/speculative.cpp
@@ -144,6 +144,8 @@ llama_tokens common_speculative_gen_draft(
     auto & smpl = spec->smpl;
     auto & prompt = spec->prompt;
 
+    auto * mem = llama_get_memory(ctx);
+
     int reuse_i = 0;
     int reuse_n = 0;
 
@@ -173,7 +175,7 @@
     result.reserve(params.n_draft);
 
     if (reuse_n == 0) {
-        llama_kv_self_clear(ctx);
+        llama_memory_clear(mem, false);
 
         prompt.clear();
     } else {
@@ -192,14 +194,14 @@
     }
 
     if (reuse_i > 0) {
-        llama_kv_self_seq_rm (ctx, 0, 0, reuse_i);
-        llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
+        llama_memory_seq_rm (mem, 0, 0, reuse_i);
+        llama_memory_seq_add(mem, 0, reuse_i, -1, -reuse_i);
 
         prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
     }
 
     if (reuse_n < (int) prompt.size()) {
-        llama_kv_self_seq_rm (ctx, 0, reuse_n, -1);
+        llama_memory_seq_rm (mem, 0, reuse_n, -1);
 
         prompt.erase(prompt.begin() + reuse_n, prompt.end());
     }
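
These hunks show the release's broader API migration in miniature: the llama_kv_self_* functions that operated on the context are replaced by llama_memory_* functions operating on a handle fetched once via llama_get_memory (in upstream llama.h, the boolean passed to llama_memory_clear controls whether the data buffers are cleared along with the metadata). A sketch of the cache-reuse pattern above, using only the calls visible in this diff:

```cpp
#include "llama.h"

// Keep a reused prefix of the draft context's KV cells and drop the rest.
// Assumes ctx is an initialized llama_context and sequence 0 is the only one used.
static void reuse_prefix(llama_context * ctx, int reuse_i, int reuse_n, int prompt_size) {
    llama_memory_t mem = llama_get_memory(ctx);

    if (reuse_i > 0) {
        // Remove the cells before the reused span, then shift the span down to pos 0.
        llama_memory_seq_rm (mem, 0, 0, reuse_i);
        llama_memory_seq_add(mem, 0, reuse_i, -1, -reuse_i);
    }
    if (reuse_n < prompt_size) {
        // Trim everything past the reused span.
        llama_memory_seq_rm(mem, 0, reuse_n, -1);
    }
}
```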

package/src/llama.cpp/ggml/CMakeLists.txt
@@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
 message(DEBUG "INS_ENB : ${INS_ENB}")
 
 option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
-option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
+option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
 option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
 option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
 option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
@@ -129,6 +129,7 @@ option(GGML_LASX "ggml: enable lasx" ON)
 option(GGML_LSX "ggml: enable lsx" ON)
 option(GGML_RVV "ggml: enable rvv" ON)
 option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
+option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
 option(GGML_VXE "ggml: enable vxe" ON)
 
 option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
@@ -136,7 +137,7 @@ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
 
 
-if (WIN32)
+if (MINGW)
     set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
 endif()
 
@@ -171,12 +172,12 @@ option(GGML_HIP "ggml: use HIP"
 option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
 option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
 option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
+option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
 option(GGML_VULKAN "ggml: use Vulkan" OFF)
 option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
 option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
 option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
 option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
-option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
 option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
 option(GGML_KOMPUTE "ggml: use Kompute" OFF)
@@ -367,6 +368,8 @@ if (MSVC)
         /wd4005 # Macro redefinition
         /wd4244 # Conversion from one type to another type, possible loss of data
         /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
+        /wd4305 # Conversion from 'type1' to 'type2', possible loss of data
+        /wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data
         /wd4996 # Disable POSIX deprecation warnings
         /wd4702 # Unreachable code warnings
     )
@@ -386,4 +389,46 @@ if (MSVC)
     disable_msvc_warnings(ggml-cpu-skylakex)
     disable_msvc_warnings(ggml-cpu-icelake)
     disable_msvc_warnings(ggml-cpu-alderlake)
+
+    if (GGML_BUILD_EXAMPLES)
+        disable_msvc_warnings(common-ggml)
+        disable_msvc_warnings(common)
+
+        disable_msvc_warnings(mnist-common)
+        disable_msvc_warnings(mnist-eval)
+        disable_msvc_warnings(mnist-train)
+
+        disable_msvc_warnings(gpt-2-ctx)
+        disable_msvc_warnings(gpt-2-alloc)
+        disable_msvc_warnings(gpt-2-backend)
+        disable_msvc_warnings(gpt-2-sched)
+        disable_msvc_warnings(gpt-2-quantize)
+        disable_msvc_warnings(gpt-2-batched)
+
+        disable_msvc_warnings(gpt-j)
+        disable_msvc_warnings(gpt-j-quantize)
+
+        disable_msvc_warnings(magika)
+        disable_msvc_warnings(yolov3-tiny)
+        disable_msvc_warnings(sam)
+
+        disable_msvc_warnings(simple-ctx)
+        disable_msvc_warnings(simple-backend)
+    endif()
+
+    if (GGML_BUILD_TESTS)
+        disable_msvc_warnings(test-mul-mat)
+        disable_msvc_warnings(test-arange)
+        disable_msvc_warnings(test-backend-ops)
+        disable_msvc_warnings(test-cont)
+        disable_msvc_warnings(test-conv-transpose)
+        disable_msvc_warnings(test-conv-transpose-1d)
+        disable_msvc_warnings(test-conv1d)
+        disable_msvc_warnings(test-conv2d)
+        disable_msvc_warnings(test-conv2d-dw)
+        disable_msvc_warnings(test-customop)
+        disable_msvc_warnings(test-dup)
+        disable_msvc_warnings(test-opt)
+        disable_msvc_warnings(test-pool)
+    endif ()
 endif()

package/src/llama.cpp/ggml/include/ggml.h
@@ -489,6 +489,7 @@ extern "C" {
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_PAD_REFLECT_1D,
+        GGML_OP_ROLL,
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
@@ -935,6 +936,15 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
+    // repeat a to the specified shape
+    GGML_API struct ggml_tensor * ggml_repeat_4d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3);
+
     // sums repetitions in a into shape of b
     GGML_API struct ggml_tensor * ggml_repeat_back(
             struct ggml_context * ctx,
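
Unlike ggml_repeat, which broadcasts a to the shape of a second tensor b, the new ggml_repeat_4d takes the target shape directly. A minimal sketch (standard ggml context boilerplate; the shapes are illustrative):

```cpp
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16 * 1024 * 1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // a: [2, 1, 1, 1], repeated to [2, 3, 4, 1] with no shape tensor needed
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    struct ggml_tensor * r = ggml_repeat_4d(ctx, a, 2, 3, 4, 1);
    (void) r; // build into a cgraph and compute as usual

    ggml_free(ctx);
    return 0;
}
```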
@@ -1792,6 +1802,17 @@
             int p0,
             int p1);
 
+    // Move tensor elements by an offset given for each dimension. Elements that
+    // are shifted beyond the last position are wrapped around to the beginning.
+    GGML_API struct ggml_tensor * ggml_roll(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int shift0,
+            int shift1,
+            int shift2,
+            int shift3);
+
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
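
Per its comment, ggml_roll is a wrap-around shift along each dimension, np.roll-style (the exact sign convention is an assumption here; check the ops.cpp implementation in this diff). A one-function sketch:

```cpp
#include "ggml.h"

// Roll a tensor by one position along dim 0 with wrap-around: under the assumed
// np.roll-style convention, [x0, x1, x2, x3] becomes [x3, x0, x1, x2].
static struct ggml_tensor * roll_by_one(struct ggml_context * ctx, struct ggml_tensor * a) {
    return ggml_roll(ctx, a, /* shift0 = */ 1, /* shift1 = */ 0, /* shift2 = */ 0, /* shift3 = */ 0);
}
```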
@@ -2086,9 +2107,6 @@
     GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
 
@@ -2172,6 +2190,7 @@
 
     // scheduling priorities
     enum ggml_sched_priority {
+        GGML_SCHED_PRIO_LOW = -1,
         GGML_SCHED_PRIO_NORMAL,
         GGML_SCHED_PRIO_MEDIUM,
         GGML_SCHED_PRIO_HIGH,

package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -109,6 +109,8 @@ if (MSVC)
 else ()
     set(CMAKE_GENERATOR_PLATFORM_LWR "")
 endif ()
+ggml_get_system_arch()
+message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
 
 if (NOT MSVC)
     if (GGML_STATIC)
@@ -123,7 +125,6 @@ if (NOT MSVC)
     endif()
 
     if (MINGW)
-        # Target Windows 8 for PrefetchVirtualMemory
         add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
     endif()
 
@@ -194,6 +195,7 @@ add_library(ggml-base
     ../include/ggml-opt.h
     ../include/gguf.h
     ggml.c
+    ggml.cpp
     ggml-alloc.c
     ggml-backend.cpp
     ggml-opt.cpp
@@ -210,6 +212,7 @@ endif()
 
 add_library(ggml
             ggml-backend-reg.cpp)
+add_library(ggml::ggml ALIAS ggml)
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
@@ -224,6 +227,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})
@@ -266,17 +270,27 @@ endfunction()
 function(ggml_add_cpu_backend_variant tag_name)
     set(GGML_CPU_TAG_NAME ${tag_name})
     # other: OPENMP LLAMAFILE CPU_HBM
-    foreach (feat NATIVE
-                  SSE42
-                  AVX AVX2 BMI2 AVX_VNNI FMA F16C
-                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
-                  AMX_TILE AMX_INT8 AMX_BF16)
-        set(GGML_${feat} OFF)
-    endforeach()
-
-    foreach (feat ${ARGN})
-        set(GGML_${feat} ON)
-    endforeach()
+    if (GGML_SYSTEM_ARCH STREQUAL "x86")
+        foreach (feat NATIVE
+                      SSE42
+                      AVX AVX2 BMI2 AVX_VNNI FMA F16C
+                      AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+                      AMX_TILE AMX_INT8 AMX_BF16)
+            set(GGML_${feat} OFF)
+        endforeach()
+
+        foreach (feat ${ARGN})
+            set(GGML_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    endif()
 
     ggml_add_cpu_backend_variant_impl(${tag_name})
 endfunction()
@@ -286,17 +300,62 @@ ggml_add_backend(CPU)
 if (GGML_CPU_ALL_VARIANTS)
     if (NOT GGML_BACKEND_DL)
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+    elseif (GGML_CPU_ARM_ARCH)
+        message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
     endif()
-    ggml_add_cpu_backend_variant(x64)
-    ggml_add_cpu_backend_variant(sse42 SSE42)
-    ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
-    ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
-    ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
-    ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
-    ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
-    if (NOT MSVC)
-        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+    if (GGML_SYSTEM_ARCH STREQUAL "x86")
+        ggml_add_cpu_backend_variant(x64)
+        ggml_add_cpu_backend_variant(sse42 SSE42)
+        ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
+        ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
+        ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
+        ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+        ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
+        if (NOT MSVC)
+            # MSVC doesn't support AMX
+            ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+        endif()
+    elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            # Many of these features are optional so we build versions with popular
+            # combinations and name the backends based on the version they were
+            # first released with
+            ggml_add_cpu_backend_variant(armv8.0_1)
+            ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
+            ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+            ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
+            ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
+            ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
+            ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
+            ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
+        elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
+            # Android-specific backends with SoC-compatible feature sets
+            ggml_add_cpu_backend_variant(android_armv8.0_1)
+            ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
+            ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+            ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+        elseif (APPLE)
+            ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
+            ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
+            ggml_add_cpu_backend_variant(apple_m4 DOTPROD MATMUL_INT8 NOSVE SME)
+        else()
+            message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            ggml_add_cpu_backend_variant(power0)
+            ggml_add_cpu_backend_variant(power7_1 POWER7)
+            ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+            ggml_add_cpu_backend_variant(power8_1 POWER8)
+            ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+            ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+            ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+            ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+        else()
+            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
+    else()
+        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
     endif()
 elseif (GGML_CPU)
     ggml_add_cpu_backend_variant_impl("")