@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +3 -1
- package/lib/index.js +2 -0
- package/lib/index.ts +3 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +28 -7
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +14 -17
- package/src/common.hpp +7 -6
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
package/src/llama.cpp/common/json-partial.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <nlohmann/json.hpp>
+
+// Healing marker (empty if the JSON was fully parsed / wasn't healed).
+struct common_healing_marker {
+    // Raw marker.
+    std::string marker;
+
+    // Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
+    std::string json_dump_marker;
+};
+
+// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
+struct common_json {
+    nlohmann::ordered_json json;
+
+    common_healing_marker healing_marker;
+};
+
+// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
+//
+// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
+// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
+// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
+//
+// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out);
+
+// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out);
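The healing-marker scheme in this new header is easiest to see in use. The following sketch is illustrative only, based solely on the declarations above; it assumes a build that links llama.cpp's common library, and the marker string is an arbitrary choice:

    #include "json-partial.h"

    #include <iostream>
    #include <string>

    int main() {
        // A truncated tool-call fragment, as might arrive from a streaming model.
        std::string partial = "{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Par";

        common_json result;
        if (common_json_parse(partial, /* healing_marker = */ "$MARK$", result)) {
            // The healed document parses as regular JSON...
            std::string dump = result.json.dump();
            // ...and cutting at json_dump_marker recovers the partial prefix,
            // per the comment on common_healing_marker above.
            std::cout << dump.substr(0, dump.find(result.healing_marker.json_dump_marker)) << "\n";
        }
    }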
package/src/llama.cpp/common/json-schema-to-grammar.h
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "ggml.h"
-// Change JSON_ASSERT from assert() to GGML_ASSERT:
-#define JSON_ASSERT GGML_ASSERT
-#include "json.hpp"
+#include <nlohmann/json_fwd.hpp>
+
+#include <functional>
+#include <string>
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
                                    bool force_gbnf = false);
package/src/llama.cpp/common/sampling.cpp
@@ -161,7 +161,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
 #endif // LLAMA_USE_LLGUIDANCE
     } else {
-        std::vector<std::string> patterns_at_start;
+        std::vector<std::string> trigger_patterns;
         std::vector<std::string> patterns_anywhere;
         std::vector<llama_token> trigger_tokens;
         for (const auto & trigger : params.grammar_triggers) {
@@ -173,10 +173,13 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                     break;
                 }
                 case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START:
                 {
-                    const auto & pattern = trigger.value;
-                    (trigger.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START ? patterns_at_start : patterns_anywhere).push_back(pattern);
+                    patterns_anywhere.push_back(trigger.value);
+                    break;
+                }
+                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
+                {
+                    trigger_patterns.push_back(trigger.value);
                     break;
                 }
                 case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN:
@@ -190,10 +193,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
             }
         }
 
-        std::vector<std::string> trigger_patterns;
-        if (!patterns_at_start.empty()) {
-            trigger_patterns.push_back("^(" + string_join(patterns_at_start, "|") + ")[\\s\\S]*");
-        }
         if (!patterns_anywhere.empty()) {
            trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*");
        }
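In other words, PATTERN triggers are still wrapped so they may fire anywhere in the sampled text, while the start-anchored PATTERN_START type is replaced by PATTERN_FULL, whose regex must account for the entire text on its own. A small illustration of the two anchoring styles with std::regex (the trigger values here are hypothetical; llama.cpp performs this matching internally):

    #include <cassert>
    #include <regex>
    #include <string>

    int main() {
        // PATTERN-style: wrapped like common_sampler_init does above, so the
        // trigger may occur anywhere in the text.
        std::regex anywhere("^[\\s\\S]*?(<tool_call>)[\\s\\S]*");
        assert(std::regex_match(std::string("text before <tool_call> {"), anywhere));

        // PATTERN_FULL-style: the trigger regex itself must cover the whole text.
        std::regex full("\\s*<tool_call>[\\s\\S]*");
        assert(std::regex_match(std::string("<tool_call> {\"name\":"), full));
    }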
package/src/llama.cpp/common/speculative.cpp
@@ -144,6 +144,8 @@ llama_tokens common_speculative_gen_draft(
     auto & smpl = spec->smpl;
     auto & prompt = spec->prompt;
 
+    auto * mem = llama_get_memory(ctx);
+
     int reuse_i = 0;
     int reuse_n = 0;
 
@@ -173,7 +175,7 @@ llama_tokens common_speculative_gen_draft(
     result.reserve(params.n_draft);
 
     if (reuse_n == 0) {
-        llama_kv_self_clear(ctx);
+        llama_memory_clear(mem, false);
 
         prompt.clear();
     } else {
@@ -192,14 +194,14 @@ llama_tokens common_speculative_gen_draft(
     }
 
     if (reuse_i > 0) {
-        llama_kv_self_seq_rm (ctx, 0, 0, reuse_i);
-        llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
+        llama_memory_seq_rm (mem, 0, 0, reuse_i);
+        llama_memory_seq_add(mem, 0, reuse_i, -1, -reuse_i);
 
         prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
     }
 
     if (reuse_n < (int) prompt.size()) {
-        llama_kv_self_seq_rm (ctx, 0, reuse_n, -1);
+        llama_memory_seq_rm (mem, 0, reuse_n, -1);
 
         prompt.erase(prompt.begin() + reuse_n, prompt.end());
     }
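These edits track the beta.7 bundle's move to the llama_memory_* API in include/llama.h: callers obtain a llama_memory_t handle once via llama_get_memory() and operate on that instead of calling KV-cache functions on the context. A minimal sketch for downstream code making the same migration, using only calls visible in this diff (assumes a valid llama_context; the helper name and error handling are our own):

    #include "llama.h"

    // Keep only the first n_keep tokens of sequence 0, via the new memory API.
    static void trim_seq0(struct llama_context * ctx, llama_pos n_keep) {
        llama_memory_t mem = llama_get_memory(ctx);

        // Remove positions [n_keep, end) from sequence 0; p1 = -1 means "to the end".
        llama_memory_seq_rm(mem, /* seq_id = */ 0, /* p0 = */ n_keep, /* p1 = */ -1);
    }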
package/src/llama.cpp/ggml/CMakeLists.txt
@@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
 message(DEBUG "INS_ENB : ${INS_ENB}")
 
 option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
-option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
+option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
 option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
 option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
 option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
@@ -129,6 +129,7 @@ option(GGML_LASX "ggml: enable lasx" ON)
 option(GGML_LSX "ggml: enable lsx" ON)
 option(GGML_RVV "ggml: enable rvv" ON)
 option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
+option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
 option(GGML_VXE "ggml: enable vxe" ON)
 
 option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
@@ -136,7 +137,7 @@ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
 
 
-if (WIN32)
+if (MINGW)
     set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
 endif()
 
@@ -171,12 +172,12 @@ option(GGML_HIP "ggml: use HIP"
 option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
 option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
 option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
+option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
 option(GGML_VULKAN "ggml: use Vulkan" OFF)
 option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
 option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
 option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
 option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
-option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
 option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
 option(GGML_KOMPUTE "ggml: use Kompute" OFF)
@@ -367,6 +368,8 @@ if (MSVC)
         /wd4005 # Macro redefinition
         /wd4244 # Conversion from one type to another type, possible loss of data
         /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
+        /wd4305 # Conversion from 'type1' to 'type2', possible loss of data
+        /wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data
         /wd4996 # Disable POSIX deprecation warnings
         /wd4702 # Unreachable code warnings
     )
@@ -386,4 +389,46 @@
     disable_msvc_warnings(ggml-cpu-skylakex)
     disable_msvc_warnings(ggml-cpu-icelake)
     disable_msvc_warnings(ggml-cpu-alderlake)
+
+    if (GGML_BUILD_EXAMPLES)
+        disable_msvc_warnings(common-ggml)
+        disable_msvc_warnings(common)
+
+        disable_msvc_warnings(mnist-common)
+        disable_msvc_warnings(mnist-eval)
+        disable_msvc_warnings(mnist-train)
+
+        disable_msvc_warnings(gpt-2-ctx)
+        disable_msvc_warnings(gpt-2-alloc)
+        disable_msvc_warnings(gpt-2-backend)
+        disable_msvc_warnings(gpt-2-sched)
+        disable_msvc_warnings(gpt-2-quantize)
+        disable_msvc_warnings(gpt-2-batched)
+
+        disable_msvc_warnings(gpt-j)
+        disable_msvc_warnings(gpt-j-quantize)
+
+        disable_msvc_warnings(magika)
+        disable_msvc_warnings(yolov3-tiny)
+        disable_msvc_warnings(sam)
+
+        disable_msvc_warnings(simple-ctx)
+        disable_msvc_warnings(simple-backend)
+    endif()
+
+    if (GGML_BUILD_TESTS)
+        disable_msvc_warnings(test-mul-mat)
+        disable_msvc_warnings(test-arange)
+        disable_msvc_warnings(test-backend-ops)
+        disable_msvc_warnings(test-cont)
+        disable_msvc_warnings(test-conv-transpose)
+        disable_msvc_warnings(test-conv-transpose-1d)
+        disable_msvc_warnings(test-conv1d)
+        disable_msvc_warnings(test-conv2d)
+        disable_msvc_warnings(test-conv2d-dw)
+        disable_msvc_warnings(test-customop)
+        disable_msvc_warnings(test-dup)
+        disable_msvc_warnings(test-opt)
+        disable_msvc_warnings(test-pool)
+    endif ()
 endif()
package/src/llama.cpp/ggml/include/ggml.h
@@ -489,6 +489,7 @@ extern "C" {
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_PAD_REFLECT_1D,
+        GGML_OP_ROLL,
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
@@ -935,6 +936,15 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
+    // repeat a to the specified shape
+    GGML_API struct ggml_tensor * ggml_repeat_4d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3);
+
     // sums repetitions in a into shape of b
     GGML_API struct ggml_tensor * ggml_repeat_back(
             struct ggml_context * ctx,
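Unlike ggml_repeat(), which takes a second tensor b only to define the target shape, the new ggml_repeat_4d() names the four target dimensions directly. A hedged sketch (the helper is ours; it assumes an initialized ggml_context, and recall that ne0 is the innermost dimension):

    #include "ggml.h"

    // Broadcast a [4, 1] row to [4, 8] without allocating a dummy shape tensor.
    static struct ggml_tensor * repeat_rows8(struct ggml_context * ctx,
                                             struct ggml_tensor  * row) {
        return ggml_repeat_4d(ctx, row, row->ne[0], 8, 1, 1);
    }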
@@ -1792,6 +1802,17 @@ extern "C" {
             int                   p0,
             int                   p1);
 
+    // Move tensor elements by an offset given for each dimension. Elements that
+    // are shifted beyond the last position are wrapped around to the beginning.
+    GGML_API struct ggml_tensor * ggml_roll(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   shift0,
+            int                   shift1,
+            int                   shift2,
+            int                   shift3);
+
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
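Per its doc comment, ggml_roll() behaves like numpy.roll or torch.roll: a per-dimension integer shift with wrap-around. A hedged sketch (the wrapper is ours and assumes an initialized ggml_context and the usual graph build/compute flow around it):

    #include "ggml.h"

    // Rotate a 1-D tensor right by one position; the last element wraps to index 0.
    static struct ggml_tensor * rotate_right1(struct ggml_context * ctx,
                                              struct ggml_tensor  * a) {
        return ggml_roll(ctx, a, /* shift0 = */ 1, 0, 0, 0);
    }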
@@ -2086,9 +2107,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
 
@@ -2172,6 +2190,7 @@ extern "C" {
 
     // scheduling priorities
     enum ggml_sched_priority {
+        GGML_SCHED_PRIO_LOW = -1,
         GGML_SCHED_PRIO_NORMAL,
         GGML_SCHED_PRIO_MEDIUM,
         GGML_SCHED_PRIO_HIGH,
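The new GGML_SCHED_PRIO_LOW is explicitly -1, so GGML_SCHED_PRIO_NORMAL stays 0 and existing enum values are unchanged; it lets a host run inference threads below normal OS priority. A hedged sketch, assuming the threadpool-params helpers declared in the bundled ggml-cpu.h:

    #include "ggml-cpu.h"

    // Build threadpool params that request background (low) scheduling priority.
    static struct ggml_threadpool_params make_low_prio(int n_threads) {
        struct ggml_threadpool_params tpp = ggml_threadpool_params_default(n_threads);
        tpp.prio = GGML_SCHED_PRIO_LOW;
        return tpp;
    }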
package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -109,6 +109,8 @@ if (MSVC)
 else ()
     set(CMAKE_GENERATOR_PLATFORM_LWR "")
 endif ()
+ggml_get_system_arch()
+message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
 
 if (NOT MSVC)
     if (GGML_STATIC)
@@ -123,7 +125,6 @@
     endif()
 
     if (MINGW)
-        # Target Windows 8 for PrefetchVirtualMemory
         add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
     endif()
 
@@ -194,6 +195,7 @@ add_library(ggml-base
             ../include/ggml-opt.h
             ../include/gguf.h
             ggml.c
+            ggml.cpp
             ggml-alloc.c
             ggml-backend.cpp
             ggml-opt.cpp
@@ -210,6 +212,7 @@ endif()
 
 add_library(ggml
             ggml-backend-reg.cpp)
+add_library(ggml::ggml ALIAS ggml)
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
@@ -224,6 +227,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})
@@ -266,17 +270,27 @@ endfunction()
 function(ggml_add_cpu_backend_variant tag_name)
     set(GGML_CPU_TAG_NAME ${tag_name})
     # other: OPENMP LLAMAFILE CPU_HBM
-    foreach (feat NATIVE
-                  SSE42
-                  AVX AVX2 BMI2 AVX_VNNI FMA F16C
-                  AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
-                  AMX_TILE AMX_INT8 AMX_BF16)
-        set(GGML_${feat} OFF)
-    endforeach()
-
-    foreach (feat ${ARGN})
-        set(GGML_${feat} ON)
-    endforeach()
+    if (GGML_SYSTEM_ARCH STREQUAL "x86")
+        foreach (feat NATIVE
+                      SSE42
+                      AVX AVX2 BMI2 AVX_VNNI FMA F16C
+                      AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+                      AMX_TILE AMX_INT8 AMX_BF16)
+            set(GGML_${feat} OFF)
+        endforeach()
+
+        foreach (feat ${ARGN})
+            set(GGML_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    endif()
 
     ggml_add_cpu_backend_variant_impl(${tag_name})
 endfunction()
@@ -286,17 +300,62 @@ ggml_add_backend(CPU)
 if (GGML_CPU_ALL_VARIANTS)
     if (NOT GGML_BACKEND_DL)
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+    elseif (GGML_CPU_ARM_ARCH)
+        message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
     endif()
-    ggml_add_cpu_backend_variant(x64)
-    ggml_add_cpu_backend_variant(sse42 SSE42)
-    ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
-    ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
-    ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
-    ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
-    ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
-    if (NOT MSVC)
-        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+    if (GGML_SYSTEM_ARCH STREQUAL "x86")
+        ggml_add_cpu_backend_variant(x64)
+        ggml_add_cpu_backend_variant(sse42 SSE42)
+        ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
+        ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
+        ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
+        ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
+        ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
+        if (NOT MSVC)
+            # MSVC doesn't support AMX
+            ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+        endif()
+    elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            # Many of these features are optional so we build versions with popular
+            # combinations and name the backends based on the version they were
+            # first released with
+            ggml_add_cpu_backend_variant(armv8.0_1)
+            ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
+            ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+            ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
+            ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
+            ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
+            ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
+            ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
+        elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
+            # Android-specific backends with SoC-compatible feature sets
+            ggml_add_cpu_backend_variant(android_armv8.0_1)
+            ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
+            ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+            ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+        elseif (APPLE)
+            ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
+            ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
+            ggml_add_cpu_backend_variant(apple_m4 DOTPROD MATMUL_INT8 NOSVE SME)
+        else()
+            message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            ggml_add_cpu_backend_variant(power0)
+            ggml_add_cpu_backend_variant(power7_1 POWER7)
+            ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+            ggml_add_cpu_backend_variant(power8_1 POWER8)
+            ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+            ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+            ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+            ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+        else()
+            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
+    else()
+        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
     endif()
 elseif (GGML_CPU)
     ggml_add_cpu_backend_variant_impl("")