@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +3 -1
- package/lib/index.js +2 -0
- package/lib/index.ts +3 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +28 -7
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +14 -17
- package/src/common.hpp +7 -6
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
// Rename `_generic` functions if no native implementation is available.
|
|
4
|
+
// This effectively selects the generic implementation.
|
|
5
|
+
|
|
6
|
+
#if defined(GGML_CPU_GENERIC)
|
|
7
|
+
// quants.c
|
|
8
|
+
#define quantize_row_q8_0_generic quantize_row_q8_0
|
|
9
|
+
#define quantize_row_q8_1_generic quantize_row_q8_1
|
|
10
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
11
|
+
#define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0
|
|
12
|
+
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
|
|
13
|
+
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
|
|
14
|
+
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
|
|
15
|
+
#define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
|
|
16
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
17
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
18
|
+
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
|
|
19
|
+
#define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K
|
|
20
|
+
#define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K
|
|
21
|
+
#define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K
|
|
22
|
+
#define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K
|
|
23
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
24
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
25
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
26
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
27
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
28
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
29
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
30
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
31
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
32
|
+
// repack.cpp
|
|
33
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
34
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
35
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
36
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
37
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
38
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
39
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
40
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
41
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
42
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
43
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
44
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
45
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
46
|
+
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
|
|
47
|
+
// repack.cpp
|
|
48
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
49
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
50
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
51
|
+
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
52
|
+
// repack.cpp
|
|
53
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
54
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
55
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
56
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
57
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
58
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
59
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
60
|
+
#elif defined(__POWERPC__) || defined(__powerpc__)
|
|
61
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
|
|
62
|
+
// quants.c
|
|
63
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
64
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
65
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
66
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
67
|
+
// repack.cpp
|
|
68
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
69
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
70
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
71
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
72
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
73
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
74
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
75
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
76
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
77
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
78
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
79
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
80
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
81
|
+
#elif defined(__loongarch64)
|
|
82
|
+
// quants.c
|
|
83
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
84
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
85
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
86
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
87
|
+
// repack.cpp
|
|
88
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
89
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
90
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
91
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
92
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
93
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
94
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
95
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
96
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
97
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
98
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
99
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
100
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
101
|
+
#elif defined(__riscv)
|
|
102
|
+
// quants.c
|
|
103
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
104
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
105
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
106
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
107
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
108
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
109
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
110
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
111
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
112
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
113
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
114
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
115
|
+
// repack.cpp
|
|
116
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
117
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
118
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
119
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
120
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
121
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
122
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
123
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
124
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
125
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
126
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
127
|
+
#elif defined(__s390x__)
|
|
128
|
+
// quants.c
|
|
129
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
130
|
+
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
|
|
131
|
+
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
|
|
132
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
133
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
134
|
+
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
|
|
135
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
136
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
137
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
138
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
139
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
140
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
141
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
142
|
+
// repack.cpp
|
|
143
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
144
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
145
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
146
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
147
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
148
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
149
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
150
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
151
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
152
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
153
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
154
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
155
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
156
|
+
#elif defined(__wasm__)
|
|
157
|
+
// quants.c
|
|
158
|
+
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
|
|
159
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
160
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
161
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
162
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
163
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
164
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
165
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
166
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
167
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
168
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
169
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
170
|
+
// repack.cpp
|
|
171
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
172
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
173
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
174
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
175
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
176
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
177
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
178
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
179
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
180
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
181
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
182
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
183
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
184
|
+
#endif
|
|
@@ -320,21 +320,17 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
|
|
|
320
320
|
|
|
321
321
|
#ifdef __wasm_simd128__
|
|
322
322
|
#include <wasm_simd128.h>
|
|
323
|
-
#
|
|
323
|
+
#endif
|
|
324
|
+
|
|
324
325
|
#ifdef __POWER9_VECTOR__
|
|
325
326
|
#include <altivec.h>
|
|
326
|
-
#
|
|
327
|
+
#endif
|
|
328
|
+
|
|
327
329
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
328
330
|
#include <intrin.h>
|
|
329
|
-
#
|
|
330
|
-
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
|
|
331
|
-
#if !defined(__riscv)
|
|
331
|
+
#elif defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
|
|
332
332
|
#include <immintrin.h>
|
|
333
333
|
#endif
|
|
334
|
-
#endif
|
|
335
|
-
#endif
|
|
336
|
-
#endif
|
|
337
|
-
#endif
|
|
338
334
|
|
|
339
335
|
#ifdef __riscv_v_intrinsic
|
|
340
336
|
#include <riscv_vector.h>
|
|
@@ -375,7 +371,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
|
|
|
375
371
|
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
|
|
376
372
|
#endif
|
|
377
373
|
|
|
378
|
-
typedef signed
|
|
374
|
+
typedef signed char char8x16_t __attribute__((vector_size(16)));
|
|
379
375
|
typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
|
|
380
376
|
|
|
381
377
|
typedef int8_t int8x16_t __attribute__((vector_size(16)));
|
|
@@ -386,10 +382,10 @@ typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
|
|
|
386
382
|
typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
|
|
387
383
|
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
|
|
388
384
|
|
|
389
|
-
typedef float
|
|
390
|
-
typedef double double64x2_t
|
|
385
|
+
typedef float float32x4_t __attribute__((vector_size(16)));
|
|
386
|
+
typedef double double64x2_t __attribute__((vector_size(16)));
|
|
391
387
|
|
|
392
|
-
typedef signed
|
|
388
|
+
typedef signed long long long64x2_t __attribute__((vector_size(16)));
|
|
393
389
|
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
|
|
394
390
|
|
|
395
391
|
typedef struct ggml_uint8x16x2_t {
|
|
@@ -507,6 +503,9 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) {
|
|
|
507
503
|
// TODO: move to ggml-threading
|
|
508
504
|
void ggml_barrier(struct ggml_threadpool * tp);
|
|
509
505
|
|
|
506
|
+
void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value);
|
|
507
|
+
int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value);
|
|
508
|
+
|
|
510
509
|
#ifdef __cplusplus
|
|
511
510
|
}
|
|
512
511
|
#endif
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
|
|
4
4
|
#include "ggml-backend-impl.h"
|
|
5
5
|
#include "ggml-backend.h"
|
|
6
|
-
#include "
|
|
6
|
+
#include "traits.h"
|
|
7
7
|
#include "ggml-cpu-impl.h"
|
|
8
8
|
#include "ggml-cpu.h"
|
|
9
9
|
#include "ggml-impl.h"
|
|
10
|
-
#include "
|
|
10
|
+
#include "quants.h"
|
|
11
11
|
#include "ggml-threading.h"
|
|
12
12
|
#include "unary-ops.h"
|
|
13
13
|
#include "binary-ops.h"
|
|
@@ -74,13 +74,8 @@
|
|
|
74
74
|
|
|
75
75
|
#if defined(__ARM_ARCH)
|
|
76
76
|
struct ggml_arm_arch_features_type {
|
|
77
|
-
int has_neon;
|
|
78
|
-
int has_dotprod;
|
|
79
|
-
int has_i8mm;
|
|
80
|
-
int has_sve;
|
|
81
77
|
int sve_cnt;
|
|
82
|
-
|
|
83
|
-
} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
|
|
78
|
+
} ggml_arm_arch_features = { 0 };
|
|
84
79
|
#endif
|
|
85
80
|
|
|
86
81
|
|
|
@@ -270,7 +265,11 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
|
270
265
|
.from_float = quantize_row_q4_K,
|
|
271
266
|
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
|
272
267
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
268
|
+
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
|
269
|
+
.nrows = 2,
|
|
270
|
+
#else
|
|
273
271
|
.nrows = 1,
|
|
272
|
+
#endif
|
|
274
273
|
},
|
|
275
274
|
[GGML_TYPE_Q5_K] = {
|
|
276
275
|
.from_float = quantize_row_q5_K,
|
|
@@ -555,6 +554,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
|
|
|
555
554
|
#endif
|
|
556
555
|
}
|
|
557
556
|
|
|
557
|
+
void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
|
|
558
|
+
atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
|
|
562
|
+
return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
|
563
|
+
}
|
|
564
|
+
|
|
558
565
|
#if defined(__gnu_linux__)
|
|
559
566
|
static cpu_set_t ggml_get_numa_affinity(void) {
|
|
560
567
|
cpu_set_t cpuset;
|
|
@@ -666,87 +673,15 @@ bool ggml_is_numa(void) {
|
|
|
666
673
|
|
|
667
674
|
#if defined(__linux__) && defined(__aarch64__)
|
|
668
675
|
#include <sys/auxv.h>
|
|
669
|
-
#elif defined(__APPLE__)
|
|
670
|
-
#include <sys/sysctl.h>
|
|
671
|
-
#endif
|
|
672
|
-
|
|
673
|
-
#if !defined(HWCAP2_I8MM)
|
|
674
|
-
#define HWCAP2_I8MM (1 << 13)
|
|
675
|
-
#endif
|
|
676
|
-
|
|
677
|
-
#if !defined(HWCAP2_SME)
|
|
678
|
-
#define HWCAP2_SME (1 << 23)
|
|
679
676
|
#endif
|
|
680
677
|
|
|
681
678
|
static void ggml_init_arm_arch_features(void) {
|
|
682
|
-
#if defined(__linux__) && defined(__aarch64__)
|
|
683
|
-
uint32_t hwcap = getauxval(AT_HWCAP);
|
|
684
|
-
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
|
685
|
-
|
|
686
|
-
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
|
687
|
-
ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
|
688
|
-
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
|
689
|
-
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
|
690
|
-
ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
|
|
691
|
-
|
|
692
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
679
|
+
#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
|
693
680
|
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
|
694
681
|
#endif
|
|
695
|
-
#elif defined(__APPLE__)
|
|
696
|
-
int oldp = 0;
|
|
697
|
-
size_t size = sizeof(oldp);
|
|
698
|
-
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
|
699
|
-
oldp = 0;
|
|
700
|
-
}
|
|
701
|
-
ggml_arm_arch_features.has_neon = oldp;
|
|
702
|
-
|
|
703
|
-
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
|
|
704
|
-
oldp = 0;
|
|
705
|
-
}
|
|
706
|
-
ggml_arm_arch_features.has_dotprod = oldp;
|
|
707
|
-
|
|
708
|
-
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
|
709
|
-
oldp = 0;
|
|
710
|
-
}
|
|
711
|
-
ggml_arm_arch_features.has_i8mm = oldp;
|
|
712
|
-
|
|
713
|
-
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
|
|
714
|
-
oldp = 0;
|
|
715
|
-
}
|
|
716
|
-
ggml_arm_arch_features.has_sme = oldp;
|
|
717
|
-
|
|
718
|
-
ggml_arm_arch_features.has_sve = 0;
|
|
719
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
|
720
|
-
#else
|
|
721
|
-
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
|
722
|
-
#if defined(__ARM_NEON)
|
|
723
|
-
ggml_arm_arch_features.has_neon = 1;
|
|
724
|
-
#else
|
|
725
|
-
ggml_arm_arch_features.has_neon = 0;
|
|
726
|
-
#endif
|
|
727
|
-
|
|
728
|
-
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
|
729
|
-
ggml_arm_arch_features.has_i8mm = 1;
|
|
730
|
-
#else
|
|
731
|
-
ggml_arm_arch_features.has_i8mm = 0;
|
|
732
|
-
#endif
|
|
733
|
-
|
|
734
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
735
|
-
ggml_arm_arch_features.has_sve = 1;
|
|
736
|
-
ggml_arm_arch_features.sve_cnt = 16;
|
|
737
|
-
#else
|
|
738
|
-
ggml_arm_arch_features.has_sve = 0;
|
|
739
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
|
740
|
-
#endif
|
|
741
|
-
|
|
742
|
-
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
|
|
743
|
-
ggml_arm_arch_features.has_sme = 1;
|
|
744
|
-
#else
|
|
745
|
-
ggml_arm_arch_features.has_sme = 0;
|
|
746
|
-
#endif
|
|
747
|
-
#endif
|
|
748
682
|
}
|
|
749
|
-
|
|
683
|
+
|
|
684
|
+
#endif // __ARM_ARCH
|
|
750
685
|
|
|
751
686
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
|
752
687
|
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
|
@@ -1955,6 +1890,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1955
1890
|
{
|
|
1956
1891
|
ggml_compute_forward_pad_reflect_1d(params, tensor);
|
|
1957
1892
|
} break;
|
|
1893
|
+
case GGML_OP_ROLL:
|
|
1894
|
+
{
|
|
1895
|
+
ggml_compute_forward_roll(params, tensor);
|
|
1896
|
+
} break;
|
|
1958
1897
|
case GGML_OP_ARANGE:
|
|
1959
1898
|
{
|
|
1960
1899
|
ggml_compute_forward_arange(params, tensor);
|
|
@@ -2279,6 +2218,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2279
2218
|
case GGML_OP_UPSCALE:
|
|
2280
2219
|
case GGML_OP_PAD:
|
|
2281
2220
|
case GGML_OP_PAD_REFLECT_1D:
|
|
2221
|
+
case GGML_OP_ROLL:
|
|
2282
2222
|
case GGML_OP_ARANGE:
|
|
2283
2223
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
2284
2224
|
case GGML_OP_ARGSORT:
|
|
@@ -2414,12 +2354,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
|
2414
2354
|
// This is up to the applications.
|
|
2415
2355
|
DWORD p = THREAD_PRIORITY_NORMAL;
|
|
2416
2356
|
switch (prio) {
|
|
2357
|
+
case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
|
|
2417
2358
|
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
|
|
2418
2359
|
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
|
|
2419
2360
|
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
|
|
2420
2361
|
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
|
|
2421
2362
|
}
|
|
2422
2363
|
|
|
2364
|
+
if (prio != GGML_SCHED_PRIO_LOW) {
|
|
2365
|
+
// Tell Windows that this thread should not be throttled (needs its own CPU core).
|
|
2366
|
+
// Newer Windows 11 versions aggressively park (offline) CPU cores and often place
|
|
2367
|
+
// all our threads onto the first 4 cores which results in terrible performance with
|
|
2368
|
+
// n_threads > 4
|
|
2369
|
+
#if _WIN32_WINNT >= 0x0602
|
|
2370
|
+
THREAD_POWER_THROTTLING_STATE t;
|
|
2371
|
+
ZeroMemory(&t, sizeof(t));
|
|
2372
|
+
t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
|
|
2373
|
+
t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
|
|
2374
|
+
t.StateMask = 0;
|
|
2375
|
+
|
|
2376
|
+
if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
|
|
2377
|
+
GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
|
|
2378
|
+
return false;
|
|
2379
|
+
}
|
|
2380
|
+
#endif
|
|
2381
|
+
}
|
|
2382
|
+
|
|
2423
2383
|
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
|
2424
2384
|
// Keep inherited policy/priority
|
|
2425
2385
|
return true;
|
|
@@ -2447,6 +2407,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
|
2447
2407
|
struct sched_param p;
|
|
2448
2408
|
int32_t policy = SCHED_OTHER;
|
|
2449
2409
|
switch (prio) {
|
|
2410
|
+
// TODO: there seems to be no way to set lower prio on Apple platforms
|
|
2411
|
+
case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
|
2450
2412
|
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
|
2451
2413
|
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
|
2452
2414
|
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
|
@@ -2503,6 +2465,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
|
2503
2465
|
struct sched_param p;
|
|
2504
2466
|
int32_t policy = SCHED_OTHER;
|
|
2505
2467
|
switch (prio) {
|
|
2468
|
+
case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
|
|
2506
2469
|
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
|
2507
2470
|
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
|
2508
2471
|
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
|
@@ -3408,7 +3371,7 @@ int ggml_cpu_has_vxe(void) {
|
|
|
3408
3371
|
|
|
3409
3372
|
int ggml_cpu_has_neon(void) {
|
|
3410
3373
|
#if defined(__ARM_ARCH) && defined(__ARM_NEON)
|
|
3411
|
-
return
|
|
3374
|
+
return 1;
|
|
3412
3375
|
#else
|
|
3413
3376
|
return 0;
|
|
3414
3377
|
#endif
|
|
@@ -3416,7 +3379,7 @@ int ggml_cpu_has_neon(void) {
|
|
|
3416
3379
|
|
|
3417
3380
|
int ggml_cpu_has_dotprod(void) {
|
|
3418
3381
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
|
|
3419
|
-
return
|
|
3382
|
+
return 1;
|
|
3420
3383
|
#else
|
|
3421
3384
|
return 0;
|
|
3422
3385
|
#endif
|
|
@@ -3424,7 +3387,7 @@ int ggml_cpu_has_dotprod(void) {
|
|
|
3424
3387
|
|
|
3425
3388
|
int ggml_cpu_has_sve(void) {
|
|
3426
3389
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
|
|
3427
|
-
return
|
|
3390
|
+
return 1;
|
|
3428
3391
|
#else
|
|
3429
3392
|
return 0;
|
|
3430
3393
|
#endif
|
|
@@ -3432,7 +3395,7 @@ int ggml_cpu_has_sve(void) {
|
|
|
3432
3395
|
|
|
3433
3396
|
int ggml_cpu_has_matmul_int8(void) {
|
|
3434
3397
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
|
|
3435
|
-
return
|
|
3398
|
+
return 1;
|
|
3436
3399
|
#else
|
|
3437
3400
|
return 0;
|
|
3438
3401
|
#endif
|
|
@@ -3448,7 +3411,7 @@ int ggml_cpu_get_sve_cnt(void) {
|
|
|
3448
3411
|
|
|
3449
3412
|
int ggml_cpu_has_sme(void) {
|
|
3450
3413
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
|
|
3451
|
-
return
|
|
3414
|
+
return 1;
|
|
3452
3415
|
#else
|
|
3453
3416
|
return 0;
|
|
3454
3417
|
#endif
|
|
@@ -3484,6 +3447,19 @@ void ggml_cpu_init(void) {
|
|
|
3484
3447
|
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
|
3485
3448
|
|
|
3486
3449
|
GGML_PRINT_DEBUG("%s: GELU, Quick GELU, SILU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0);
|
|
3450
|
+
|
|
3451
|
+
#ifdef GGML_USE_OPENMP
|
|
3452
|
+
//if (!getenv("OMP_WAIT_POLICY")) {
|
|
3453
|
+
// // set the wait policy to active, so that OpenMP threads don't sleep
|
|
3454
|
+
// putenv("OMP_WAIT_POLICY=active");
|
|
3455
|
+
//}
|
|
3456
|
+
|
|
3457
|
+
if (!getenv("KMP_BLOCKTIME")) {
|
|
3458
|
+
// set the time to wait before sleeping a thread
|
|
3459
|
+
// this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
|
|
3460
|
+
putenv("KMP_BLOCKTIME=200"); // 200ms
|
|
3461
|
+
}
|
|
3462
|
+
#endif
|
|
3487
3463
|
}
|
|
3488
3464
|
|
|
3489
3465
|
#if defined(__ARM_ARCH)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#include "ggml-backend.h"
|
|
2
2
|
#include "ggml-backend-impl.h"
|
|
3
3
|
#include "ggml-cpu.h"
|
|
4
|
-
#include "
|
|
5
|
-
#include "
|
|
4
|
+
#include "repack.h"
|
|
5
|
+
#include "traits.h"
|
|
6
6
|
#include "ggml-impl.h"
|
|
7
7
|
#include "amx/amx.h"
|
|
8
8
|
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#include <vector>
|
|
12
12
|
|
|
13
13
|
#ifdef GGML_USE_CPU_HBM
|
|
14
|
-
# include "
|
|
14
|
+
# include "hbm.h"
|
|
15
15
|
#endif
|
|
16
16
|
|
|
17
17
|
#ifdef GGML_USE_CPU_KLEIDIAI
|
|
@@ -51,9 +51,9 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
|
|
|
51
51
|
}
|
|
52
52
|
#endif
|
|
53
53
|
|
|
54
|
-
#ifdef
|
|
55
|
-
if (
|
|
56
|
-
bufts.push_back(
|
|
54
|
+
#ifdef GGML_USE_CPU_REPACK
|
|
55
|
+
if (ggml_backend_cpu_repack_buffer_type()) {
|
|
56
|
+
bufts.push_back(ggml_backend_cpu_repack_buffer_type());
|
|
57
57
|
}
|
|
58
58
|
#endif
|
|
59
59
|
|
|
@@ -596,8 +596,8 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
596
596
|
#ifdef GGML_USE_CPU_KLEIDIAI
|
|
597
597
|
features.push_back({ "KLEIDIAI", "1" });
|
|
598
598
|
#endif
|
|
599
|
-
#ifdef
|
|
600
|
-
features.push_back({ "
|
|
599
|
+
#ifdef GGML_USE_CPU_REPACK
|
|
600
|
+
features.push_back({ "REPACK", "1" });
|
|
601
601
|
#endif
|
|
602
602
|
|
|
603
603
|
features.push_back({ nullptr, nullptr });
|