llama-cpp-capacitor 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/LICENSE +21 -0
- package/cpp/README.md +4 -0
- package/cpp/anyascii.c +22223 -0
- package/cpp/anyascii.h +42 -0
- package/cpp/chat-parser.cpp +393 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +2315 -0
- package/cpp/chat.h +221 -0
- package/cpp/common.cpp +1619 -0
- package/cpp/common.h +744 -0
- package/cpp/ggml-alloc.c +1028 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +255 -0
- package/cpp/ggml-backend-reg.cpp +600 -0
- package/cpp/ggml-backend.cpp +2118 -0
- package/cpp/ggml-backend.h +354 -0
- package/cpp/ggml-common.h +1878 -0
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +3650 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1891 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml-cpu/arch-fallback.h +215 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +73 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +525 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3578 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +672 -0
- package/cpp/ggml-cpu/ops.cpp +10587 -0
- package/cpp/ggml-cpu/ops.h +114 -0
- package/cpp/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml-cpu/quants.h +97 -0
- package/cpp/ggml-cpu/repack.cpp +1982 -0
- package/cpp/ggml-cpu/repack.h +120 -0
- package/cpp/ggml-cpu/simd-mappings.h +1184 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +348 -0
- package/cpp/ggml-cpu/vec.h +1121 -0
- package/cpp/ggml-cpu.h +145 -0
- package/cpp/ggml-impl.h +622 -0
- package/cpp/ggml-metal-impl.h +688 -0
- package/cpp/ggml-metal.h +66 -0
- package/cpp/ggml-metal.m +6833 -0
- package/cpp/ggml-opt.cpp +1093 -0
- package/cpp/ggml-opt.h +256 -0
- package/cpp/ggml-quants.c +5324 -0
- package/cpp/ggml-quants.h +106 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml.c +7108 -0
- package/cpp/ggml.h +2492 -0
- package/cpp/gguf.cpp +1358 -0
- package/cpp/gguf.h +202 -0
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +985 -0
- package/cpp/json-schema-to-grammar.h +21 -0
- package/cpp/llama-adapter.cpp +388 -0
- package/cpp/llama-adapter.h +76 -0
- package/cpp/llama-arch.cpp +2355 -0
- package/cpp/llama-arch.h +499 -0
- package/cpp/llama-batch.cpp +875 -0
- package/cpp/llama-batch.h +160 -0
- package/cpp/llama-chat.cpp +783 -0
- package/cpp/llama-chat.h +65 -0
- package/cpp/llama-context.cpp +2748 -0
- package/cpp/llama-context.h +306 -0
- package/cpp/llama-cparams.cpp +5 -0
- package/cpp/llama-cparams.h +41 -0
- package/cpp/llama-cpp.h +30 -0
- package/cpp/llama-grammar.cpp +1229 -0
- package/cpp/llama-grammar.h +173 -0
- package/cpp/llama-graph.cpp +1891 -0
- package/cpp/llama-graph.h +810 -0
- package/cpp/llama-hparams.cpp +180 -0
- package/cpp/llama-hparams.h +233 -0
- package/cpp/llama-impl.cpp +167 -0
- package/cpp/llama-impl.h +61 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache-iswa.cpp +318 -0
- package/cpp/llama-kv-cache-iswa.h +135 -0
- package/cpp/llama-kv-cache.cpp +2059 -0
- package/cpp/llama-kv-cache.h +374 -0
- package/cpp/llama-kv-cells.h +491 -0
- package/cpp/llama-memory-hybrid.cpp +258 -0
- package/cpp/llama-memory-hybrid.h +137 -0
- package/cpp/llama-memory-recurrent.cpp +1146 -0
- package/cpp/llama-memory-recurrent.h +179 -0
- package/cpp/llama-memory.cpp +59 -0
- package/cpp/llama-memory.h +119 -0
- package/cpp/llama-mmap.cpp +600 -0
- package/cpp/llama-mmap.h +68 -0
- package/cpp/llama-model-loader.cpp +1164 -0
- package/cpp/llama-model-loader.h +170 -0
- package/cpp/llama-model-saver.cpp +282 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +19042 -0
- package/cpp/llama-model.h +491 -0
- package/cpp/llama-sampling.cpp +2575 -0
- package/cpp/llama-sampling.h +32 -0
- package/cpp/llama-vocab.cpp +3792 -0
- package/cpp/llama-vocab.h +176 -0
- package/cpp/llama.cpp +358 -0
- package/cpp/llama.h +1373 -0
- package/cpp/log.cpp +427 -0
- package/cpp/log.h +103 -0
- package/cpp/minja/chat-template.hpp +550 -0
- package/cpp/minja/minja.hpp +3009 -0
- package/cpp/nlohmann/json.hpp +25526 -0
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-completion.cpp +681 -0
- package/cpp/rn-completion.h +116 -0
- package/cpp/rn-llama.cpp +345 -0
- package/cpp/rn-llama.h +149 -0
- package/cpp/rn-mtmd.hpp +602 -0
- package/cpp/rn-tts.cpp +591 -0
- package/cpp/rn-tts.h +59 -0
- package/cpp/sampling.cpp +579 -0
- package/cpp/sampling.h +107 -0
- package/cpp/tools/mtmd/clip-impl.h +473 -0
- package/cpp/tools/mtmd/clip.cpp +4322 -0
- package/cpp/tools/mtmd/clip.h +106 -0
- package/cpp/tools/mtmd/miniaudio/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +769 -0
- package/cpp/tools/mtmd/mtmd-audio.h +47 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +460 -0
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +1066 -0
- package/cpp/tools/mtmd/mtmd.h +298 -0
- package/cpp/tools/mtmd/stb/stb_image.h +7988 -0
- package/cpp/unicode-data.cpp +7034 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +1061 -0
- package/cpp/unicode.h +68 -0
- package/package.json +2 -1
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#pragma once

#include "common.h"

// Work-buffer (scratch) size the AMX matmul kernel wants when computing `dst`
// (presumably queried by the CPU backend before dispatch — verify against callers).
size_t lm_ggml_backend_amx_desired_wsize(const struct lm_ggml_tensor * dst);

// Number of bytes to allocate for `tensor` in the AMX buffer type;
// may differ from the plain tensor byte size due to weight repacking.
size_t lm_ggml_backend_amx_get_alloc_size(const struct lm_ggml_tensor * tensor);

// Repack `size` bytes of weight data (read from `data`, written at `offset`)
// into the AMX-friendly layout held by `tensor`.
void lm_ggml_backend_amx_convert_weight(struct lm_ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// Matrix multiplication entry point for the AMX path; computes `dst` using
// the per-thread compute parameters in `params`.
void lm_ggml_backend_amx_mul_mat(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#include "ggml-backend-impl.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__aarch64__)
|
|
4
|
+
|
|
5
|
+
#if defined(__linux__)
|
|
6
|
+
#include <sys/auxv.h>
|
|
7
|
+
#elif defined(__APPLE__)
|
|
8
|
+
#include <sys/sysctl.h>
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
// Fallback definitions so this file still builds against older Linux kernel
// headers that do not yet expose these AT_HWCAP2 bits. The bit positions are
// part of the kernel ABI, so hard-coding them is safe.
#if !defined(HWCAP2_I8MM)
#define HWCAP2_I8MM (1 << 13)
#endif

#if !defined(HWCAP2_SME)
#define HWCAP2_SME (1 << 23)
#endif
|
|
18
|
+
|
|
19
|
+
// Runtime CPU-feature probe for AArch64: each flag is detected once in the
// constructor using the platform's native query mechanism (auxv on Linux,
// sysctl on Apple). Flags stay false on platforms with neither mechanism.
struct aarch64_features {
    // has_neon not needed, aarch64 has NEON guaranteed
    bool has_dotprod = false;
    bool has_fp16_va = false;
    bool has_sve = false;
    bool has_sve2 = false;
    bool has_i8mm = false;
    bool has_sme = false;

    aarch64_features() {
#if defined(__linux__)
        // Linux: read the kernel-provided hardware-capability bitmasks.
        uint32_t hwcap = getauxval(AT_HWCAP);
        uint32_t hwcap2 = getauxval(AT_HWCAP2);

        has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
        has_fp16_va = !!(hwcap & HWCAP_FPHP);
        has_sve = !!(hwcap & HWCAP_SVE);
        has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
        has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
        has_sme = !!(hwcap2 & HWCAP2_SME);
#elif defined(__APPLE__)
        // Apple: query per-feature sysctl keys; a failed lookup leaves the
        // corresponding flag at its default of false.
        // NOTE(review): `size` is not reset between calls — fine here since
        // every queried key returns an int, so size stays sizeof(int).
        int oldp = 0;
        size_t size = sizeof(oldp);

        if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
            has_dotprod = static_cast<bool>(oldp);
        }

        if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
            has_i8mm = static_cast<bool>(oldp);
        }

        if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
            has_sme = static_cast<bool>(oldp);
        }

        // Apple apparently does not implement SVE yet
#endif
    }
};
|
|
59
|
+
|
|
60
|
+
static int lm_ggml_backend_cpu_aarch64_score() {
|
|
61
|
+
int score = 1;
|
|
62
|
+
aarch64_features af;
|
|
63
|
+
|
|
64
|
+
#ifdef LM_GGML_USE_DOTPROD
|
|
65
|
+
if (!af.has_dotprod) { return 0; }
|
|
66
|
+
score += 1<<1;
|
|
67
|
+
#endif
|
|
68
|
+
#ifdef LM_GGML_USE_FP16_VECTOR_ARITHMETIC
|
|
69
|
+
if (!af.has_fp16_va) { return 0; }
|
|
70
|
+
score += 1<<2;
|
|
71
|
+
#endif
|
|
72
|
+
#ifdef LM_GGML_USE_SVE
|
|
73
|
+
if (!af.has_sve) { return 0; }
|
|
74
|
+
score += 1<<3;
|
|
75
|
+
#endif
|
|
76
|
+
#ifdef LM_GGML_USE_MATMUL_INT8
|
|
77
|
+
if (!af.has_i8mm) { return 0; }
|
|
78
|
+
score += 1<<4;
|
|
79
|
+
#endif
|
|
80
|
+
#ifdef LM_GGML_USE_SVE2
|
|
81
|
+
if (!af.has_sve2) { return 0; }
|
|
82
|
+
score += 1<<5;
|
|
83
|
+
#endif
|
|
84
|
+
#ifdef LM_GGML_USE_SME
|
|
85
|
+
if (!af.has_sme) { return 0; }
|
|
86
|
+
score += 1<<6;
|
|
87
|
+
#endif
|
|
88
|
+
|
|
89
|
+
return score;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Register the scoring function as this dynamically-loadable backend's
// score entry point (macro presumably declared in ggml-backend-impl.h,
// included above — verify).
LM_GGML_BACKEND_DL_SCORE_IMPL(lm_ggml_backend_cpu_aarch64_score)

# endif // defined(__aarch64__)
|