llama-cpp-capacitor 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/cpp/LICENSE +21 -0
  2. package/cpp/README.md +4 -0
  3. package/cpp/anyascii.c +22223 -0
  4. package/cpp/anyascii.h +42 -0
  5. package/cpp/chat-parser.cpp +393 -0
  6. package/cpp/chat-parser.h +120 -0
  7. package/cpp/chat.cpp +2315 -0
  8. package/cpp/chat.h +221 -0
  9. package/cpp/common.cpp +1619 -0
  10. package/cpp/common.h +744 -0
  11. package/cpp/ggml-alloc.c +1028 -0
  12. package/cpp/ggml-alloc.h +76 -0
  13. package/cpp/ggml-backend-impl.h +255 -0
  14. package/cpp/ggml-backend-reg.cpp +600 -0
  15. package/cpp/ggml-backend.cpp +2118 -0
  16. package/cpp/ggml-backend.h +354 -0
  17. package/cpp/ggml-common.h +1878 -0
  18. package/cpp/ggml-cpp.h +39 -0
  19. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  20. package/cpp/ggml-cpu/amx/amx.h +8 -0
  21. package/cpp/ggml-cpu/amx/common.h +91 -0
  22. package/cpp/ggml-cpu/amx/mmq.cpp +2512 -0
  23. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  24. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  25. package/cpp/ggml-cpu/arch/arm/quants.c +3650 -0
  26. package/cpp/ggml-cpu/arch/arm/repack.cpp +1891 -0
  27. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  28. package/cpp/ggml-cpu/arch/x86/quants.c +3820 -0
  29. package/cpp/ggml-cpu/arch/x86/repack.cpp +6307 -0
  30. package/cpp/ggml-cpu/arch-fallback.h +215 -0
  31. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  32. package/cpp/ggml-cpu/binary-ops.h +16 -0
  33. package/cpp/ggml-cpu/common.h +73 -0
  34. package/cpp/ggml-cpu/ggml-cpu-impl.h +525 -0
  35. package/cpp/ggml-cpu/ggml-cpu.c +3578 -0
  36. package/cpp/ggml-cpu/ggml-cpu.cpp +672 -0
  37. package/cpp/ggml-cpu/ops.cpp +10587 -0
  38. package/cpp/ggml-cpu/ops.h +114 -0
  39. package/cpp/ggml-cpu/quants.c +1193 -0
  40. package/cpp/ggml-cpu/quants.h +97 -0
  41. package/cpp/ggml-cpu/repack.cpp +1982 -0
  42. package/cpp/ggml-cpu/repack.h +120 -0
  43. package/cpp/ggml-cpu/simd-mappings.h +1184 -0
  44. package/cpp/ggml-cpu/traits.cpp +36 -0
  45. package/cpp/ggml-cpu/traits.h +38 -0
  46. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  47. package/cpp/ggml-cpu/unary-ops.h +28 -0
  48. package/cpp/ggml-cpu/vec.cpp +348 -0
  49. package/cpp/ggml-cpu/vec.h +1121 -0
  50. package/cpp/ggml-cpu.h +145 -0
  51. package/cpp/ggml-impl.h +622 -0
  52. package/cpp/ggml-metal-impl.h +688 -0
  53. package/cpp/ggml-metal.h +66 -0
  54. package/cpp/ggml-metal.m +6833 -0
  55. package/cpp/ggml-opt.cpp +1093 -0
  56. package/cpp/ggml-opt.h +256 -0
  57. package/cpp/ggml-quants.c +5324 -0
  58. package/cpp/ggml-quants.h +106 -0
  59. package/cpp/ggml-threading.cpp +12 -0
  60. package/cpp/ggml-threading.h +14 -0
  61. package/cpp/ggml.c +7108 -0
  62. package/cpp/ggml.h +2492 -0
  63. package/cpp/gguf.cpp +1358 -0
  64. package/cpp/gguf.h +202 -0
  65. package/cpp/json-partial.cpp +256 -0
  66. package/cpp/json-partial.h +38 -0
  67. package/cpp/json-schema-to-grammar.cpp +985 -0
  68. package/cpp/json-schema-to-grammar.h +21 -0
  69. package/cpp/llama-adapter.cpp +388 -0
  70. package/cpp/llama-adapter.h +76 -0
  71. package/cpp/llama-arch.cpp +2355 -0
  72. package/cpp/llama-arch.h +499 -0
  73. package/cpp/llama-batch.cpp +875 -0
  74. package/cpp/llama-batch.h +160 -0
  75. package/cpp/llama-chat.cpp +783 -0
  76. package/cpp/llama-chat.h +65 -0
  77. package/cpp/llama-context.cpp +2748 -0
  78. package/cpp/llama-context.h +306 -0
  79. package/cpp/llama-cparams.cpp +5 -0
  80. package/cpp/llama-cparams.h +41 -0
  81. package/cpp/llama-cpp.h +30 -0
  82. package/cpp/llama-grammar.cpp +1229 -0
  83. package/cpp/llama-grammar.h +173 -0
  84. package/cpp/llama-graph.cpp +1891 -0
  85. package/cpp/llama-graph.h +810 -0
  86. package/cpp/llama-hparams.cpp +180 -0
  87. package/cpp/llama-hparams.h +233 -0
  88. package/cpp/llama-impl.cpp +167 -0
  89. package/cpp/llama-impl.h +61 -0
  90. package/cpp/llama-io.cpp +15 -0
  91. package/cpp/llama-io.h +35 -0
  92. package/cpp/llama-kv-cache-iswa.cpp +318 -0
  93. package/cpp/llama-kv-cache-iswa.h +135 -0
  94. package/cpp/llama-kv-cache.cpp +2059 -0
  95. package/cpp/llama-kv-cache.h +374 -0
  96. package/cpp/llama-kv-cells.h +491 -0
  97. package/cpp/llama-memory-hybrid.cpp +258 -0
  98. package/cpp/llama-memory-hybrid.h +137 -0
  99. package/cpp/llama-memory-recurrent.cpp +1146 -0
  100. package/cpp/llama-memory-recurrent.h +179 -0
  101. package/cpp/llama-memory.cpp +59 -0
  102. package/cpp/llama-memory.h +119 -0
  103. package/cpp/llama-mmap.cpp +600 -0
  104. package/cpp/llama-mmap.h +68 -0
  105. package/cpp/llama-model-loader.cpp +1164 -0
  106. package/cpp/llama-model-loader.h +170 -0
  107. package/cpp/llama-model-saver.cpp +282 -0
  108. package/cpp/llama-model-saver.h +37 -0
  109. package/cpp/llama-model.cpp +19042 -0
  110. package/cpp/llama-model.h +491 -0
  111. package/cpp/llama-sampling.cpp +2575 -0
  112. package/cpp/llama-sampling.h +32 -0
  113. package/cpp/llama-vocab.cpp +3792 -0
  114. package/cpp/llama-vocab.h +176 -0
  115. package/cpp/llama.cpp +358 -0
  116. package/cpp/llama.h +1373 -0
  117. package/cpp/log.cpp +427 -0
  118. package/cpp/log.h +103 -0
  119. package/cpp/minja/chat-template.hpp +550 -0
  120. package/cpp/minja/minja.hpp +3009 -0
  121. package/cpp/nlohmann/json.hpp +25526 -0
  122. package/cpp/nlohmann/json_fwd.hpp +187 -0
  123. package/cpp/regex-partial.cpp +204 -0
  124. package/cpp/regex-partial.h +56 -0
  125. package/cpp/rn-completion.cpp +681 -0
  126. package/cpp/rn-completion.h +116 -0
  127. package/cpp/rn-llama.cpp +345 -0
  128. package/cpp/rn-llama.h +149 -0
  129. package/cpp/rn-mtmd.hpp +602 -0
  130. package/cpp/rn-tts.cpp +591 -0
  131. package/cpp/rn-tts.h +59 -0
  132. package/cpp/sampling.cpp +579 -0
  133. package/cpp/sampling.h +107 -0
  134. package/cpp/tools/mtmd/clip-impl.h +473 -0
  135. package/cpp/tools/mtmd/clip.cpp +4322 -0
  136. package/cpp/tools/mtmd/clip.h +106 -0
  137. package/cpp/tools/mtmd/miniaudio/miniaudio.h +93468 -0
  138. package/cpp/tools/mtmd/mtmd-audio.cpp +769 -0
  139. package/cpp/tools/mtmd/mtmd-audio.h +47 -0
  140. package/cpp/tools/mtmd/mtmd-helper.cpp +460 -0
  141. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  142. package/cpp/tools/mtmd/mtmd.cpp +1066 -0
  143. package/cpp/tools/mtmd/mtmd.h +298 -0
  144. package/cpp/tools/mtmd/stb/stb_image.h +7988 -0
  145. package/cpp/unicode-data.cpp +7034 -0
  146. package/cpp/unicode-data.h +20 -0
  147. package/cpp/unicode.cpp +1061 -0
  148. package/cpp/unicode.h +68 -0
  149. package/package.json +2 -1
@@ -0,0 +1,120 @@
1
+ #pragma once
2
+
3
+ #define LM_GGML_COMMON_DECL_CPP
4
+ #include "ggml-common.h"
5
+
6
+ #include "traits.h"
7
+ #include "ggml.h"
8
+
9
+ // GGML internal header
10
+
11
+ lm_ggml_backend_buffer_type_t lm_ggml_backend_cpu_repack_buffer_type(void);
12
+
13
+ template <int K> constexpr int QK_0() {
14
+ if constexpr (K == 4) {
15
+ return QK4_0;
16
+ }
17
+ if constexpr (K == 8) {
18
+ return QK8_0;
19
+ }
20
+ return -1;
21
+ }
22
+
23
+ template <int K, int N> struct block {
24
+ lm_ggml_half d[N]; // deltas for N qK_0 blocks
25
+ int8_t qs[(QK_0<K>() * N * K) / 8]; // quants for N qK_0 blocks
26
+ };
27
+
28
+ // control size
29
+ static_assert(sizeof(block<4, 4>) == 4 * sizeof(lm_ggml_half) + QK8_0 * 2, "wrong block<4,4> size/padding");
30
+ static_assert(sizeof(block<4, 8>) == 8 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong block<4,8> size/padding");
31
+ static_assert(sizeof(block<8, 4>) == 4 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong block<8,4> size/padding");
32
+ static_assert(sizeof(block<8, 8>) == 8 * sizeof(lm_ggml_half) + QK8_0 * 8, "wrong block<8,8> size/padding");
33
+
34
+ using block_q4_0x4 = block<4, 4>;
35
+ using block_q4_0x8 = block<4, 8>;
36
+ using block_q8_0x4 = block<8, 4>;
37
+ using block_q8_0x8 = block<8, 8>;
38
+
39
+ struct block_q4_Kx8 {
40
+ lm_ggml_half d[8]; // super-block scale for quantized scales
41
+ lm_ggml_half dmin[8]; // super-block scale for quantized mins
42
+ uint8_t scales[96]; // scales and mins, quantized with 6 bits
43
+ uint8_t qs[1024]; // 4--bit quants
44
+ };
45
+
46
+ static_assert(sizeof(block_q4_Kx8) == sizeof(lm_ggml_half) * 16 + K_SCALE_SIZE * 8 + QK_K * 4, "wrong q4_K block size/padding");
47
+ struct block_q2_Kx8 {
48
+ lm_ggml_half d[8]; // super-block scale for quantized scales
49
+ lm_ggml_half dmin[8]; // super-block scale for quantized mins
50
+ uint8_t scales[128]; // scales and mins, quantized with 4 bits
51
+ uint8_t qs[512]; // 2--bit quants
52
+ };
53
+
54
+ static_assert(sizeof(block_q2_Kx8) == sizeof(lm_ggml_half) * 16 + QK_K/2 + QK_K * 2, "wrong q2_K block size/padding");
55
+ struct block_q8_Kx4 {
56
+ float d[4]; // delta
57
+ int8_t qs[QK_K * 4]; // quants
58
+ int16_t bsums[QK_K / 4]; // sum of quants in groups of 16
59
+ };
60
+
61
+ static_assert(sizeof(block_q8_Kx4) == sizeof(float) * 4 + QK_K * 4 + (QK_K / 4) * sizeof(int16_t), "wrong q8_K block size/padding");
62
+
63
+ struct block_iq4_nlx4 {
64
+ lm_ggml_half d[4]; // deltas for 4 iq4_nl blocks
65
+ uint8_t qs[QK4_NL * 2]; // nibbles / quants for 4 iq4_nl blocks
66
+ };
67
+
68
+ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
69
+
70
+ struct block_iq4_nlx8 {
71
+ lm_ggml_half d[8]; // deltas for 8 iq4_nl blocks
72
+ uint8_t qs[QK4_NL * 4]; // nibbles / quants for 8 iq4_nl blocks
73
+ };
74
+
75
+ static_assert(sizeof(block_iq4_nlx8) == 8 * sizeof(lm_ggml_half) + QK4_NL * 4, "wrong iq4_nlx8 block size/padding");
76
+
77
+ #if defined(__cplusplus)
78
+ extern "C" {
79
+ #endif
80
+
81
+ void lm_ggml_quantize_mat_q8_0_4x4(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
82
+ void lm_ggml_quantize_mat_q8_0_4x8(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
83
+ void lm_ggml_quantize_mat_q8_K_4x8(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
84
+ void lm_ggml_gemv_q4_0_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
85
+ void lm_ggml_gemv_q4_0_4x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
86
+ void lm_ggml_gemv_q4_0_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
87
+ void lm_ggml_gemv_q4_K_8x8_q8_K(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
88
+ void lm_ggml_gemv_q2_K_8x8_q8_K(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
89
+ void lm_ggml_gemv_iq4_nl_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
90
+ void lm_ggml_gemv_iq4_nl_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
91
+ void lm_ggml_gemm_q4_0_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
92
+ void lm_ggml_gemm_q4_0_4x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
93
+ void lm_ggml_gemm_q4_0_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
94
+ void lm_ggml_gemm_q4_K_8x8_q8_K(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
95
+ void lm_ggml_gemm_q2_K_8x8_q8_K(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
96
+ void lm_ggml_gemm_iq4_nl_4x4_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
97
+ void lm_ggml_gemm_iq4_nl_8x8_q8_0(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
98
+
99
+ // Native implementations
100
+ void lm_ggml_quantize_mat_q8_0_4x4_generic(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
101
+ void lm_ggml_quantize_mat_q8_0_4x8_generic(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
102
+ void lm_ggml_quantize_mat_q8_K_4x8_generic(const float * LM_GGML_RESTRICT x, void * LM_GGML_RESTRICT vy, int64_t k);
103
+ void lm_ggml_gemv_q4_0_4x4_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
104
+ void lm_ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
105
+ void lm_ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
106
+ void lm_ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
107
+ void lm_ggml_gemv_q2_K_8x8_q8_K_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
108
+ void lm_ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
109
+ void lm_ggml_gemv_iq4_nl_8x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
110
+ void lm_ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
111
+ void lm_ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
112
+ void lm_ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
113
+ void lm_ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
114
+ void lm_ggml_gemm_q2_K_8x8_q8_K_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
115
+ void lm_ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
116
+ void lm_ggml_gemm_iq4_nl_8x8_q8_0_generic(int n, float * LM_GGML_RESTRICT s, size_t bs, const void * LM_GGML_RESTRICT vx, const void * LM_GGML_RESTRICT vy, int nr, int nc);
117
+
118
+ #if defined(__cplusplus)
119
+ } // extern "C"
120
+ #endif