llama-cpp-capacitor 0.0.5 → 0.0.7

Files changed (149)
  1. package/cpp/LICENSE +21 -0
  2. package/cpp/README.md +4 -0
  3. package/cpp/anyascii.c +22223 -0
  4. package/cpp/anyascii.h +42 -0
  5. package/cpp/chat-parser.cpp +393 -0
  6. package/cpp/chat-parser.h +120 -0
  7. package/cpp/chat.cpp +2315 -0
  8. package/cpp/chat.h +221 -0
  9. package/cpp/common.cpp +1619 -0
  10. package/cpp/common.h +744 -0
  11. package/cpp/ggml-alloc.c +1028 -0
  12. package/cpp/ggml-alloc.h +76 -0
  13. package/cpp/ggml-backend-impl.h +255 -0
  14. package/cpp/ggml-backend-reg.cpp +600 -0
  15. package/cpp/ggml-backend.cpp +2118 -0
  16. package/cpp/ggml-backend.h +354 -0
  17. package/cpp/ggml-common.h +1878 -0
  18. package/cpp/ggml-cpp.h +39 -0
  19. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  20. package/cpp/ggml-cpu/amx/amx.h +8 -0
  21. package/cpp/ggml-cpu/amx/common.h +91 -0
  22. package/cpp/ggml-cpu/amx/mmq.cpp +2512 -0
  23. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  24. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  25. package/cpp/ggml-cpu/arch/arm/quants.c +3650 -0
  26. package/cpp/ggml-cpu/arch/arm/repack.cpp +1891 -0
  27. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  28. package/cpp/ggml-cpu/arch/x86/quants.c +3820 -0
  29. package/cpp/ggml-cpu/arch/x86/repack.cpp +6307 -0
  30. package/cpp/ggml-cpu/arch-fallback.h +215 -0
  31. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  32. package/cpp/ggml-cpu/binary-ops.h +16 -0
  33. package/cpp/ggml-cpu/common.h +73 -0
  34. package/cpp/ggml-cpu/ggml-cpu-impl.h +525 -0
  35. package/cpp/ggml-cpu/ggml-cpu.c +3578 -0
  36. package/cpp/ggml-cpu/ggml-cpu.cpp +672 -0
  37. package/cpp/ggml-cpu/ops.cpp +10587 -0
  38. package/cpp/ggml-cpu/ops.h +114 -0
  39. package/cpp/ggml-cpu/quants.c +1193 -0
  40. package/cpp/ggml-cpu/quants.h +97 -0
  41. package/cpp/ggml-cpu/repack.cpp +1982 -0
  42. package/cpp/ggml-cpu/repack.h +120 -0
  43. package/cpp/ggml-cpu/simd-mappings.h +1184 -0
  44. package/cpp/ggml-cpu/traits.cpp +36 -0
  45. package/cpp/ggml-cpu/traits.h +38 -0
  46. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  47. package/cpp/ggml-cpu/unary-ops.h +28 -0
  48. package/cpp/ggml-cpu/vec.cpp +348 -0
  49. package/cpp/ggml-cpu/vec.h +1121 -0
  50. package/cpp/ggml-cpu.h +145 -0
  51. package/cpp/ggml-impl.h +622 -0
  52. package/cpp/ggml-metal-impl.h +688 -0
  53. package/cpp/ggml-metal.h +66 -0
  54. package/cpp/ggml-metal.m +6833 -0
  55. package/cpp/ggml-opt.cpp +1093 -0
  56. package/cpp/ggml-opt.h +256 -0
  57. package/cpp/ggml-quants.c +5324 -0
  58. package/cpp/ggml-quants.h +106 -0
  59. package/cpp/ggml-threading.cpp +12 -0
  60. package/cpp/ggml-threading.h +14 -0
  61. package/cpp/ggml.c +7108 -0
  62. package/cpp/ggml.h +2492 -0
  63. package/cpp/gguf.cpp +1358 -0
  64. package/cpp/gguf.h +202 -0
  65. package/cpp/json-partial.cpp +256 -0
  66. package/cpp/json-partial.h +38 -0
  67. package/cpp/json-schema-to-grammar.cpp +985 -0
  68. package/cpp/json-schema-to-grammar.h +21 -0
  69. package/cpp/llama-adapter.cpp +388 -0
  70. package/cpp/llama-adapter.h +76 -0
  71. package/cpp/llama-arch.cpp +2355 -0
  72. package/cpp/llama-arch.h +499 -0
  73. package/cpp/llama-batch.cpp +875 -0
  74. package/cpp/llama-batch.h +160 -0
  75. package/cpp/llama-chat.cpp +783 -0
  76. package/cpp/llama-chat.h +65 -0
  77. package/cpp/llama-context.cpp +2748 -0
  78. package/cpp/llama-context.h +306 -0
  79. package/cpp/llama-cparams.cpp +5 -0
  80. package/cpp/llama-cparams.h +41 -0
  81. package/cpp/llama-cpp.h +30 -0
  82. package/cpp/llama-grammar.cpp +1229 -0
  83. package/cpp/llama-grammar.h +173 -0
  84. package/cpp/llama-graph.cpp +1891 -0
  85. package/cpp/llama-graph.h +810 -0
  86. package/cpp/llama-hparams.cpp +180 -0
  87. package/cpp/llama-hparams.h +233 -0
  88. package/cpp/llama-impl.cpp +167 -0
  89. package/cpp/llama-impl.h +61 -0
  90. package/cpp/llama-io.cpp +15 -0
  91. package/cpp/llama-io.h +35 -0
  92. package/cpp/llama-kv-cache-iswa.cpp +318 -0
  93. package/cpp/llama-kv-cache-iswa.h +135 -0
  94. package/cpp/llama-kv-cache.cpp +2059 -0
  95. package/cpp/llama-kv-cache.h +374 -0
  96. package/cpp/llama-kv-cells.h +491 -0
  97. package/cpp/llama-memory-hybrid.cpp +258 -0
  98. package/cpp/llama-memory-hybrid.h +137 -0
  99. package/cpp/llama-memory-recurrent.cpp +1146 -0
  100. package/cpp/llama-memory-recurrent.h +179 -0
  101. package/cpp/llama-memory.cpp +59 -0
  102. package/cpp/llama-memory.h +119 -0
  103. package/cpp/llama-mmap.cpp +600 -0
  104. package/cpp/llama-mmap.h +68 -0
  105. package/cpp/llama-model-loader.cpp +1164 -0
  106. package/cpp/llama-model-loader.h +170 -0
  107. package/cpp/llama-model-saver.cpp +282 -0
  108. package/cpp/llama-model-saver.h +37 -0
  109. package/cpp/llama-model.cpp +19042 -0
  110. package/cpp/llama-model.h +491 -0
  111. package/cpp/llama-sampling.cpp +2575 -0
  112. package/cpp/llama-sampling.h +32 -0
  113. package/cpp/llama-vocab.cpp +3792 -0
  114. package/cpp/llama-vocab.h +176 -0
  115. package/cpp/llama.cpp +358 -0
  116. package/cpp/llama.h +1373 -0
  117. package/cpp/log.cpp +427 -0
  118. package/cpp/log.h +103 -0
  119. package/cpp/minja/chat-template.hpp +550 -0
  120. package/cpp/minja/minja.hpp +3009 -0
  121. package/cpp/nlohmann/json.hpp +25526 -0
  122. package/cpp/nlohmann/json_fwd.hpp +187 -0
  123. package/cpp/regex-partial.cpp +204 -0
  124. package/cpp/regex-partial.h +56 -0
  125. package/cpp/rn-completion.cpp +681 -0
  126. package/cpp/rn-completion.h +116 -0
  127. package/cpp/rn-llama.cpp +345 -0
  128. package/cpp/rn-llama.h +149 -0
  129. package/cpp/rn-mtmd.hpp +602 -0
  130. package/cpp/rn-tts.cpp +591 -0
  131. package/cpp/rn-tts.h +59 -0
  132. package/cpp/sampling.cpp +579 -0
  133. package/cpp/sampling.h +107 -0
  134. package/cpp/tools/mtmd/clip-impl.h +473 -0
  135. package/cpp/tools/mtmd/clip.cpp +4322 -0
  136. package/cpp/tools/mtmd/clip.h +106 -0
  137. package/cpp/tools/mtmd/miniaudio/miniaudio.h +93468 -0
  138. package/cpp/tools/mtmd/mtmd-audio.cpp +769 -0
  139. package/cpp/tools/mtmd/mtmd-audio.h +47 -0
  140. package/cpp/tools/mtmd/mtmd-helper.cpp +460 -0
  141. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  142. package/cpp/tools/mtmd/mtmd.cpp +1066 -0
  143. package/cpp/tools/mtmd/mtmd.h +298 -0
  144. package/cpp/tools/mtmd/stb/stb_image.h +7988 -0
  145. package/cpp/unicode-data.cpp +7034 -0
  146. package/cpp/unicode-data.h +20 -0
  147. package/cpp/unicode.cpp +1061 -0
  148. package/cpp/unicode.h +68 -0
  149. package/package.json +2 -1
package/cpp/llama-batch.h
@@ -0,0 +1,160 @@
+#pragma once
+
+#include "llama.h"
+
+#include "llama-cparams.h"
+
+#include <array>
+#include <vector>
+#include <set>
+#include <bitset>
+#include <memory>
+#include <unordered_map>
+
+// keep this struct lightweight
+struct llama_ubatch {
+    bool equal_seqs() const {
+        return b_equal_seqs != 0;
+    }
+
+    uint32_t b_equal_seqs; // note: this is a boolean, but we use a uint32_t for alignment
+                           //       otherwise address sanitizer complains
+    // TODO: whole_seqs for embeddings?
+
+    uint32_t n_tokens;     // total tokens (n_seq_tokens * n_seqs)
+    uint32_t n_seq_tokens; // tokens per sequence set
+    uint32_t n_seqs;       // sequence sets in the ubatch
+    uint32_t n_seqs_unq;   // unique sequence ids in the ubatch
+
+    // seq_id_unq: unique sequence ids in the ubatch
+    // seq_idx:    indices of the unique sequence ids in the ubatch in [0, n_seqs_unq)
+    //             used for extracting sequence pooled embeddings
+
+    //                          // size               | idx | val
+    llama_token  *  token;      // [n_tokens]         | i   | id, token
+    float        *  embd;       // [n_embd, n_tokens] | i   | embd
+    llama_pos    *  pos;        // [n_tokens]         | i   | pos
+    int32_t      *  n_seq_id;   // [n_tokens]         | i   | -
+    llama_seq_id ** seq_id;     // [n_tokens]         | s   | s0, s1, seq_id
+    llama_seq_id *  seq_id_unq; // [n_seqs_unq]       | s   | seq_id
+    int32_t      *  seq_idx;    // [LLAMA_MAX_SEQ]    | -   | seq_idx
+    int8_t       *  output;     // [n_tokens]         | i   | -
+
+    struct data_t {
+        std::vector<llama_token>    token;
+        std::vector<float>          embd;
+        std::vector<llama_pos>      pos;
+        std::vector<int32_t>        n_seq_id;
+        std::vector<llama_seq_id *> seq_id;
+        std::vector<llama_seq_id>   seq_id_unq;
+        std::vector<int32_t>        seq_idx;
+        std::vector<int8_t>         output;
+    };
+
+    // the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data
+    std::shared_ptr<data_t> data;
+};
+
+// a helper for sanitizing, fulfilling and splitting a batch
+class llama_batch_allocr {
+public:
+    llama_batch_allocr(uint32_t n_pos_per_embd);
+
+    // sanitize and auto-gen missing data in the input batch
+    // memory is optional. if provided will be used to check for sequence continuity and to determine the positions
+    bool init(
+            const llama_batch & batch_inp,
+            const llama_vocab & vocab,
+            const llama_memory_i * memory,
+            uint32_t n_embd,
+            uint32_t n_seq_max,
+            bool output_all);
+
+    const llama_batch & get_batch() const;
+
+    uint32_t get_n_tokens()  const;
+    uint32_t get_n_outputs() const;
+    uint32_t get_n_used()    const;
+
+    // the array of output indices in the order they were encountered during the ubatch splitting
+    std::vector<int32_t> & get_out_ids();
+
+    // min/max positions of each sequence in the current ubatch
+    llama_pos seq_pos_min(llama_seq_id seq_id) const;
+    llama_pos seq_pos_max(llama_seq_id seq_id) const;
+
+    // call once before splitting the batch to reset the internal state
+    void split_reset();
+
+    // simple split, unknown number of sequence sets of unequal lengths
+    llama_ubatch split_simple(uint32_t n_ubatch);
+
+    // make ubatches of equal-length sequence sets
+    // if sequential == true, the tokens in the ubatch will have increasing sequential sequence ids
+    llama_ubatch split_equal(uint32_t n_ubatch, bool sequential);
+
+    // sequence-set-wise split - each ubatch contains a single sequence-set
+    llama_ubatch split_seq(uint32_t n_ubatch);
+
+    // a helper method for creating a well-defined ubatch of tokens
+    // TODO: support embeddings if needed in the future
+    llama_ubatch ubatch_reserve(uint32_t n_seq_tokens, uint32_t n_seqs);
+
+private:
+    void clear();
+
+    // create the next ubatch based on the provided batch indices (idxs) and the number of sequence sets (n_seqs)
+    // return llama_ubatch.n_tokens == 0 if the entire batch was consumed
+    llama_ubatch ubatch_add(const std::vector<int32_t> & idxs, uint32_t n_seqs, bool equal_seqs);
+
+    // for debugging, start with LLAMA_BATCH_DEBUG=2
+    void ubatch_print(const llama_ubatch & ubatch, int debug);
+
+    llama_batch batch;
+
+    // only for debugging purposes
+    const llama_vocab * vocab;
+
+    // TODO: this is more of a temporary solution until we have a better way to handle multiple positions per token/embd
+    // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
+    const uint32_t n_pos_per_embd;
+
+    uint32_t n_embd;
+    uint32_t n_seq_max;
+    uint32_t n_outputs;
+
+    std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
+
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id *> seq_id;
+    std::vector<llama_seq_id>   seq_id_unq;
+    std::vector<int32_t>        seq_idx;
+    std::vector<int8_t>         output;
+
+    using pos_set_t = std::set<llama_pos>;
+    using seq_cpl_t = std::vector<bool>;
+
+    // helper flag to quickly determine if there are any coupled sequences in the batch
+    bool has_cpl = false;
+
+    std::vector<pos_set_t> seq_pos; // seq_pos[s]: the set of positions in sequence s
+    std::vector<seq_cpl_t> seq_cpl; // seq_cpl[s0][s1]: if sequence s0 is coupled to sequence s1
+
+    using idx_vec_t = std::vector<int32_t>;
+    using seq_set_t = std::bitset<LLAMA_MAX_SEQ>;
+
+    std::vector<seq_set_t> seq_set; // seq_set[i]: the sequence set of token i
+
+    std::unordered_map<seq_set_t, idx_vec_t> seq_set_map; // the indices at which the sequence set appears
+
+    // batch indices of the output
+    std::vector<int32_t> out_ids;
+
+    uint32_t n_used;
+
+    // used[i] indicates if token i has already been used in a previous ubatch
+    std::vector<bool> used;
+
+    int debug;
+};
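
The header above is internal to the vendored llama.cpp sources rather than part of the plugin's public API, but its comments pin down the allocator's contract: init() sanitizes the input llama_batch, split_reset() is called once before splitting, and the split_* methods hand back llama_ubatch views until n_tokens == 0 signals the batch is consumed. Below is a minimal sketch of that flow for a token (non-embedding) batch; it only builds inside the cpp/ tree, and the drain_batch helper and its arguments are hypothetical, not something this package exports.

// Sketch only: llama-batch.h is an internal llama.cpp header, so this
// compiles only alongside the vendored sources. drain_batch is a
// hypothetical caller; everything it invokes comes from the header above.
#include "llama-batch.h"

#include <cstdint>
#include <cstdio>

static void drain_batch(llama_batch_allocr   & allocr,
                        const llama_batch    & batch,
                        const llama_vocab    & vocab,
                        const llama_memory_i * memory,   // optional, may be nullptr
                        uint32_t n_embd, uint32_t n_seq_max, uint32_t n_ubatch) {
    // sanitize and auto-generate any missing data in the input batch
    if (!allocr.init(batch, vocab, memory, n_embd, n_seq_max, /*output_all=*/false)) {
        fprintf(stderr, "batch failed validation\n");
        return;
    }

    // per the header: call once before splitting to reset the internal state
    allocr.split_reset();

    // simple split: sequence sets of unequal lengths; an empty ubatch
    // (n_tokens == 0) means the whole batch has been consumed
    while (true) {
        const llama_ubatch ub = allocr.split_simple(n_ubatch);
        if (ub.n_tokens == 0) {
            break;
        }

        // the per-token arrays are indexed as documented above: token[i],
        // pos[i] and n_seq_id[i] are per token; seq_id[i] points at the
        // n_seq_id[i] sequence ids that token i belongs to
        for (uint32_t i = 0; i < ub.n_tokens; ++i) {
            printf("token %6d  pos %6d  n_seq_id %d\n",
                   ub.token[i], ub.pos[i], ub.n_seq_id[i]);
        }
    }

    printf("used %u of %u tokens\n", allocr.get_n_used(), allocr.get_n_tokens());
}

split_equal() and split_seq() drop into the same loop when the memory module needs equal-length sequence sets or a single sequence set per ubatch.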