@fugood/llama.node 1.1.5 → 1.1.7
This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- package/lib/binding.ts +4 -0
- package/lib/index.js +6 -1
- package/lib/index.ts +6 -0
- package/lib/version.js +5 -0
- package/lib/version.ts +2 -0
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +19 -15
- package/src/LlamaCompletionWorker.cpp +73 -18
- package/src/LlamaCompletionWorker.h +8 -0
- package/src/llama.cpp/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +147 -46
- package/src/llama.cpp/common/chat-parser.cpp +9 -1
- package/src/llama.cpp/common/chat.cpp +350 -3
- package/src/llama.cpp/common/chat.h +11 -3
- package/src/llama.cpp/common/common.cpp +54 -0
- package/src/llama.cpp/common/common.h +44 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -2
- package/src/llama.cpp/ggml/include/ggml-opt.h +25 -6
- package/src/llama.cpp/ggml/include/ggml-zdnn.h +16 -0
- package/src/llama.cpp/ggml/include/ggml.h +65 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +96 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +1136 -1077
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +20 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +21 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +16 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +270 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +3 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +35 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +200 -51
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +19 -4
- package/src/llama.cpp/include/llama.h +26 -0
- package/src/llama.cpp/src/llama-arch.cpp +65 -0
- package/src/llama.cpp/src/llama-arch.h +10 -0
- package/src/llama.cpp/src/llama-batch.cpp +1 -1
- package/src/llama.cpp/src/llama-chat.cpp +15 -4
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +37 -25
- package/src/llama.cpp/src/llama-context.h +6 -5
- package/src/llama.cpp/src/llama-graph.cpp +118 -9
- package/src/llama.cpp/src/llama-graph.h +38 -0
- package/src/llama.cpp/src/llama-hparams.h +5 -3
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +12 -6
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +2 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +93 -69
- package/src/llama.cpp/src/llama-kv-cache-unified.h +2 -2
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-hybrid.h +2 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
- package/src/llama.cpp/src/llama-memory.h +2 -2
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +500 -4
- package/src/llama.cpp/src/llama-model.h +25 -4
- package/src/llama.cpp/src/llama-quant.cpp +37 -1
- package/src/llama.cpp/src/llama-vocab.cpp +43 -0
package/src/llama.cpp/src/llama-kv-cache-unified.cpp

@@ -39,6 +39,10 @@ llama_kv_cache_unified::llama_kv_cache_unified(
     if (model.arch == LLM_ARCH_GEMMA3N) {
         n_layer_cache = 20;
     }
+    if (model.arch == LLM_ARCH_GLM4_MOE) {
+        // GLM-4.5: Only process up to last layer, skip final NextN layer
+        n_layer_cache = hparams.n_layer - hparams.nextn_predict_layers;
+    }
 
     // create a context for each buffer type
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
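Note: the GLM4_MOE branch shortens the per-layer KV cache because GLM-4.5's trailing NextN/MTP layers never attend over cached tokens. A minimal sketch of the arithmetic, with illustrative layer counts rather than the real GLM-4.5 hyperparameters:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Illustrative values; the real ones come from the model's GGUF hparams.
    const uint32_t n_layer              = 47; // total transformer layers
    const uint32_t nextn_predict_layers = 1;  // trailing NextN layers, excluded from the cache

    // Same computation as the constructor change above.
    const uint32_t n_layer_cache = n_layer - nextn_predict_layers;

    std::printf("KV cache covers %u of %u layers\n", n_layer_cache, n_layer);
    return 0;
}
```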
@@ -219,12 +223,7 @@ void llama_kv_cache_unified::clear(bool data) {
 }
 
 bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
-    GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size());
-
-    auto & cells = v_cells[seq_to_stream[seq_id]];
-    auto & head = v_heads[seq_to_stream[seq_id]];
-
-    uint32_t new_head = cells.size();
+    GGML_ASSERT(seq_id == -1 || (seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()));
 
     if (p0 < 0) {
         p0 = 0;

@@ -235,6 +234,11 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
     }
 
     if (seq_id >= 0) {
+        auto & cells = v_cells[seq_to_stream[seq_id]];
+        auto & head = v_heads[seq_to_stream[seq_id]];
+
+        uint32_t new_head = cells.size();
+
         for (uint32_t i = 0; i < cells.size(); ++i) {
             if (!cells.pos_in(i, p0, p1)) {
                 continue;
@@ -246,24 +250,36 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
                 }
             }
         }
+
+        // If we freed up a slot, set head to it so searching can start there.
+        if (new_head != cells.size() && new_head < head) {
+            head = new_head;
+        }
     } else {
         // match any sequence
-        for (uint32_t i = 0; i < cells.size(); ++i) {
-            if (!cells.pos_in(i, p0, p1)) {
-                continue;
-            }
+        for (uint32_t s = 0; s < n_stream; ++s) {
+            auto & cells = v_cells[s];
+            auto & head = v_heads[s];
 
-            cells.rm(i);
+            uint32_t new_head = cells.size();
 
-            if (new_head == cells.size()) {
-                new_head = i;
+            for (uint32_t i = 0; i < cells.size(); ++i) {
+                if (!cells.pos_in(i, p0, p1)) {
+                    continue;
+                }
+
+                cells.rm(i);
+
+                if (new_head == cells.size()) {
+                    new_head = i;
+                }
             }
-        }
-    }
 
-    // If we freed up a slot, set head to it so searching can start there.
-    if (new_head != cells.size() && new_head < head) {
-        head = new_head;
-    }
+            // If we freed up a slot, set head to it so searching can start there.
+            if (new_head != cells.size() && new_head < head) {
+                head = new_head;
+            }
+        }
     }
 
     return true;
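With this change, removing a position range across every sequence no longer trips an assert, and each KV stream is scanned and compacted independently. A hedged usage sketch against the public API, assuming the bundled llama.h exposes the llama_memory interface at this version:

```cpp
#include "llama.h"

// Drop positions [p0, p1) for *all* sequences: seq_id == -1 now means
// "match any sequence" instead of hitting the removed GGML_ASSERT.
static bool drop_range_for_all_seqs(llama_context * ctx, llama_pos p0, llama_pos p1) {
    llama_memory_t mem = llama_get_memory(ctx);

    // Each per-stream head pointer is rewound to the first freed slot,
    // mirroring the per-stream loop in the hunk above.
    return llama_memory_seq_rm(mem, /*seq_id=*/-1, p0, p1);
}
```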
@@ -734,66 +750,70 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 }
 
 llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const {
-    if (debug > 0) {
-        const auto & cells = v_cells[seq_to_stream[1]];
 
-        const uint32_t head_cur = v_heads[1];
-
-        LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
-                __func__, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
+    if (debug > 0) {
+        for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) {
+            const auto seq_id = ubatch.seq_id_unq[s];
+            const auto stream_id = seq_to_stream[seq_id];
+            const auto & cells = v_cells[stream_id];
+            const uint32_t head_cur = v_heads[stream_id];
+
+            LLAMA_LOG_DEBUG("%s: stream[%d], n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
+                    __func__, stream_id, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
+
+            if ((debug == 2 && n_swa > 0) || debug > 2) {
+                std::string ss;
+                for (uint32_t i = 0; i < cells.size(); ++i) {
+                    if (cells.is_empty(i)) {
+                        ss += '.';
+                    } else {
+                        assert(cells.seq_count(i) >= 1);
 
-        if ((debug == 2 && n_swa > 0) || debug > 2) {
-            std::string ss;
-            for (uint32_t i = 0; i < cells.size(); ++i) {
-                if (cells.is_empty(i)) {
-                    ss += '.';
-                } else {
-                    assert(cells.seq_count(i) >= 1);
+                        if (cells.seq_count(i) == 1) {
+                            ss += std::to_string(cells.seq_get(i));
+                        } else {
+                            ss += 'M';
+                        }
+                    }
+                    if (i%256 == 255) {
+                        ss += " *";
+                        ss += '\n';
+                    }
+                }
+                LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
+            }
 
-            if (cells.seq_count(i) == 1) {
-                ss += std::to_string(cells.seq_get(i));
+            if ((debug == 2 && n_swa > 0) || debug > 2) {
+                std::string ss;
+                for (uint32_t i = 0; i < cells.size(); ++i) {
+                    std::string cur;
+                    if (cells.is_empty(i)) {
+                        cur = '.';
                     } else {
-                        ss += 'M';
+                        cur = std::to_string(cells.pos_get(i));
+                    }
+                    const int n = cur.size();
+                    for (int j = 0; j < 5 - n; ++j) {
+                        cur += ' ';
+                    }
+                    ss += cur;
+                    if (i%256 == 255) {
+                        ss += " *";
+                    }
+                    if (i%64 == 63) {
+                        ss += '\n';
                     }
                 }
-                if (i%256 == 255) {
-                    ss += " *";
-                    ss += '\n';
-                }
+                LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
             }
-            LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
-        }
 
-        if ((debug == 2 && n_swa > 0) || debug > 2) {
-            std::string ss;
-            for (uint32_t i = 0; i < cells.size(); ++i) {
-                std::string cur;
-                if (cells.is_empty(i)) {
-                    cur = '.';
-                } else {
-                    cur = std::to_string(cells.pos_get(i));
-                }
-                const int n = cur.size();
-                for (int j = 0; j < 5 - n; ++j) {
-                    cur += ' ';
-                }
-                ss += cur;
-                if (i%256 == 255) {
-                    ss += " *";
-                }
-                if (i%64 == 63) {
-                    ss += '\n';
+            for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
+                if (cells.seq_pos_min(s) < 0) {
+                    continue;
                 }
-            }
-            LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
-        }
 
-        for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
-            if (cells.seq_pos_min(s) < 0) {
-                continue;
+                LLAMA_LOG_DEBUG("%s: stream[%d] min[%d] = %5d, max[%d] = %5d\n", __func__, stream_id, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
             }
-
-            LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
         }
     }
 
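A note on exercising the expanded logging: the debug dumps now cover every stream touched by the batch instead of a single hard-coded one. In the bundled sources the `debug` member that gates them appears to be initialized from an environment variable rather than an API call; a minimal sketch of that pattern, where the variable name LLAMA_KV_CACHE_DEBUG is an assumption from reading the source, not a documented interface:

```cpp
#include <cstdlib>

// Env-gated debug level as used by llama_kv_cache_unified:
// 0 = silent; 2 with SWA enabled (or >2 always) also dumps cell maps.
static int kv_debug_level() {
    const char * v = std::getenv("LLAMA_KV_CACHE_DEBUG");
    return v ? std::atoi(v) : 0;
}
```

At level 2+, each stream prints '.' for an empty cell, the owning sequence id for singly-owned cells, and 'M' for cells shared by multiple sequences (line breaks every 256 cells in the sequence map, every 64 in the position map), followed by per-sequence min/max positions.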
@@ -1808,7 +1828,9 @@ bool llama_kv_cache_unified::is_masked_swa(llama_pos p0, llama_pos p1) const {
     return false;
 }
 
-void llama_kv_cache_unified::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
+void llama_kv_cache_unified::state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const {
+    GGML_UNUSED(flags);
+
     io.write(&n_stream, sizeof(n_stream));
 
     for (uint32_t s = 0; s < n_stream; ++s) {

@@ -1859,7 +1881,9 @@ void llama_kv_cache_unified::state_write(llama_io_write_i & io, llama_seq_id seq
     }
 }
 
-void llama_kv_cache_unified::state_read(llama_io_read_i & io, llama_seq_id seq_id) {
+void llama_kv_cache_unified::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) {
+    GGML_UNUSED(flags);
+
     GGML_ASSERT(seq_id == -1 || (seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()));
 
     uint32_t n_stream_cur;
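These are plumbing changes: the unified cache accepts the new llama_state_seq_flags parameter but ignores it (GGML_UNUSED), while callers can now pass flags through the whole memory stack. A hedged sketch of saving one sequence's state via the flag-carrying API; the _ext names follow upstream llama.cpp and are assumed, not verified, to be among the 26 lines this release adds to include/llama.h:

```cpp
#include "llama.h"

#include <vector>

// Snapshot a single sequence's KV state into a byte buffer.
static std::vector<uint8_t> snapshot_seq(llama_context * ctx, llama_seq_id seq_id) {
    // 0 requests the full state; the flags type exists so variants such as a
    // SWA-only snapshot can be requested without another API break.
    const llama_state_seq_flags flags = 0;

    std::vector<uint8_t> buf(llama_state_seq_get_size_ext(ctx, seq_id, flags));
    llama_state_seq_get_data_ext(ctx, buf.data(), buf.size(), seq_id, flags);
    return buf;
}
```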
package/src/llama.cpp/src/llama-kv-cache-unified.h

@@ -136,8 +136,8 @@ public:
 
     // state write/load
 
-    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
-    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1) override;
+    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) const override;
+    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) override;
 
     //
     // llama_kv_cache_unified specific API
package/src/llama.cpp/src/llama-memory-hybrid.cpp

@@ -165,12 +165,16 @@ llama_pos llama_memory_hybrid::seq_pos_max(llama_seq_id seq_id) const {
     return std::min(mem_attn->seq_pos_max(seq_id), mem_recr->seq_pos_max(seq_id));
 }
 
-void llama_memory_hybrid::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
+void llama_memory_hybrid::state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const {
+    GGML_UNUSED(flags);
+
     mem_attn->state_write(io, seq_id);
     mem_recr->state_write(io, seq_id);
 }
 
-void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id) {
+void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) {
+    GGML_UNUSED(flags);
+
     mem_attn->state_read(io, seq_id);
     mem_recr->state_read(io, seq_id);
 }
package/src/llama.cpp/src/llama-memory-hybrid.h

@@ -74,8 +74,8 @@ public:
 
     // state write/load
 
-    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
-    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1) override;
+    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) const override;
+    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) override;
 
     //
     // llama_memory_hybrid specific API
package/src/llama.cpp/src/llama-memory-recurrent.cpp

@@ -680,7 +680,9 @@ size_t llama_memory_recurrent::size_s_bytes() const {
     return size_s_bytes;
 }
 
-void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
+void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const {
+    GGML_UNUSED(flags);
+
     std::vector<std::pair<uint32_t, uint32_t>> cell_ranges; // ranges, from inclusive, to exclusive
     uint32_t cell_count = 0;
 

@@ -718,7 +720,9 @@ void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq
     state_write_data(io, cell_ranges);
 }
 
-void llama_memory_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id) {
+void llama_memory_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) {
+    GGML_UNUSED(flags);
+
     uint32_t cell_count;
     io.read_to(&cell_count, sizeof(cell_count));
 
package/src/llama.cpp/src/llama-memory-recurrent.h

@@ -63,8 +63,8 @@ public:
 
     // state write/load
 
-    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
-    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1) override;
+    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) const override;
+    void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) override;
 
     uint32_t head = 0; // the location where the batch will be placed in the cache (see find_slot())
     uint32_t size = 0; // total number of cells, shared across all sequences
package/src/llama.cpp/src/llama-memory.h

@@ -104,8 +104,8 @@ struct llama_memory_i {
     // state write/read
     //
 
-    virtual void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const = 0;
-    virtual void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1) = 0;
+    virtual void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) const = 0;
+    virtual void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1, llama_state_seq_flags flags = 0) = 0;
 };
 
 using llama_memory_ptr = std::unique_ptr<llama_memory_i>;
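Every memory implementation repeats the same signature change, and the defaulted trailing parameter is what keeps existing call sites compiling. A standalone sketch of the pattern, with illustrative names rather than the real llama.cpp types:

```cpp
#include <cstdint>
#include <cstdio>

using state_flags = uint32_t; // stand-in for llama_state_seq_flags

struct memory_i {
    virtual ~memory_i() = default;
    // New trailing parameter, defaulted so two-argument callers still compile.
    virtual void state_write(int seq_id = -1, state_flags flags = 0) const = 0;
};

struct memory_impl : memory_i {
    void state_write(int seq_id = -1, state_flags flags = 0) const override {
        std::printf("write seq=%d flags=%u\n", seq_id, flags);
    }
};

int main() {
    memory_impl m;
    m.state_write(0);    // pre-change call shape: flags defaults to 0
    m.state_write(0, 1); // post-change call shape: a flag is passed through
    return 0;
}
```

One caveat worth noting: default arguments on virtuals are bound statically, which is why the overrides above, like the ones in the diff, redeclare the same defaults.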
package/src/llama.cpp/src/llama-model-loader.cpp

@@ -35,6 +35,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
         case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
         case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_MXFP4_MOE: return "MXFP4 MoE";
         case LLAMA_FTYPE_MOSTLY_Q2_K: return "Q2_K - Medium";
         case LLAMA_FTYPE_MOSTLY_Q2_K_S: return "Q2_K - Small";
         case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
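The new name string pairs with the MXFP4 MoE file type (micro-scaled 4-bit floats for MoE expert weights). A hedged sketch of requesting it through the long-standing quantize entry point; the enum value is taken from this diff, everything else is standard llama.cpp API:

```cpp
#include "llama.h"

// Re-quantize a GGUF model to the MXFP4 MoE file type; returns 0 on success.
static int quantize_to_mxfp4_moe(const char * fname_in, const char * fname_out) {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype = LLAMA_FTYPE_MOSTLY_MXFP4_MOE; // now printed as "MXFP4 MoE"

    return (int) llama_model_quantize(fname_in, fname_out, &params);
}
```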
package/src/llama.cpp/src/llama-model-loader.h

@@ -58,8 +58,9 @@ struct llama_model_loader {
         }
     };
 
-    static const int TENSOR_NOT_REQUIRED = 1;
-    static const int TENSOR_DUPLICATED = 2;
+    static const int TENSOR_NOT_REQUIRED = 1 << 0;
+    static const int TENSOR_DUPLICATED = 1 << 1;
+    static const int TENSOR_SKIP = 1 << 2;
 
     int n_kv = 0;
     int n_tensors = 0;
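Why the switch to explicit bit shifts: the loader flags are combined as a bitmask, and the new TENSOR_SKIP bit must compose with the existing two. A small self-contained illustration; the combination shown is hypothetical, since the create-tensor call sites are not part of this hunk:

```cpp
#include <cstdio>

// Mirrors the new definitions in llama_model_loader.
static const int TENSOR_NOT_REQUIRED = 1 << 0;
static const int TENSOR_DUPLICATED   = 1 << 1;
static const int TENSOR_SKIP         = 1 << 2; // new in this release

int main() {
    const int flags = TENSOR_NOT_REQUIRED | TENSOR_SKIP;

    // Each property tests independently of the others.
    std::printf("not required: %d\n", (flags & TENSOR_NOT_REQUIRED) != 0); // 1
    std::printf("duplicated:   %d\n", (flags & TENSOR_DUPLICATED)   != 0); // 0
    std::printf("skip:         %d\n", (flags & TENSOR_SKIP)         != 0); // 1
    return 0;
}
```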
|