cui-llama.rn 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/android/src/main/jni.cpp +9 -9
  2. package/cpp/common.cpp +163 -60
  3. package/cpp/common.h +43 -12
  4. package/cpp/ggml-alloc.c +1042 -1037
  5. package/cpp/ggml-backend-impl.h +255 -256
  6. package/cpp/ggml-backend-reg.cpp +582 -582
  7. package/cpp/ggml-backend.cpp +2002 -2002
  8. package/cpp/ggml-backend.h +354 -352
  9. package/cpp/ggml-common.h +1853 -1853
  10. package/cpp/ggml-cpp.h +39 -39
  11. package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
  12. package/cpp/ggml-cpu-aarch64.h +8 -8
  13. package/cpp/ggml-cpu-impl.h +386 -386
  14. package/cpp/ggml-cpu-quants.c +10920 -10839
  15. package/cpp/ggml-cpu-traits.cpp +36 -36
  16. package/cpp/ggml-cpu-traits.h +38 -38
  17. package/cpp/ggml-cpu.c +329 -60
  18. package/cpp/ggml-cpu.cpp +10 -2
  19. package/cpp/ggml-cpu.h +135 -135
  20. package/cpp/ggml-impl.h +567 -567
  21. package/cpp/ggml-metal-impl.h +17 -17
  22. package/cpp/ggml-metal.m +4884 -4884
  23. package/cpp/ggml-quants.c +5238 -5238
  24. package/cpp/ggml-threading.h +14 -14
  25. package/cpp/ggml.c +6514 -6448
  26. package/cpp/ggml.h +2194 -2163
  27. package/cpp/gguf.cpp +1329 -1325
  28. package/cpp/gguf.h +202 -202
  29. package/cpp/json-schema-to-grammar.cpp +1045 -1045
  30. package/cpp/json-schema-to-grammar.h +8 -8
  31. package/cpp/json.hpp +24766 -24766
  32. package/cpp/llama-adapter.cpp +347 -346
  33. package/cpp/llama-adapter.h +74 -73
  34. package/cpp/llama-arch.cpp +1487 -1434
  35. package/cpp/llama-arch.h +400 -395
  36. package/cpp/llama-batch.cpp +368 -368
  37. package/cpp/llama-batch.h +88 -88
  38. package/cpp/llama-chat.cpp +578 -567
  39. package/cpp/llama-chat.h +52 -51
  40. package/cpp/llama-context.cpp +1775 -1771
  41. package/cpp/llama-context.h +128 -128
  42. package/cpp/llama-cparams.cpp +1 -1
  43. package/cpp/llama-cparams.h +37 -37
  44. package/cpp/llama-cpp.h +30 -30
  45. package/cpp/llama-grammar.cpp +1139 -1139
  46. package/cpp/llama-grammar.h +143 -143
  47. package/cpp/llama-hparams.cpp +71 -71
  48. package/cpp/llama-hparams.h +139 -140
  49. package/cpp/llama-impl.cpp +167 -167
  50. package/cpp/llama-impl.h +61 -61
  51. package/cpp/llama-kv-cache.cpp +718 -718
  52. package/cpp/llama-kv-cache.h +218 -218
  53. package/cpp/llama-mmap.cpp +2 -1
  54. package/cpp/llama-mmap.h +67 -67
  55. package/cpp/llama-model-loader.cpp +1124 -1011
  56. package/cpp/llama-model-loader.h +167 -158
  57. package/cpp/llama-model.cpp +3997 -2202
  58. package/cpp/llama-model.h +370 -391
  59. package/cpp/llama-sampling.cpp +2408 -2406
  60. package/cpp/llama-sampling.h +32 -48
  61. package/cpp/llama-vocab.cpp +3247 -1982
  62. package/cpp/llama-vocab.h +125 -182
  63. package/cpp/llama.cpp +416 -2886
  64. package/cpp/llama.h +1323 -1285
  65. package/cpp/log.cpp +401 -401
  66. package/cpp/log.h +121 -121
  67. package/cpp/rn-llama.hpp +18 -12
  68. package/cpp/sampling.cpp +505 -500
  69. package/cpp/sgemm.cpp +2597 -2597
  70. package/cpp/speculative.cpp +277 -274
  71. package/cpp/speculative.h +28 -28
  72. package/cpp/unicode.cpp +2 -3
  73. package/package.json +1 -1
package/cpp/llama-batch.h CHANGED
@@ -1,88 +1,88 @@
- #pragma once
-
- #include "llama.h"
-
- #include <array>
- #include <vector>
-
- // very similar to llama_batch,
- // but has more metadata about sequences
- struct llama_ubatch {
-     bool equal_seqs;
-     // TODO: whole_seqs for embeddings?
-
-     uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
-     uint32_t n_seq_tokens; // tokens per sequence
-     uint32_t n_seqs;
-
-     llama_token * token; // [n_tokens]
-     float * embd; // [n_embd, n_tokens]
-     llama_pos * pos; // [n_tokens]
-     int32_t * n_seq_id; // [n_seqs]
-     llama_seq_id ** seq_id; // [n_seqs]
-     int8_t * output; // [n_tokens]
- };
-
- struct llama_sbatch_seq {
-     int32_t n_seq_id;
-
-     llama_seq_id * seq_id;
-
-     size_t offset;
-     size_t length;
- };
-
- // sequence-length-aware batch splitting
- struct llama_sbatch {
-     // tokens left in this batch
-     size_t n_tokens;
-
-     size_t n_embd;
-
-     bool logits_all; // TODO: remove once lctx.logits_all is removed too
-
-     // sorted indices into the batch
-     std::vector<size_t> ids;
-     // batch indices of the output
-     std::vector<size_t> out_ids;
-     std::vector<llama_sbatch_seq> seq;
-
-     const llama_batch * batch = nullptr;
-
-     // buffers for the ubatch
-     std::vector<llama_token> ubatch_token;
-     std::vector<float> ubatch_embd;
-     std::vector<llama_pos> ubatch_pos;
-     std::vector<int32_t> ubatch_n_seq_id;
-     std::vector<llama_seq_id *> ubatch_seq_id;
-     std::vector<int8_t> ubatch_output;
-
-     llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);
-
-     void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);
-
-     // simple split, unknown number of sequences of unequal lengths
-     llama_ubatch split_simple(size_t n_ubatch);
-
-     // make batches of equal-length sequences
-     llama_ubatch split_equal(size_t n_ubatch);
-
-     // sequence-wise split
-     llama_ubatch split_seq(size_t n_ubatch);
-
-     void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
- };
-
- // temporary allocate memory for the input batch if needed
- struct llama_batch_allocr {
-     struct llama_batch batch;
-
-     std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
-     std::vector<llama_pos> pos;
-     std::vector<int32_t> n_seq_id;
-     std::vector<llama_seq_id *> seq_id;
-     std::vector<int8_t> logits;
-
-     // optionally fulfill the batch returned by llama_batch_get_one
-     llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
- };
+ #pragma once
+
+ #include "llama.h"
+
+ #include <array>
+ #include <vector>
+
+ // very similar to llama_batch,
+ // but has more metadata about sequences
+ struct llama_ubatch {
+     bool equal_seqs;
+     // TODO: whole_seqs for embeddings?
+
+     uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
+     uint32_t n_seq_tokens; // tokens per sequence
+     uint32_t n_seqs;
+
+     llama_token * token; // [n_tokens]
+     float * embd; // [n_embd, n_tokens]
+     llama_pos * pos; // [n_tokens]
+     int32_t * n_seq_id; // [n_seqs]
+     llama_seq_id ** seq_id; // [n_seqs]
+     int8_t * output; // [n_tokens]
+ };
+
+ struct llama_sbatch_seq {
+     int32_t n_seq_id;
+
+     llama_seq_id * seq_id;
+
+     size_t offset;
+     size_t length;
+ };
+
+ // sequence-length-aware batch splitting
+ struct llama_sbatch {
+     // tokens left in this batch
+     size_t n_tokens;
+
+     size_t n_embd;
+
+     bool logits_all; // TODO: remove once lctx.logits_all is removed too
+
+     // sorted indices into the batch
+     std::vector<size_t> ids;
+     // batch indices of the output
+     std::vector<size_t> out_ids;
+     std::vector<llama_sbatch_seq> seq;
+
+     const llama_batch * batch = nullptr;
+
+     // buffers for the ubatch
+     std::vector<llama_token> ubatch_token;
+     std::vector<float> ubatch_embd;
+     std::vector<llama_pos> ubatch_pos;
+     std::vector<int32_t> ubatch_n_seq_id;
+     std::vector<llama_seq_id *> ubatch_seq_id;
+     std::vector<int8_t> ubatch_output;
+
+     llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false);
+
+     void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length);
+
+     // simple split, unknown number of sequences of unequal lengths
+     llama_ubatch split_simple(size_t n_ubatch);
+
+     // make batches of equal-length sequences
+     llama_ubatch split_equal(size_t n_ubatch);
+
+     // sequence-wise split
+     llama_ubatch split_seq(size_t n_ubatch);
+
+     void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
+ };
+
+ // temporary allocate memory for the input batch if needed
+ struct llama_batch_allocr {
+     struct llama_batch batch;
+
+     std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
+     std::vector<llama_pos> pos;
+     std::vector<int32_t> n_seq_id;
+     std::vector<llama_seq_id *> seq_id;
+     std::vector<int8_t> logits;
+
+     // optionally fulfill the batch returned by llama_batch_get_one
+     llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);
+ };
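For orientation, the sketch below shows how the declarations in this header fit together: a llama_batch produced by llama_batch_get_one is completed by llama_batch_allocr, wrapped in a llama_sbatch via from_batch, and drained into llama_ubatch micro-batches with split_simple. This is not code from the package; these are internal llama.cpp structures normally driven by the library's decode path, the consume_in_ubatches helper is hypothetical, and the two-argument llama_batch_get_one signature is assumed for this llama.cpp revision.

#include "llama.h"
#include "llama-batch.h"

#include <cstdio>
#include <vector>

// Drain a token-only batch into micro-batches of at most n_ubatch tokens,
// roughly the way a decode loop might consume llama_sbatch (hypothetical helper).
static void consume_in_ubatches(const llama_batch & batch, size_t n_embd, size_t n_ubatch) {
    llama_sbatch sbatch;
    // simple_split = true: plain token batch, split without regrouping sequences
    sbatch.from_batch(batch, n_embd, /*simple_split=*/true, /*logits_all=*/false);

    while (sbatch.n_tokens > 0) {
        // the returned ubatch borrows its buffers from sbatch, so consume it
        // before requesting the next split
        llama_ubatch ubatch = sbatch.split_simple(n_ubatch);
        printf("ubatch: %u tokens, %u seqs\n", ubatch.n_tokens, ubatch.n_seqs);
    }
}

int main() {
    std::vector<llama_token> tokens = { 1, 2, 3, 4, 5, 6, 7 };

    // llama_batch_get_one wraps a bare token array; llama_batch_allocr
    // (declared above) fills in the pos/n_seq_id/seq_id/logits fields it
    // leaves unset, starting positions at p0 = 0.
    llama_batch        batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());
    llama_batch_allocr allocr(batch, /*p0=*/0);

    consume_in_ubatches(allocr.batch, /*n_embd=*/0, /*n_ubatch=*/4);
    return 0;
}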