cui-llama.rn 1.3.0 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +6 -1
- package/android/src/main/jni.cpp +6 -6
- package/cpp/amx/amx.cpp +196 -0
- package/cpp/amx/amx.h +20 -0
- package/cpp/amx/common.h +101 -0
- package/cpp/amx/mmq.cpp +2524 -0
- package/cpp/amx/mmq.h +16 -0
- package/cpp/common.cpp +1981 -1682
- package/cpp/common.h +636 -600
- package/cpp/ggml-aarch64.c +129 -129
- package/cpp/ggml-aarch64.h +19 -19
- package/cpp/ggml-alloc.c +1038 -1040
- package/cpp/ggml-alloc.h +76 -76
- package/cpp/ggml-backend-impl.h +238 -216
- package/cpp/ggml-backend-reg.cpp +423 -195
- package/cpp/ggml-backend.cpp +1999 -1997
- package/cpp/ggml-backend.h +351 -328
- package/cpp/ggml-common.h +1859 -1853
- package/cpp/ggml-cpp.h +38 -38
- package/cpp/ggml-cpu-aarch64.c +3823 -3560
- package/cpp/ggml-cpu-aarch64.h +32 -30
- package/cpp/ggml-cpu-impl.h +386 -371
- package/cpp/ggml-cpu-quants.c +10835 -10822
- package/cpp/ggml-cpu-quants.h +63 -63
- package/cpp/ggml-cpu.c +99 -103
- package/cpp/ggml-cpu.cpp +69 -17
- package/cpp/ggml-cpu.h +152 -177
- package/cpp/ggml-impl.h +556 -550
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +4426 -4294
- package/cpp/ggml-quants.c +5247 -5247
- package/cpp/ggml-quants.h +100 -100
- package/cpp/ggml-threading.cpp +12 -12
- package/cpp/ggml-threading.h +12 -12
- package/cpp/ggml.c +7618 -8180
- package/cpp/ggml.h +2255 -2411
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama-grammar.cpp +1138 -1138
- package/cpp/llama-grammar.h +144 -144
- package/cpp/llama-impl.h +181 -181
- package/cpp/llama-sampling.cpp +2348 -2348
- package/cpp/llama-sampling.h +48 -48
- package/cpp/llama-vocab.cpp +1984 -1984
- package/cpp/llama-vocab.h +170 -170
- package/cpp/llama.cpp +22332 -22132
- package/cpp/llama.h +1259 -1253
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +6 -6
- package/cpp/sampling.cpp +505 -466
- package/cpp/sampling.h +22 -1
- package/cpp/sgemm.cpp +1884 -1884
- package/cpp/speculative.cpp +270 -0
- package/cpp/speculative.h +28 -0
- package/cpp/unicode.cpp +11 -0
- package/ios/RNLlamaContext.mm +13 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +4 -2
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +2 -1
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +94 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts +5 -6
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +4 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -1
- package/src/NativeRNLlama.ts +97 -10
- package/src/grammar.ts +10 -8
- package/src/index.ts +22 -1
package/cpp/sampling.h
CHANGED
@@ -36,7 +36,7 @@ struct common_sampler;
|
|
36
36
|
|
37
37
|
// llama_sampler API overloads
|
38
38
|
|
39
|
-
struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_sampler_params & params);
|
39
|
+
struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params);
|
40
40
|
|
41
41
|
void common_sampler_free(struct common_sampler * gsmpl);
|
42
42
|
|
@@ -60,6 +60,27 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
|
|
60
60
|
//
|
61
61
|
llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
|
62
62
|
|
63
|
+
// generalized version of common_sampler_sample
|
64
|
+
//
|
65
|
+
// will cross-reference the sampled tokens with a batch of draft tokens and accept those that match
|
66
|
+
// if the sampler disagrees at some point, we stop and return the accepted tokens up to now
|
67
|
+
//
|
68
|
+
// common_sampler_sample_n(gsmpl, ctx, { idx }, {});
|
69
|
+
//
|
70
|
+
// is equivalent to
|
71
|
+
//
|
72
|
+
// common_sampler_sample(gsmpl, ctx, idx);
|
73
|
+
// common_sampler_accept(gsmpl, token, true);
|
74
|
+
//
|
75
|
+
// requires: idxs.size() == draft.size() + 1
|
76
|
+
//
|
77
|
+
// returns at least 1 token, up to idxs.size()
|
78
|
+
//
|
79
|
+
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first = false);
|
80
|
+
|
81
|
+
// assume idxs == [ 0, 1, 2, ..., draft.size() ]
|
82
|
+
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first = false);
|
83
|
+
|
63
84
|
uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);
|
64
85
|
|
65
86
|
// helpers
|