cui-llama.rn 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +5 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +28 -44
- package/cpp/common.h +35 -14
- package/cpp/ggml-alloc.c +0 -1
- package/cpp/ggml-backend-impl.h +38 -20
- package/cpp/ggml-backend-reg.cpp +246 -92
- package/cpp/ggml-backend.h +1 -0
- package/cpp/ggml-common.h +42 -48
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
- package/cpp/ggml-cpu-aarch64.h +2 -26
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14122 -13971
- package/cpp/ggml-cpu.cpp +627 -715
- package/cpp/ggml-cpu.h +0 -17
- package/cpp/ggml-impl.h +22 -6
- package/cpp/ggml-metal.m +482 -24
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml.c +284 -178
- package/cpp/ggml.h +73 -25
- package/cpp/llama-grammar.cpp +15 -15
- package/cpp/llama-grammar.h +2 -5
- package/cpp/llama-sampling.cpp +35 -90
- package/cpp/llama-vocab.cpp +7 -2
- package/cpp/llama-vocab.h +1 -1
- package/cpp/llama.cpp +1782 -586
- package/cpp/llama.h +20 -19
- package/cpp/sampling.cpp +11 -16
- package/cpp/sgemm.cpp +265 -258
- package/cpp/sgemm.h +2 -2
- package/cpp/speculative.cpp +4 -0
- package/cpp/unicode.cpp +51 -51
- package/cpp/unicode.h +9 -10
- package/lib/commonjs/index.js +38 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +36 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -3
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +36 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +3 -3
- package/src/index.ts +46 -2
- package/cpp/amx/amx.cpp +0 -196
- package/cpp/amx/amx.h +0 -20
- package/cpp/amx/common.h +0 -101
- package/cpp/amx/mmq.cpp +0 -2524
- package/cpp/amx/mmq.h +0 -16
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
package/cpp/llama.h
CHANGED
@@ -105,12 +105,15 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH  = 24,
         LLAMA_VOCAB_PRE_TYPE_EXAONE        = 25,
         LLAMA_VOCAB_PRE_TYPE_CHAMELEON     = 26,
+        LLAMA_VOCAB_PRE_TYPE_MINERVA       = 27,
     };
 
     enum llama_rope_type {
-        LLAMA_ROPE_TYPE_NONE = -1,
-        LLAMA_ROPE_TYPE_NORM = 0,
-        LLAMA_ROPE_TYPE_NEOX = LM_GGML_ROPE_TYPE_NEOX,
+        LLAMA_ROPE_TYPE_NONE   = -1,
+        LLAMA_ROPE_TYPE_NORM   = 0,
+        LLAMA_ROPE_TYPE_NEOX   = LM_GGML_ROPE_TYPE_NEOX,
+        LLAMA_ROPE_TYPE_MROPE  = LM_GGML_ROPE_TYPE_MROPE,
+        LLAMA_ROPE_TYPE_VISION = LM_GGML_ROPE_TYPE_VISION,
     };
 
     enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
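LLAMA_ROPE_TYPE_MROPE and LLAMA_ROPE_TYPE_VISION track the multimodal rotary-embedding flags added in upstream llama.cpp alongside Qwen2-VL support. A minimal sketch of how a caller might branch on them, assuming the existing llama_rope_type() accessor (the helper name is illustrative):

```cpp
#include "llama.h"

// Sketch: detect the new multimodal RoPE variants on a loaded model.
// The two new enum values mirror the LM_GGML_ROPE_TYPE_* flags.
static bool uses_multimodal_rope(const struct llama_model * model) {
    switch (llama_rope_type(model)) {
        case LLAMA_ROPE_TYPE_MROPE:
        case LLAMA_ROPE_TYPE_VISION:
            return true;
        default:
            return false;
    }
}
```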
@@ -172,9 +175,9 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_BF16          = 32, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0_4_4      = 33, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0_4_8      = 34, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0_8_8      = 35, // except 1d tensors
+        //LLAMA_FTYPE_MOSTLY_Q4_0_4_4    = 33, // removed from gguf files, use Q4_0 and runtime repack
+        //LLAMA_FTYPE_MOSTLY_Q4_0_4_8    = 34, // removed from gguf files, use Q4_0 and runtime repack
+        //LLAMA_FTYPE_MOSTLY_Q4_0_8_8    = 35, // removed from gguf files, use Q4_0 and runtime repack
         LLAMA_FTYPE_MOSTLY_TQ1_0        = 36, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_TQ2_0        = 37, // except 1d tensors
 
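As the new comments say, the Q4_0_4_4/Q4_0_4_8/Q4_0_8_8 file types are no longer produced; plain Q4_0 models are repacked at load time instead. This lines up with the file list above: ggml-aarch64.c is removed, ggml-cpu-aarch64.c becomes ggml-cpu-aarch64.cpp, and the new ggml-cpu-traits.{h,cpp} files appear to carry the runtime-repacking machinery.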
@@ -186,7 +189,8 @@ extern "C" {
         LLAMA_ROPE_SCALING_TYPE_NONE        = 0,
         LLAMA_ROPE_SCALING_TYPE_LINEAR      = 1,
         LLAMA_ROPE_SCALING_TYPE_YARN        = 2,
-        LLAMA_ROPE_SCALING_TYPE_MAX_VALUE   = LLAMA_ROPE_SCALING_TYPE_YARN,
+        LLAMA_ROPE_SCALING_TYPE_LONGROPE    = 3,
+        LLAMA_ROPE_SCALING_TYPE_MAX_VALUE   = LLAMA_ROPE_SCALING_TYPE_LONGROPE,
     };
 
     enum llama_pooling_type {
@@ -455,6 +459,7 @@ extern "C" {
     // Functions to access the model's GGUF metadata scalar values
     // - The functions return the length of the string on success, or -1 on failure
     // - The output string is always null-terminated and cleared on failure
+    // - When retrieving a string, an extra byte must be allocated to account for the null terminator
     // - GGUF array values are not supported by these functions
 
     // Get metadata value as a string by key name
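A minimal sketch of what the new null-terminator note means in practice, using the existing llama_model_meta_val_str() accessor (the helper is illustrative, and it assumes the snprintf-style length query of the current implementation):

```cpp
#include "llama.h"
#include <string>

// Illustrative helper: fetch one metadata value as a std::string.
// The first call reports the string length; the buffer is then sized
// with one extra byte for the null terminator, per the new header note.
static std::string model_meta(const struct llama_model * model, const char * key) {
    const int32_t len = llama_model_meta_val_str(model, key, nullptr, 0);
    if (len < 0) {
        return {}; // -1 on failure (key not present)
    }
    std::string value(len + 1, '\0');  // +1 for the terminator
    llama_model_meta_val_str(model, key, value.data(), value.size());
    value.resize(len);                 // trim the terminator
    return value;
}
```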
@@ -478,9 +483,6 @@ extern "C" {
     // Returns the total number of parameters in the model
     LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
 
-    // Get a llama model tensor
-    LLAMA_API struct lm_ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-
     // Returns true if the model contains an encoder that requires llama_encode() call
     LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 
@@ -991,6 +993,9 @@ extern "C" {
                                   char * buf,
                                 int32_t   length);
 
+    // Get list of built-in chat templates
+    LLAMA_API int32_t llama_chat_builtin_templates(const char ** output, size_t len);
+
     //
     // Sampling API
     //
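llama_chat_builtin_templates() fills the caller's array with template names and returns the total count. A plausible calling pattern, mirroring upstream llama.cpp usage where a first call with no buffer queries the count (treat that tolerance of a null output pointer as an assumption):

```cpp
#include "llama.h"
#include <cstdio>
#include <vector>

// List the chat templates compiled into the library.
static void print_builtin_templates() {
    const int32_t count = llama_chat_builtin_templates(nullptr, 0);
    std::vector<const char *> names(count > 0 ? count : 0);
    llama_chat_builtin_templates(names.data(), names.size());
    for (const char * name : names) {
        std::printf("%s\n", name);
    }
}
```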
@@ -1132,16 +1137,12 @@ extern "C" {
                            const char * grammar_str,
                            const char * grammar_root);
 
+    /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
     LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
-                             int32_t   n_vocab,         // llama_n_vocab()
-                         llama_token   special_eos_id,  // llama_token_eos()
-                         llama_token   linefeed_id,     // llama_token_nl()
-                             int32_t   penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
-                               float   penalty_repeat,  // 1.0 = disabled
-                               float   penalty_freq,    // 0.0 = disabled
-                               float   penalty_present, // 0.0 = disabled
-                                bool   penalize_nl,     // consider newlines as a repeatable token
-                                bool   ignore_eos);     // ignore the end-of-sequence token
+                             int32_t   penalty_last_n,     // last n tokens to penalize (0 = disable penalty, -1 = context size)
+                               float   penalty_repeat,     // 1.0 = disabled
+                               float   penalty_freq,       // 0.0 = disabled
+                               float   penalty_present);   // 0.0 = disabled
 
     /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
     LLAMA_API struct llama_sampler * llama_sampler_init_dry(
package/cpp/sampling.cpp
CHANGED
@@ -161,32 +161,20 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                         params.logit_bias.size(),
                         params.logit_bias.data()));
 
-    llama_sampler_chain_add(result->chain,
-            llama_sampler_init_penalties(
-                llama_n_vocab  (model),
-                llama_token_eos(model),
-                llama_token_nl (model),
-                params.penalty_last_n,
-                params.penalty_repeat,
-                params.penalty_freq,
-                params.penalty_present,
-                params.penalize_nl,
-                params.ignore_eos));
-
     if (params.mirostat == 0) {
         for (const auto & cnstr : params.samplers) {
             switch (cnstr) {
-                case COMMON_SAMPLER_TYPE_DRY:
+                case COMMON_SAMPLER_TYPE_DRY:
                     {
-                        std::vector<const char*> c_breakers;
+                        std::vector<const char *> c_breakers;
                         c_breakers.reserve(params.dry_sequence_breakers.size());
-                        for (const auto& str : params.dry_sequence_breakers) {
+                        for (const auto & str : params.dry_sequence_breakers) {
                             c_breakers.push_back(str.c_str());
                         }
 
                         llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                     }
-                    break;
+                    break;
                 case COMMON_SAMPLER_TYPE_TOP_K:
                     llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
                     break;
@@ -208,6 +196,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                 case COMMON_SAMPLER_TYPE_INFILL:
                     llama_sampler_chain_add(result->chain, llama_sampler_init_infill (model));
                     break;
+                case COMMON_SAMPLER_TYPE_PENALTIES:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
+                    break;
                 default:
                     LM_GGML_ASSERT(false && "unknown sampler type");
             }
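Taken together with the first hunk, this changes behavior: penalties are no longer applied unconditionally before the configured samplers, but run as an opt-in COMMON_SAMPLER_TYPE_PENALTIES stage. A configuration sketch (the struct name and the allow_alt_names flag follow upstream llama.cpp conventions and are assumptions here):

```cpp
#include "sampling.h"

// Sketch: request the penalties stage explicitly; in 1.3.5 it is no
// longer added to the chain unconditionally.
static common_params_sampling make_sampling_params() {
    common_params_sampling sparams;
    sparams.penalty_last_n = 64;   // example values
    sparams.penalty_repeat = 1.1f;
    sparams.samplers = common_sampler_types_from_names(
        {"penalties", "top_k", "temperature"}, /*allow_alt_names=*/ true);
    return sparams;
}
```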
@@ -415,6 +406,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
         case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
         case COMMON_SAMPLER_TYPE_XTC:         return 'x';
         case COMMON_SAMPLER_TYPE_INFILL:      return 'i';
+        case COMMON_SAMPLER_TYPE_PENALTIES:   return 'e';
         default : return '?';
     }
 }
@@ -429,6 +421,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
         case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
         case COMMON_SAMPLER_TYPE_XTC:         return "xtc";
         case COMMON_SAMPLER_TYPE_INFILL:      return "infill";
+        case COMMON_SAMPLER_TYPE_PENALTIES:   return "penalties";
         default : return "";
     }
 }
@@ -443,6 +436,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
         { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
         { "xtc",         COMMON_SAMPLER_TYPE_XTC },
         { "infill",      COMMON_SAMPLER_TYPE_INFILL },
+        { "penalties",   COMMON_SAMPLER_TYPE_PENALTIES },
     };
 
     // since samplers names are written multiple ways
@@ -489,6 +483,7 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
         { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
         { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC),         COMMON_SAMPLER_TYPE_XTC },
         { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL),      COMMON_SAMPLER_TYPE_INFILL },
+        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES),   COMMON_SAMPLER_TYPE_PENALTIES },
     };
 
     std::vector<common_sampler_type> samplers;