cui-llama.rn 1.0.6 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/android/src/main/jni.cpp +2 -2
- package/cpp/common.cpp +68 -29
- package/cpp/common.h +23 -4
- package/cpp/ggml-aarch64.c +16 -14
- package/cpp/ggml-backend.c +15 -10
- package/cpp/ggml-impl.h +4 -6
- package/cpp/ggml-metal.h +2 -0
- package/cpp/ggml-metal.m +54 -21
- package/cpp/ggml-quants.c +8 -8
- package/cpp/ggml-quants.h +4 -0
- package/cpp/ggml.c +81 -12
- package/cpp/ggml.h +6 -4
- package/cpp/llama-impl.h +15 -0
- package/cpp/llama-vocab.cpp +10 -16
- package/cpp/llama-vocab.h +2 -0
- package/cpp/llama.cpp +434 -265
- package/cpp/llama.h +4 -1
- package/cpp/rn-llama.hpp +7 -6
- package/ios/RNLlamaContext.mm +1 -1
- package/jest/mock.js +3 -0
- package/package.json +1 -1
package/cpp/llama.h
CHANGED
```diff
@@ -345,7 +345,7 @@ extern "C" {
         int32_t nthread;                      // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
         enum llama_ftype ftype;               // quantize to this llama_ftype
         enum lm_ggml_type output_tensor_type; // output tensor type
-        enum lm_ggml_type token_embedding_type; //
+        enum lm_ggml_type token_embedding_type; // token embeddings tensor type
         bool allow_requantize;                // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;          // quantize output.weight
         bool only_copy;                       // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
```
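These fields belong to `llama_model_quantize_params` in this header. A minimal sketch of how a caller might drive quantization with the per-tensor overrides above; the file names and the `Q6_K` choices are illustrative assumptions, not values taken from this package:

```cpp
#include "llama.h"

int main() {
    // start from the library defaults, then override selected fields
    llama_model_quantize_params qp = llama_model_quantize_default_params();
    qp.nthread              = 0;                        // <=0: std::thread::hardware_concurrency()
    qp.ftype                = LLAMA_FTYPE_MOSTLY_Q4_K_M;
    qp.output_tensor_type   = LM_GGML_TYPE_Q6_K;        // keep output.weight at higher precision
    qp.token_embedding_type = LM_GGML_TYPE_Q6_K;        // token embeddings tensor type (assumed choice)
    // returns 0 on success
    return (int) llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &qp);
}
```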
```diff
@@ -504,6 +504,9 @@ extern "C" {
     // Returns true if the model contains an encoder that requires llama_encode() call
     LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 
+    // Returns true if the model contains a decoder that requires llama_decode() call
+    LLAMA_API bool llama_model_has_decoder(const struct llama_model * model);
+
     // For encoder-decoder models, this function returns id of the token that must be provided
     // to the decoder to start generating output sequence. For other models, it returns -1.
     LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
```
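`llama_model_has_decoder()` completes the capability pair introduced for encoder-decoder architectures such as T5. A hedged sketch of how a caller could branch on the two queries; the control flow is an assumption, not code from this package:

```cpp
#include "llama.h"

// Run one batch through whichever passes the model supports.
static void run_batch(llama_model * model, llama_context * ctx, llama_batch batch) {
    if (llama_model_has_encoder(model)) {
        llama_encode(ctx, batch);  // e.g. the T5 encoder pass
        // returns -1 for models without a dedicated decoder start token
        llama_token dec_start = llama_model_decoder_start_token(model);
        (void) dec_start;          // would seed the first decoder batch
    }
    if (llama_model_has_decoder(model)) {
        llama_decode(ctx, batch);  // the usual autoregressive pass
    }
}
```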
package/cpp/rn-llama.hpp
CHANGED
```diff
@@ -6,13 +6,10 @@
 #include "common.h"
 #include "llama.h"
 
-
 #include <android/log.h>
 #define LLAMA_ANDROID_TAG "RNLLAMA_LOG_ANDROID"
 #define LLAMA_LOG_INFO(...) __android_log_print(ANDROID_LOG_INFO , LLAMA_ANDROID_TAG, __VA_ARGS__)
 
-
-
 namespace rnllama {
 
 static void llama_batch_clear(llama_batch *batch) {
```
```diff
@@ -227,7 +224,9 @@ struct llama_rn_context
     bool loadModel(gpt_params &params_)
     {
         params = params_;
-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        llama_init_result result = llama_init_from_gpt_params(params);
+        model = result.model;
+        ctx = result.context;
         if (model == nullptr)
         {
            LOG_ERROR("unable to load model: %s", params_.model.c_str());
```
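This tracks an upstream llama.cpp change where `llama_init_from_gpt_params()` stopped returning a tuple of model and context and started returning a small struct. A sketch of the new calling convention, assuming the upstream definition of `llama_init_result`:

```cpp
#include "common.h"

// Upstream (llama.cpp common.h) defines roughly:
//   struct llama_init_result { llama_model * model; llama_context * context; };
static bool load(gpt_params & params, llama_model *& model, llama_context *& ctx) {
    llama_init_result result = llama_init_from_gpt_params(params);
    model = result.model;    // nullptr if the model file failed to load
    ctx   = result.context;  // nullptr if context creation failed afterwards
    return model != nullptr && ctx != nullptr;
}
```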
```diff
@@ -240,9 +239,11 @@ struct llama_rn_context
     bool validateModelChatTemplate() const {
         llama_chat_message chat[] = {{"user", "test"}};
 
-        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+        std::string template_key = "tokenizer.chat_template";
+        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
 
-        return res > 0;
+        return res >= 0;
     }
 
     void truncatePrompt(std::vector<llama_token> &prompt_tokens) {
```
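The validation no longer test-renders the template; it simply asks whether the `tokenizer.chat_template` key exists in the GGUF metadata. `llama_model_meta_val_str()` returns the value's length when the key is present and a negative value when it is absent, hence `res >= 0`. The same pattern generalizes to any metadata key; a small sketch, with a hypothetical helper name:

```cpp
#include <string>
#include <vector>
#include "llama.h"

// Read a GGUF metadata value, or return an empty string when the key is absent.
static std::string get_meta(const llama_model * model, const char * key) {
    std::vector<char> buf(2048, 0);
    const int32_t len = llama_model_meta_val_str(model, key, buf.data(), buf.size());
    if (len < 0) {
        return "";                      // key not present in the model header
    }
    return std::string(buf.data());     // value, possibly truncated to buf.size()
}
```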
package/ios/RNLlamaContext.mm
CHANGED
```diff
@@ -102,7 +102,7 @@
     for (int i = 0; i < count; i++) {
         char key[256];
         llama_model_meta_key_by_index(llama->model, i, key, sizeof(key));
-        char val[256];
+        char val[2048];
         llama_model_meta_val_str_by_index(llama->model, i, val, sizeof(val));
 
         NSString *keyStr = [NSString stringWithUTF8String:key];
```
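The larger buffer matters because metadata values such as `tokenizer.chat_template` routinely exceed 256 bytes, matching the 2048-byte buffer adopted in `validateModelChatTemplate()` above. Since `llama_model_meta_val_str_by_index()` follows `snprintf` semantics and returns the full value length, a caller can also detect truncation and retry; a hedged sketch with a hypothetical helper:

```cpp
#include <vector>
#include "llama.h"

// Fetch metadata value i, growing the buffer once if the first read truncated.
static std::vector<char> meta_val(const llama_model * model, int32_t i) {
    std::vector<char> buf(2048, 0);
    const int32_t len = llama_model_meta_val_str_by_index(model, i, buf.data(), buf.size());
    if (len >= (int32_t) buf.size()) {
        buf.assign(len + 1, 0);        // value was truncated; allocate exact room
        llama_model_meta_val_str_by_index(model, i, buf.data(), buf.size());
    }
    return buf;                        // NUL-terminated metadata value
}
```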
package/jest/mock.js
CHANGED