cui-llama.rn 1.0.6 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/android/src/main/jni.cpp +2 -2
- package/cpp/common.cpp +68 -29
- package/cpp/common.h +23 -4
- package/cpp/ggml-aarch64.c +16 -14
- package/cpp/ggml-backend.c +15 -10
- package/cpp/ggml-impl.h +4 -6
- package/cpp/ggml-metal.h +2 -0
- package/cpp/ggml-metal.m +54 -21
- package/cpp/ggml-quants.c +8 -8
- package/cpp/ggml-quants.h +4 -0
- package/cpp/ggml.c +81 -12
- package/cpp/ggml.h +6 -4
- package/cpp/llama-impl.h +15 -0
- package/cpp/llama-vocab.cpp +10 -16
- package/cpp/llama-vocab.h +2 -0
- package/cpp/llama.cpp +434 -265
- package/cpp/llama.h +4 -1
- package/cpp/rn-llama.hpp +7 -6
- package/ios/RNLlamaContext.mm +1 -1
- package/jest/mock.js +3 -0
- package/package.json +1 -1
package/cpp/llama.h
CHANGED
```diff
@@ -345,7 +345,7 @@ extern "C" {
         int32_t nthread;                      // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
         enum llama_ftype ftype;               // quantize to this llama_ftype
         enum lm_ggml_type output_tensor_type; // output tensor type
-        enum lm_ggml_type token_embedding_type; //
+        enum lm_ggml_type token_embedding_type; // token embeddings tensor type
         bool allow_requantize;                // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;          // quantize output.weight
         bool only_copy;                       // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
```
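These fields belong to `llama_model_quantize_params` in this header. A minimal sketch of how a caller might drive quantization with the per-tensor overrides above; the file names and the `Q6_K` choices are illustrative assumptions, not values taken from this package:

```cpp
#include "llama.h"

int main() {
    // start from the library defaults, then override selected fields
    llama_model_quantize_params qp = llama_model_quantize_default_params();
    qp.nthread              = 0;                        // <=0: std::thread::hardware_concurrency()
    qp.ftype                = LLAMA_FTYPE_MOSTLY_Q4_K_M;
    qp.output_tensor_type   = LM_GGML_TYPE_Q6_K;        // keep output.weight at higher precision
    qp.token_embedding_type = LM_GGML_TYPE_Q6_K;        // token embeddings tensor type (assumed choice)
    // returns 0 on success
    return (int) llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &qp);
}
```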
```diff
@@ -504,6 +504,9 @@ extern "C" {
     // Returns true if the model contains an encoder that requires llama_encode() call
     LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 
+    // Returns true if the model contains a decoder that requires llama_decode() call
+    LLAMA_API bool llama_model_has_decoder(const struct llama_model * model);
+
     // For encoder-decoder models, this function returns id of the token that must be provided
     // to the decoder to start generating output sequence. For other models, it returns -1.
     LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
```
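`llama_model_has_decoder()` completes the capability pair introduced for encoder-decoder architectures such as T5. A hedged sketch of how a caller could branch on the two queries; the control flow is an assumption, not code from this package:

```cpp
#include "llama.h"

// Run one batch through whichever passes the model supports.
static void run_batch(llama_model * model, llama_context * ctx, llama_batch batch) {
    if (llama_model_has_encoder(model)) {
        llama_encode(ctx, batch);  // e.g. the T5 encoder pass
        // returns -1 for models without a dedicated decoder start token
        llama_token dec_start = llama_model_decoder_start_token(model);
        (void) dec_start;          // would seed the first decoder batch
    }
    if (llama_model_has_decoder(model)) {
        llama_decode(ctx, batch);  // the usual autoregressive pass
    }
}
```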
package/cpp/rn-llama.hpp
CHANGED
```diff
@@ -6,13 +6,10 @@
 #include "common.h"
 #include "llama.h"
 
-
 #include <android/log.h>
 #define LLAMA_ANDROID_TAG "RNLLAMA_LOG_ANDROID"
 #define LLAMA_LOG_INFO(...) __android_log_print(ANDROID_LOG_INFO , LLAMA_ANDROID_TAG, __VA_ARGS__)
 
-
-
 namespace rnllama {
 
 static void llama_batch_clear(llama_batch *batch) {
```
```diff
@@ -227,7 +224,9 @@ struct llama_rn_context
     bool loadModel(gpt_params &params_)
     {
         params = params_;
-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        llama_init_result result = llama_init_from_gpt_params(params);
+        model = result.model;
+        ctx = result.context;
         if (model == nullptr)
         {
            LOG_ERROR("unable to load model: %s", params_.model.c_str());
```
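This tracks an upstream llama.cpp change where `llama_init_from_gpt_params()` stopped returning a tuple of model and context and started returning a small struct. A sketch of the new calling convention, assuming the upstream definition of `llama_init_result`:

```cpp
#include "common.h"

// Upstream (llama.cpp common.h) defines roughly:
//   struct llama_init_result { llama_model * model; llama_context * context; };
static bool load(gpt_params & params, llama_model *& model, llama_context *& ctx) {
    llama_init_result result = llama_init_from_gpt_params(params);
    model = result.model;    // nullptr if the model file failed to load
    ctx   = result.context;  // nullptr if context creation failed afterwards
    return model != nullptr && ctx != nullptr;
}
```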
```diff
@@ -240,9 +239,11 @@ struct llama_rn_context
     bool validateModelChatTemplate() const {
         llama_chat_message chat[] = {{"user", "test"}};
 
-        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+        std::string template_key = "tokenizer.chat_template";
+        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
 
-        return res > 0;
+        return res >= 0;
     }
 
     void truncatePrompt(std::vector<llama_token> &prompt_tokens) {
```
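The validation no longer test-renders the template; it simply asks whether the `tokenizer.chat_template` key exists in the GGUF metadata. `llama_model_meta_val_str()` returns the value's length when the key is present and a negative value when it is absent, hence `res >= 0`. The same pattern generalizes to any metadata key; a small sketch, with a hypothetical helper name:

```cpp
#include <string>
#include <vector>
#include "llama.h"

// Read a GGUF metadata value, or return an empty string when the key is absent.
static std::string get_meta(const llama_model * model, const char * key) {
    std::vector<char> buf(2048, 0);
    const int32_t len = llama_model_meta_val_str(model, key, buf.data(), buf.size());
    if (len < 0) {
        return "";                      // key not present in the model header
    }
    return std::string(buf.data());     // value, possibly truncated to buf.size()
}
```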
package/ios/RNLlamaContext.mm
CHANGED
```diff
@@ -102,7 +102,7 @@
     for (int i = 0; i < count; i++) {
         char key[256];
         llama_model_meta_key_by_index(llama->model, i, key, sizeof(key));
-        char val[256];
+        char val[2048];
         llama_model_meta_val_str_by_index(llama->model, i, val, sizeof(val));
 
         NSString *keyStr = [NSString stringWithUTF8String:key];
```
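The larger buffer matters because metadata values such as `tokenizer.chat_template` routinely exceed 256 bytes, matching the 2048-byte buffer adopted in `validateModelChatTemplate()` above. Since `llama_model_meta_val_str_by_index()` follows `snprintf` semantics and returns the full value length, a caller can also detect truncation and retry; a hedged sketch with a hypothetical helper:

```cpp
#include <vector>
#include "llama.h"

// Fetch metadata value i, growing the buffer once if the first read truncated.
static std::vector<char> meta_val(const llama_model * model, int32_t i) {
    std::vector<char> buf(2048, 0);
    const int32_t len = llama_model_meta_val_str_by_index(model, i, buf.data(), buf.size());
    if (len >= (int32_t) buf.size()) {
        buf.assign(len + 1, 0);        // value was truncated; allocate exact room
        llama_model_meta_val_str_by_index(model, i, buf.data(), buf.size());
    }
    return buf;                        // NUL-terminated metadata value
}
```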
package/jest/mock.js
CHANGED