cui-llama.rn 1.6.0 → 1.7.0
This diff shows the changes between the publicly released contents of these two package versions, as published to their public registries. It is provided for informational purposes only.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/android/src/main/jni.cpp
CHANGED
```diff
@@ -9,6 +9,7 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
+#include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
 #include "chat.h"
@@ -252,6 +253,7 @@ Java_com_rnllama_LlamaContext_initContext(
     jfloat rope_freq_base,
     jfloat rope_freq_scale,
     jint pooling_type,
+    jboolean ctx_shift,
     jobject load_progress_callback
 ) {
     UNUSED(thiz);
@@ -264,7 +266,7 @@ Java_com_rnllama_LlamaContext_initContext(
     }
 
     const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
-    defaultParams.model = model_path_chars;
+    defaultParams.model.path = model_path_chars;
 
     const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
     defaultParams.chat_template = chat_template_chars;
@@ -279,6 +281,7 @@ Java_com_rnllama_LlamaContext_initContext(
     defaultParams.n_ctx = n_ctx;
     defaultParams.n_batch = n_batch;
     defaultParams.n_ubatch = n_ubatch;
+    defaultParams.ctx_shift = ctx_shift;
 
     if (pooling_type != -1) {
         defaultParams.pooling_type = static_cast<enum llama_pooling_type>(pooling_type);
@@ -298,7 +301,7 @@ Java_com_rnllama_LlamaContext_initContext(
     int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
     defaultParams.cpuparams.n_threads = n_threads > 0 ? n_threads : default_n_threads;
 
-
+    defaultParams.n_gpu_layers = n_gpu_layers;
     defaultParams.flash_attn = flash_attn;
 
     const char *cache_type_k_chars = env->GetStringUTFChars(cache_type_k, nullptr);
@@ -534,9 +537,15 @@ Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
             pushString(env, additional_stops, stop.c_str());
         }
         putArray(env, result, "additional_stops", additional_stops);
+    } catch (const nlohmann::json_abi_v3_11_3::detail::parse_error& e) {
+        std::string errorMessage = "JSON parse error in getFormattedChat: " + std::string(e.what());
+        putString(env, result, "_error", errorMessage.c_str());
+        LOGI("[RNLlama] %s", errorMessage.c_str());
     } catch (const std::runtime_error &e) {
-        LOGI("[RNLlama] Error: %s", e.what());
         putString(env, result, "_error", e.what());
+        LOGI("[RNLlama] Error: %s", e.what());
+    } catch (...) {
+        putString(env, result, "_error", "Unknown error in getFormattedChat");
     }
     env->ReleaseStringUTFChars(tools, tools_chars);
     env->ReleaseStringUTFChars(messages, messages_chars);
```
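The reworked error handling in `getFormattedChatWithJinja` orders its handlers from most to least specific: nlohmann's `parse_error` derives from `std::exception` rather than `std::runtime_error`, so it needs its own clause, and the trailing catch-all keeps any remaining C++ exception from escaping across the JNI boundary. A minimal standalone sketch of the same layering, using the library's public `nlohmann::json::parse_error` alias instead of the ABI-versioned name that appears in the hunk:

```cpp
#include <nlohmann/json.hpp>
#include <iostream>
#include <stdexcept>
#include <string>

static void report(const std::string &msg) { std::cerr << msg << "\n"; }

int main() {
    try {
        auto j = nlohmann::json::parse("{ not valid json");   // throws parse_error
    } catch (const nlohmann::json::parse_error &e) {
        report(std::string("JSON parse error: ") + e.what()); // most specific first
    } catch (const std::runtime_error &e) {
        report(e.what());                                     // other runtime failures
    } catch (...) {
        report("Unknown error");                              // nothing may escape to JNI
    }
    return 0;
}
```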
```diff
@@ -591,6 +600,12 @@ Java_com_rnllama_LlamaContext_loadSession(
     llama->embd.resize(n_token_count_out);
     env->ReleaseStringUTFChars(path, path_chars);
 
+    // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+    auto null_token_iter = std::find(llama->embd.begin(), llama->embd.end(), LLAMA_TOKEN_NULL);
+    if (null_token_iter != llama->embd.end()) {
+        llama->embd.resize(std::distance(llama->embd.begin(), null_token_iter));
+    }
+
     const std::string text = rnllama::tokens_to_str(llama->ctx, llama->embd.cbegin(), llama->embd.cend());
     putInt(env, result, "tokens_loaded", n_token_count_out);
     putString(env, result, "prompt", text.c_str());
@@ -611,6 +626,13 @@ Java_com_rnllama_LlamaContext_saveSession(
     const char *path_chars = env->GetStringUTFChars(path, nullptr);
 
     std::vector<llama_token> session_tokens = llama->embd;
+
+    // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+    auto null_token_iter = std::find(session_tokens.begin(), session_tokens.end(), LLAMA_TOKEN_NULL);
+    if (null_token_iter != session_tokens.end()) {
+        session_tokens.resize(std::distance(session_tokens.begin(), null_token_iter));
+    }
+
     int default_size = session_tokens.size();
     int save_size = size > 0 && size <= default_size ? size : default_size;
     if (!llama_state_save_file(llama->ctx, path_chars, session_tokens.data(), save_size)) {
```
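Both session functions now cut the token buffer at the first `LLAMA_TOKEN_NULL` so that sentinel padding never reaches the session file or the reconstructed prompt text. A self-contained sketch of that find-and-resize step (the `llama_token` typedef and the sentinel value `-1` match llama.h, but are restated here so the sketch compiles on its own):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using llama_token = int32_t;                 // restated from llama.h for the sketch
constexpr llama_token LLAMA_TOKEN_NULL = -1; // llama.h's null-token sentinel

// Drop everything from the first sentinel onward, as loadSession/saveSession do.
static void truncate_at_null(std::vector<llama_token> &tokens) {
    auto it = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
    if (it != tokens.end()) {
        tokens.resize(std::distance(tokens.begin(), it));
    }
}

int main() {
    std::vector<llama_token> toks = {15043, 3186, LLAMA_TOKEN_NULL, 0, 0};
    truncate_at_null(toks);
    assert(toks.size() == 2); // only the real tokens remain
    return 0;
}
```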
```diff
@@ -685,6 +707,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jint dry_penalty_last_n,
     jfloat top_n_sigma,
     jobjectArray dry_sequence_breakers,
+    jobjectArray media_paths,
     jobject partial_completion_callback
 ) {
     UNUSED(thiz);
@@ -694,8 +717,32 @@ Java_com_rnllama_LlamaContext_doCompletion(
 
     //llama_reset_timings(llama->ctx);
 
-
+    const char *prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+
+    // Set the prompt parameter
     llama->params.prompt = prompt_chars;
+
+    // Process image paths if provided
+    std::vector<std::string> media_paths_vector;
+
+    jint media_paths_size = env->GetArrayLength(media_paths);
+    if (media_paths_size > 0) {
+        // Check if multimodal is enabled
+        if (!llama->isMultimodalEnabled()) {
+            auto result = createWriteableMap(env);
+            putString(env, result, "error", "Multimodal support not enabled. Call initMultimodal first.");
+            env->ReleaseStringUTFChars(prompt, prompt_chars);
+            return reinterpret_cast<jobject>(result);
+        }
+
+        for (jint i = 0; i < media_paths_size; i++) {
+            jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+            const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+            media_paths_vector.push_back(image_path_chars);
+            env->ReleaseStringUTFChars(image_path, image_path_chars);
+        }
+    }
+
     llama->params.sampling.seed = (seed == -1) ? time(NULL) : seed;
 
     int max_threads = std::thread::hardware_concurrency();
```
|
|
852
899
|
putString(env, result, "error", "Failed to initialize sampling");
|
853
900
|
return reinterpret_cast<jobject>(result);
|
854
901
|
}
|
902
|
+
|
855
903
|
llama->beginCompletion();
|
856
|
-
|
904
|
+
try {
|
905
|
+
llama->loadPrompt(media_paths_vector);
|
906
|
+
} catch (const std::exception &e) {
|
907
|
+
llama->endCompletion();
|
908
|
+
auto result = createWriteableMap(env);
|
909
|
+
putString(env, result, "error", e.what());
|
910
|
+
return reinterpret_cast<jobject>(result);
|
911
|
+
}
|
912
|
+
|
913
|
+
if (llama->context_full) {
|
914
|
+
llama->endCompletion();
|
915
|
+
auto result = createWriteableMap(env);
|
916
|
+
putString(env, result, "error", "Context is full");
|
917
|
+
return reinterpret_cast<jobject>(result);
|
918
|
+
}
|
857
919
|
|
858
920
|
size_t sent_count = 0;
|
859
921
|
size_t sent_token_probs_index = 0;
|
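Every error path after `beginCompletion()` now calls `endCompletion()` before returning: the `loadPrompt` failure, the context-full check, and (below) the normal exit. A scope guard is one way to make that invariant impossible to miss; this is purely an illustrative alternative under an assumed `Ctx` interface, not what the package does:

```cpp
// Illustrative RAII guard: endCompletion() runs on every exit path, including
// exceptions, so no early return can leave the context stuck mid-completion.
// Ctx is any type exposing beginCompletion()/endCompletion().
template <typename Ctx>
struct CompletionGuard {
    Ctx *ctx;
    explicit CompletionGuard(Ctx *c) : ctx(c) { ctx->beginCompletion(); }
    CompletionGuard(const CompletionGuard &) = delete;
    CompletionGuard &operator=(const CompletionGuard &) = delete;
    ~CompletionGuard() { ctx->endCompletion(); }
};
```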
```diff
@@ -916,9 +978,14 @@ Java_com_rnllama_LlamaContext_doCompletion(
     }
 
     env->ReleaseStringUTFChars(grammar, grammar_chars);
-
+
+    // Release prompt_chars if it's still allocated
+    if (prompt_chars != nullptr) {
+        env->ReleaseStringUTFChars(prompt, prompt_chars);
+    }
+
     llama_perf_context_print(llama->ctx);
-    llama->
+    llama->endCompletion();
 
     auto toolCalls = createWritableArray(env);
     std::string reasoningContent = "";
@@ -945,7 +1012,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
                 toolCallsSize++;
             }
         } catch (const std::exception &e) {
-
+        } catch (...) {
         }
     }
 
@@ -964,6 +1031,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     putInt(env, result, "tokens_predicted", llama->num_tokens_predicted);
     putInt(env, result, "tokens_evaluated", llama->num_prompt_tokens);
     putInt(env, result, "truncated", llama->truncated);
+    putBoolean(env, result, "context_full", llama->context_full);
     putInt(env, result, "stopped_eos", llama->stopped_eos);
    putInt(env, result, "stopped_word", llama->stopped_word);
     putInt(env, result, "stopped_limit", llama->stopped_limit);
@@ -1007,22 +1075,47 @@ Java_com_rnllama_LlamaContext_isPredicting(
 
 JNIEXPORT jobject JNICALL
 Java_com_rnllama_LlamaContext_tokenize(
-    JNIEnv *env, jobject thiz, jlong context_ptr, jstring text) {
+    JNIEnv *env, jobject thiz, jlong context_ptr, jstring text, jobjectArray media_paths) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];
 
     const char *text_chars = env->GetStringUTFChars(text, nullptr);
+    std::vector<std::string> media_paths_vector;
+    for (int i = 0; i < env->GetArrayLength(media_paths); i++) {
+        jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+        const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+        media_paths_vector.push_back(image_path_chars);
+        env->ReleaseStringUTFChars(image_path, image_path_chars);
+    }
+    auto tokenize_result = llama->tokenize(text_chars, media_paths_vector);
+
+    auto result = createWriteableMap(env);
+
+    auto tokens = createWritableArray(env);
+    for (const auto &tok : tokenize_result.tokens) {
+        pushInt(env, tokens, tok);
+    }
+    putArray(env, result, "tokens", tokens);
 
-
-        llama->ctx,
-        text_chars,
-        false
-    );
+    putBoolean(env, result, "has_media", tokenize_result.has_media);
 
-
-    for (const auto &
-
+    auto bitmap_hashes = createWritableArray(env);
+    for (const auto &hash : tokenize_result.bitmap_hashes) {
+        pushString(env, bitmap_hashes, hash.c_str());
     }
+    putArray(env, result, "bitmap_hashes", bitmap_hashes);
+
+    auto chunk_pos = createWritableArray(env);
+    for (const auto &pos : tokenize_result.chunk_pos) {
+        pushInt(env, chunk_pos, pos);
+    }
+    putArray(env, result, "chunk_pos", chunk_pos);
+
+    auto chunk_pos_media = createWritableArray(env);
+    for (const auto &pos : tokenize_result.chunk_pos_media) {
+        pushInt(env, chunk_pos_media, pos);
+    }
+    putArray(env, result, "chunk_pos_media", chunk_pos_media);
 
     env->ReleaseStringUTFChars(text, text_chars);
     return result;
```
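The marshalling in the rewritten `tokenize` implies the shape of the native result: token ids plus parallel metadata locating media chunks in the stream. A sketch of that shape, with field names read off the JNI code above and element types assumed:

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Field names match the JNI marshalling above; exact types are assumptions.
struct tokenize_result_sketch {
    std::vector<int32_t>     tokens;            // returned to JS as "tokens"
    bool                     has_media = false; // true when media chunks were produced
    std::vector<std::string> bitmap_hashes;     // one hash per decoded media bitmap
    std::vector<size_t>      chunk_pos;         // start positions of text chunks
    std::vector<size_t>      chunk_pos_media;   // start positions of media chunks
};
```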
```diff
@@ -1091,7 +1184,12 @@ Java_com_rnllama_LlamaContext_embedding(
     }
 
     llama->beginCompletion();
-
+    try {
+        llama->loadPrompt({});
+    } catch (const std::exception &e) {
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
+    }
     llama->doCompletion();
 
     std::vector<float> embedding = llama->getEmbedding(embdParams);
@@ -1260,4 +1358,61 @@ Java_com_rnllama_LlamaContext_unsetLog(JNIEnv *env, jobject thiz) {
     llama_log_set(rnllama_log_callback_default, NULL);
 }
 
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_initMultimodal(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring mmproj_path,
+    jboolean mmproj_use_gpu
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+
+    const char *mmproj_path_chars = env->GetStringUTFChars(mmproj_path, nullptr);
+    bool result = llama->initMultimodal(mmproj_path_chars, mmproj_use_gpu);
+    env->ReleaseStringUTFChars(mmproj_path, mmproj_path_chars);
+
+    return result;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_isMultimodalEnabled(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    return llama->isMultimodalEnabled();
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getMultimodalSupport(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    auto result = createWriteableMap(env);
+    putBoolean(env, result, "vision", llama->isMultimodalSupportVision());
+    putBoolean(env, result, "audio", llama->isMultimodalSupportAudio());
+    return result;
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_releaseMultimodal(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    llama->releaseMultimodal();
+}
+
 } // extern "C"
```
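Taken together, the four new entry points suggest a projector lifecycle on the native context: initialize once, query capabilities, run completions with media paths, release when done. A hedged sketch of that sequence against a generic context type (the method names come from the diff; `enableVision`, `Ctx`, and the control flow are illustrative):

```cpp
#include <string>

// Hypothetical wrapper over the context API surfaced by the JNI functions above.
template <typename Ctx>
bool enableVision(Ctx *llama, const std::string &mmproj_path) {
    if (!llama->initMultimodal(mmproj_path.c_str(), /* use_gpu = */ true)) {
        return false;                   // projector failed to load
    }
    if (!llama->isMultimodalSupportVision()) {
        llama->releaseMultimodal();     // loaded, but no vision tower: back out
        return false;
    }
    return true;                        // completions may now carry media paths
}
```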
package/android/src/main/jniLibs (8 prebuilt librnllama*.so files, listed above)
CHANGED
Binary files; contents not shown.
package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
CHANGED
```diff
@@ -53,6 +53,26 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.initContext(id, params, promise);
   }
 
+  @ReactMethod
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    rnllama.initMultimodal(id, params, promise);
+  }
+
+  @ReactMethod
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    rnllama.isMultimodalEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getMultimodalSupport(double id, final Promise promise) {
+    rnllama.getMultimodalSupport(id, promise);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    rnllama.releaseMultimodal(id, promise);
+  }
+
   @ReactMethod
   public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
     rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
@@ -79,13 +99,13 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
   }
 
   @ReactMethod
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
-    rnllama.tokenizeAsync(id, text, promise);
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
+    rnllama.tokenizeAsync(id, text, media_paths, promise);
   }
 
   @ReactMethod(isBlockingSynchronousMethod=true)
-  public WritableMap tokenizeSync(double id, final String text) {
-    return rnllama.tokenizeSync(id, text);
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray media_paths) {
+    return rnllama.tokenizeSync(id, text, media_paths);
   }
 
   @ReactMethod
```
package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java
CHANGED
```diff
@@ -54,6 +54,26 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
     rnllama.initContext(id, params, promise);
   }
 
+  @ReactMethod
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    rnllama.initMultimodal(id, params, promise);
+  }
+
+  @ReactMethod
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    rnllama.isMultimodalEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getMultimodalSupport(double id, final Promise promise) {
+    rnllama.getMultimodalSupport(id, promise);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    rnllama.releaseMultimodal(id, promise);
+  }
+
   @ReactMethod
   public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
     rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
@@ -80,12 +100,12 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
   }
 
   @ReactMethod
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
+  public void tokenizeAsync(double id, final String text, final ReadableArray image_paths, final Promise promise) {
     rnllama.tokenizeAsync(id, text, promise);
   }
 
   @ReactMethod(isBlockingSynchronousMethod=true)
-  public WritableMap tokenizeSync(double id, final String text) {
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray image_paths) {
     return rnllama.tokenizeSync(id, text);
   }
 
```
package/cpp/LICENSE
ADDED
```diff
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023-2024 The ggml authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
```