cui-llama.rn 1.6.0 → 1.7.0

This diff shows the published contents of the two package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/README.md CHANGED
@@ -123,22 +123,50 @@ console.log('Result:', textResult.text)
 console.log('Timings:', textResult.timings)
 ```
 
-The binding’s deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp, so you can map its API to LlamaContext:
+The binding’s deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp:
 
 - `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)`
 - `/tokenize`: `context.tokenize(content)`
 - `/detokenize`: `context.detokenize(tokens)`
 - `/embedding`: `context.embedding(content)`
-- Other methods
-  - `context.loadSession(path)`
-  - `context.saveSession(path)`
-  - `context.stopCompletion()`
-  - `context.release()`
+- ... Other methods
 
 Please visit the [Documentation](docs/API) for more details.
 
 You can also visit the [example](example) to see how to use it.
 
+## Session (State)
+
+The session file is a binary file that contains the state of the context, it can saves time of prompt processing.
+
+```js
+const context = await initLlama({ ...params })
+
+// After prompt processing or completion ...
+
+// Save the session
+await context.saveSession('<path to save session>')
+
+// Load the session
+await context.loadSession('<path to load session>')
+```
+
+## Embedding
+
+The embedding API is used to get the embedding of a text.
+
+```js
+const context = await initLlama({
+  ...params,
+  embedding: true,
+})
+
+const { embedding } = await context.embedding('Hello, world!')
+```
+
+- You can use model like [nomic-ai/nomic-embed-text-v1.5-GGUF](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF) for better embedding quality.
+- You can use DB like [op-sqlite](https://github.com/OP-Engineering/op-sqlite) with sqlite-vec support to store and search embeddings.
+
 ## Tool Calling
 
 `llama.rn` has universal tool call support by using [minja](https://github.com/google/minja) (as Jinja template parser) and [chat.cpp](https://github.com/ggerganov/llama.cpp/blob/master/common/chat.cpp) in llama.cpp.
@@ -273,7 +301,7 @@ jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
 
 iOS:
 
-- The [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended to enable on iOS project.
+- The [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) and [Increased Memory Limit](https://developer.apple.com/documentation/bundleresources/entitlements/com.apple.developer.kernel.increased-memory-limit?language=objc) capabilities are recommended to enable on iOS project.
 - Metal:
   - We have tested to know some devices is not able to use Metal (GPU) due to llama.cpp used SIMD-scoped operation, you can check if your device is supported in [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf), Apple7 GPU will be the minimum requirement.
   - It's also not supported in iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), we used constant buffers more than 14.
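As a concrete follow-on to the new Embedding section above, here is a minimal sketch of ranking texts against a query by cosine similarity over `context.embedding(...)` results. Only `initLlama` and `context.embedding` come from the README; the helpers (`cosine`, `rankBySimilarity`) are our illustration, not part of the package.

```js
// Illustrative only: rank candidate texts against a query by cosine
// similarity of their embeddings. Assumes a context created with
// initLlama({ ..., embedding: true }) as in the README section above.
const cosine = (a, b) => {
  let dot = 0
  let na = 0
  let nb = 0
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i]
    na += a[i] * a[i]
    nb += b[i] * b[i]
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1)
}

const rankBySimilarity = async (context, query, texts) => {
  const { embedding: q } = await context.embedding(query)
  const scored = []
  for (const text of texts) {
    const { embedding } = await context.embedding(text)
    scored.push({ text, score: cosine(q, embedding) })
  }
  return scored.sort((a, b) => b.score - a.score)
}
```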
package/android/src/main/CMakeLists.txt CHANGED
@@ -11,7 +11,11 @@ endif(CCACHE_FOUND)
 set(CMAKE_CXX_STANDARD 17)
 set(RNLLAMA_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
 
-include_directories(${RNLLAMA_LIB_DIR})
+include_directories(
+    ${RNLLAMA_LIB_DIR}
+    ${RNLLAMA_LIB_DIR}/ggml-cpu
+    ${RNLLAMA_LIB_DIR}/tools/mtmd
+)
 
 set(
     SOURCE_FILES
@@ -19,21 +23,29 @@ set(
     ${RNLLAMA_LIB_DIR}/ggml-alloc.c
     ${RNLLAMA_LIB_DIR}/ggml-backend.cpp
     ${RNLLAMA_LIB_DIR}/ggml-backend-reg.cpp
-    ${RNLLAMA_LIB_DIR}/ops.cpp
-    ${RNLLAMA_LIB_DIR}/unary-ops.cpp
-    ${RNLLAMA_LIB_DIR}/binary-ops.cpp
-    ${RNLLAMA_LIB_DIR}/vec.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu.c
-    ${RNLLAMA_LIB_DIR}/ggml-cpu.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu-aarch64.cpp
-    ${RNLLAMA_LIB_DIR}/ggml-cpu-quants.c
-    ${RNLLAMA_LIB_DIR}/ggml-cpu-traits.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/amx.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/amx/mmq.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.c
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-aarch64.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-quants.c
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ggml-cpu-traits.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/unary-ops.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/binary-ops.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/sgemm.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/vec.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-cpu/ops.cpp
     ${RNLLAMA_LIB_DIR}/ggml-opt.cpp
     ${RNLLAMA_LIB_DIR}/ggml-threading.cpp
     ${RNLLAMA_LIB_DIR}/ggml-quants.c
     ${RNLLAMA_LIB_DIR}/gguf.cpp
     ${RNLLAMA_LIB_DIR}/log.cpp
     ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+    # Multimodal support
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/clip.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-helper.cpp
     ${RNLLAMA_LIB_DIR}/llama-grammar.cpp
     ${RNLLAMA_LIB_DIR}/llama-sampling.cpp
     ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
@@ -56,7 +68,6 @@ set(
     ${RNLLAMA_LIB_DIR}/sampling.cpp
     ${RNLLAMA_LIB_DIR}/unicode-data.cpp
     ${RNLLAMA_LIB_DIR}/unicode.cpp
-    ${RNLLAMA_LIB_DIR}/sgemm.cpp
     ${RNLLAMA_LIB_DIR}/common.cpp
     ${RNLLAMA_LIB_DIR}/chat.cpp
     ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
package/android/src/main/java/com/rnllama/LlamaContext.java CHANGED
@@ -170,6 +170,8 @@ public class LlamaContext {
       params.hasKey("rope_freq_scale") ? (float) params.getDouble("rope_freq_scale") : 0.0f,
       // int pooling_type,
       params.hasKey("pooling_type") ? params.getInt("pooling_type") : -1,
+      // boolean ctx_shift,
+      params.hasKey("ctx_shift") ? params.getBoolean("ctx_shift") : true,
       // LoadProgressCallback load_progress_callback
       params.hasKey("use_progress_callback") ? new LoadProgressCallback(this) : null
     );
@@ -367,6 +369,8 @@ public class LlamaContext {
       params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
       // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
       params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
+      // String[] media_paths
+      params.hasKey("media_paths") ? params.getArray("media_paths").toArrayList().toArray(new String[0]) : new String[0],
       // PartialCompletionCallback partial_completion_callback
       new PartialCompletionCallback(
         this,
@@ -387,10 +391,8 @@ public class LlamaContext {
     return isPredicting(this.context);
   }
 
-  public WritableMap tokenize(String text) {
-    WritableMap result = Arguments.createMap();
-    result.putArray("tokens", tokenize(this.context, text));
-    return result;
+  public WritableMap tokenize(String text, ReadableArray media_paths) {
+    return tokenize(this.context, text, media_paths == null ? new String[0] : media_paths.toArrayList().toArray(new String[0]));
   }
 
   public String detokenize(ReadableArray tokens) {
@@ -437,6 +439,34 @@ public class LlamaContext {
     return getLoadedLoraAdapters(this.context);
   }
 
+  public boolean initMultimodal(ReadableMap params) {
+    String mmprojPath = params.getString("path");
+    boolean mmprojUseGpu = params.hasKey("use_gpu") ? params.getBoolean("use_gpu") : true;
+    if (mmprojPath == null || mmprojPath.isEmpty()) {
+      throw new IllegalArgumentException("mmproj_path is empty");
+    }
+    File file = new File(mmprojPath);
+    if (!file.exists()) {
+      throw new IllegalArgumentException("mmproj file does not exist: " + mmprojPath);
+    }
+    return initMultimodal(this.context, mmprojPath, mmprojUseGpu);
+  }
+
+  public boolean isMultimodalEnabled() {
+    return isMultimodalEnabled(this.context);
+  }
+
+  public WritableMap getMultimodalSupport() {
+    if (!isMultimodalEnabled()) {
+      throw new IllegalStateException("Multimodal is not enabled");
+    }
+    return getMultimodalSupport(this.context);
+  }
+
+  public void releaseMultimodal() {
+    releaseMultimodal(this.context);
+  }
+
   public void release() {
     freeContext(context);
   }
@@ -536,7 +566,7 @@ public class LlamaContext {
     String[] skip
   );
   protected static native long initContext(
-    String model,
+    String model_path,
     String chat_template,
     String reasoning_format,
     boolean embedding,
@@ -558,8 +588,12 @@ public class LlamaContext {
     float rope_freq_base,
     float rope_freq_scale,
     int pooling_type,
+    boolean ctx_shift,
     LoadProgressCallback load_progress_callback
   );
+  protected static native boolean initMultimodal(long contextPtr, String mmproj_path, boolean MMPROJ_USE_GPU);
+  protected static native boolean isMultimodalEnabled(long contextPtr);
+  protected static native WritableMap getMultimodalSupport(long contextPtr);
   protected static native void interruptLoad(long contextPtr);
   protected static native WritableMap loadModelDetails(
     long contextPtr
@@ -623,11 +657,12 @@ public class LlamaContext {
     int dry_penalty_last_n,
     float top_n_sigma,
     String[] dry_sequence_breakers,
+    String[] media_paths,
     PartialCompletionCallback partial_completion_callback
   );
   protected static native void stopCompletion(long contextPtr);
   protected static native boolean isPredicting(long contextPtr);
-  protected static native WritableArray tokenize(long contextPtr, String text);
+  protected static native WritableMap tokenize(long contextPtr, String text, String[] media_paths);
   protected static native String detokenize(long contextPtr, int[] tokens);
   protected static native boolean isEmbeddingEnabled(long contextPtr);
   protected static native WritableMap embedding(
@@ -642,4 +677,5 @@ public class LlamaContext {
   protected static native void freeContext(long contextPtr);
   protected static native void setupLog(NativeLogCallback logCallback);
   protected static native void unsetLog();
+  protected static native void releaseMultimodal(long contextPtr);
 }
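Taken together, the Java changes above add a native multimodal surface: `initMultimodal` (an mmproj `path` plus optional `use_gpu`), capability queries, a `media_paths` argument on completion, and `releaseMultimodal`. The JS wrapper for these lives in the updated `src/index.ts` (+72 −4 in `NativeRNLlama.ts`, +212 −38 in `index.ts`), which this view does not expand; the sketch below is inferred from the native signatures, so treat the JS method names and option shapes as assumptions.

```js
// Inferred from the native signatures above; the JS-side names and option
// shapes are assumptions — check src/index.ts in the package for the real API.
const context = await initLlama({ model: '<path to model gguf>' })

// Native: initMultimodal(contextPtr, mmproj_path, use_gpu)
const ok = await context.initMultimodal({
  path: '<path to mmproj gguf>',
  use_gpu: true,
})

if (ok && (await context.isMultimodalEnabled())) {
  // media_paths is forwarded to the native completion call
  const { text } = await context.completion({
    prompt: 'Describe this image.',
    media_paths: ['<path to image>'],
  })
  console.log(text)
  await context.releaseMultimodal()
}
```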
package/android/src/main/java/com/rnllama/RNLlama.java CHANGED
@@ -322,7 +322,7 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "stopCompletion-" + contextId);
   }
 
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
@@ -334,7 +334,7 @@ public class RNLlama implements LifecycleEventListener {
           if (context == null) {
             throw new Exception("Context not found");
           }
-          return context.tokenize(text);
+          return context.tokenize(text, media_paths);
         } catch (Exception e) {
           exception = e;
         }
@@ -354,13 +354,13 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "tokenize-" + contextId);
   }
 
-  public WritableMap tokenizeSync(double id, final String text) {
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray image_paths) {
     int contextId = (int) id;
     LlamaContext context = contexts.get(contextId);
     if (context == null) {
       return Arguments.createMap();
     }
-    return context.tokenize(text);
+    return context.tokenize(text, image_paths);
   }
 
   public void getCpuFeatures(Promise promise) {
@@ -597,6 +597,141 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "getLoadedLoraAdapters-" + contextId);
   }
 
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (context.isPredicting()) {
+            throw new Exception("Context is busy");
+          }
+          return context.initMultimodal(params);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initMultimodal-" + contextId);
+  }
+
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.isMultimodalEnabled();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "isMultimodalEnabled" + contextId);
+  }
+
+  public void getMultimodalSupport(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
+      private Exception exception;
+
+      @Override
+      protected WritableMap doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isMultimodalEnabled()) {
+            throw new Exception("Multimodal is not enabled");
+          }
+          return context.getMultimodalSupport();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableMap result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getMultimodalSupport-" + contextId);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          context.releaseMultimodal();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseMultimodal" + id);
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {
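One behavioural note from the tokenize changes above: the native `tokenize` now returns a `WritableMap` instead of a bare `WritableArray` of tokens, and both the async and sync paths accept a list of media paths. A hedged sketch of what that means for JS callers follows; the options argument, the `<__media__>` placeholder (mtmd's default marker), and any result field beyond `tokens` are assumptions based on the native signatures, not confirmed by this diff view.

```js
// tokenize() now resolves to an object rather than a token array.
// The second argument's shape and any fields beyond `tokens` are
// assumptions inferred from the WritableMap / String[] media_paths
// signatures shown in the Java diffs above.
const plain = await context.tokenize('Hello, world!')
console.log(plain.tokens)

const withMedia = await context.tokenize('<__media__> What is in this image?', {
  media_paths: ['<path to image>'],
})
console.log(withMedia.tokens)
```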