cui-llama.rn 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +330 -0
- package/android/build.gradle +107 -0
- package/android/gradle.properties +5 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/CMakeLists.txt +69 -0
- package/android/src/main/java/com/rnllama/LlamaContext.java +353 -0
- package/android/src/main/java/com/rnllama/RNLlama.java +446 -0
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -0
- package/android/src/main/jni.cpp +635 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +94 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +95 -0
- package/cpp/README.md +4 -0
- package/cpp/common.cpp +3237 -0
- package/cpp/common.h +467 -0
- package/cpp/ggml-aarch64.c +2193 -0
- package/cpp/ggml-aarch64.h +39 -0
- package/cpp/ggml-alloc.c +1041 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +153 -0
- package/cpp/ggml-backend.c +2225 -0
- package/cpp/ggml-backend.h +236 -0
- package/cpp/ggml-common.h +1829 -0
- package/cpp/ggml-impl.h +655 -0
- package/cpp/ggml-metal.h +65 -0
- package/cpp/ggml-metal.m +3273 -0
- package/cpp/ggml-quants.c +15022 -0
- package/cpp/ggml-quants.h +132 -0
- package/cpp/ggml.c +22034 -0
- package/cpp/ggml.h +2444 -0
- package/cpp/grammar-parser.cpp +536 -0
- package/cpp/grammar-parser.h +29 -0
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama.cpp +21789 -0
- package/cpp/llama.h +1201 -0
- package/cpp/log.h +737 -0
- package/cpp/rn-llama.hpp +630 -0
- package/cpp/sampling.cpp +460 -0
- package/cpp/sampling.h +160 -0
- package/cpp/sgemm.cpp +1027 -0
- package/cpp/sgemm.h +14 -0
- package/cpp/unicode-data.cpp +7032 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +812 -0
- package/cpp/unicode.h +64 -0
- package/ios/RNLlama.h +11 -0
- package/ios/RNLlama.mm +302 -0
- package/ios/RNLlama.xcodeproj/project.pbxproj +278 -0
- package/ios/RNLlamaContext.h +39 -0
- package/ios/RNLlamaContext.mm +426 -0
- package/jest/mock.js +169 -0
- package/lib/commonjs/NativeRNLlama.js +10 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -0
- package/lib/commonjs/grammar.js +574 -0
- package/lib/commonjs/grammar.js.map +1 -0
- package/lib/commonjs/index.js +151 -0
- package/lib/commonjs/index.js.map +1 -0
- package/lib/module/NativeRNLlama.js +3 -0
- package/lib/module/NativeRNLlama.js.map +1 -0
- package/lib/module/grammar.js +566 -0
- package/lib/module/grammar.js.map +1 -0
- package/lib/module/index.js +129 -0
- package/lib/module/index.js.map +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +107 -0
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -0
- package/lib/typescript/grammar.d.ts +38 -0
- package/lib/typescript/grammar.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +46 -0
- package/lib/typescript/index.d.ts.map +1 -0
- package/llama-rn.podspec +56 -0
- package/package.json +230 -0
- package/src/NativeRNLlama.ts +132 -0
- package/src/grammar.ts +849 -0
- package/src/index.ts +182 -0
package/LICENSE
ADDED
@@ -0,0 +1,20 @@
MIT License

Copyright (c) 2023 Jhen-Jie Hong

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,330 @@
# cui-llama.rn

This is a fork of llama.rn meant for ChatterUI.

The original repo's README.md follows below.

# llama.rn

[CI](https://github.com/mybigday/llama.rn/actions)
[License: MIT](https://opensource.org/licenses/MIT)
[npm](https://www.npmjs.com/package/llama.rn/)

React Native binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).

[llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of the [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++

## Installation

```sh
npm install llama.rn
```

#### iOS

Run `npx pod-install` after installing the package.

#### Android

Add a ProGuard rule if ProGuard is enabled in your project (android/app/proguard-rules.pro):

```proguard
# llama.rn
-keep class com.rnllama.** { *; }
```

## Obtain the model

You can search HuggingFace for available models (keyword: [`GGUF`](https://huggingface.co/search/full-text?q=GGUF&type=model)).

To create a GGUF model manually, using Llama 2 as an example:

Download the Llama 2 model:

1. Request access from [here](https://ai.meta.com/llama)
2. Download the model from HuggingFace [here](https://huggingface.co/meta-llama/Llama-2-7b-chat) (`Llama-2-7b-chat`)

Convert the model to ggml format:

```bash
# Start with the submodule in this repo (or clone https://github.com/ggerganov/llama.cpp.git)
yarn && yarn bootstrap
cd llama.cpp

# Install Python dependencies
python3 -m pip install -r requirements.txt

# Move the Llama model weights to the models folder
mv <path to Llama-2-7b-chat> ./models/7B

# Convert the 7B model to ggml FP16 format
python3 convert.py models/7B/ --outtype f16

# Build the quantize tool
make quantize

# Quantize the model to 2 bits (using the q2_k method)
./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q2_k.gguf q2_k

# Quantize the model to 4 bits (using the q4_0 method)
./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0
```
## Usage

```js
import { initLlama } from 'llama.rn'

// Initialize a Llama context with the model (may take a while)
const context = await initLlama({
  model: 'file://<path to gguf model>',
  use_mlock: true,
  n_ctx: 2048,
  n_gpu_layers: 1, // > 0: enable Metal on iOS
  // embedding: true, // use embedding
})

// Do completion
const { text, timings } = await context.completion(
  {
    prompt:
      'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:',
    n_predict: 100,
    stop: ['</s>', 'Llama:', 'User:'],
    // n_threads: 4,
  },
  (data) => {
    // This is a partial completion callback
    const { token } = data
  },
)
console.log('Result:', text)
console.log('Timings:', timings)
```
The binding's design is inspired by the [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp, so you can map its API to LlamaContext:

- `/completion`: `context.completion(params, partialCompletionCallback)`
- `/tokenize`: `context.tokenize(content)`
- `/detokenize`: `context.detokenize(tokens)`
- `/embedding`: `context.embedding(content)`
- Other methods (sketched below)
  - `context.loadSession(path)`
  - `context.saveSession(path)`
  - `context.stopCompletion()`
  - `context.release()`
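A minimal sketch of how the other methods compose, reusing the `context` from the usage example above (the return shapes noted in the comments are assumptions; see the generated docs for the exact types):

```js
// Tokenization round-trip (tokenize is assumed to resolve to { tokens: number[] })
const { tokens } = await context.tokenize('Hello, llama!')
const restored = await context.detokenize(tokens)

// Embedding requires a context created with `embedding: true`
// const embeddingResult = await context.embedding('Hello, llama!')

// Persist and restore the session (prompt cache) between app launches
await context.saveSession('file://<path to session file>')
await context.loadSession('file://<path to session file>')

// Abort an in-flight completion, then free the native context when done
await context.stopCompletion()
await context.release()
```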
Please visit the [Documentation](docs/API) for more details.

You can also visit the [example](example) to see how to use it.

Run the example:

```bash
yarn && yarn bootstrap

# iOS
yarn example ios
# Use a device
yarn example ios --device "<device name>"
# With release mode
yarn example ios --mode Release

# Android
yarn example android
# With release mode
yarn example android --mode release
```

The example uses [react-native-document-picker](https://github.com/rnmods/react-native-document-picker) to select a model file (a usage sketch follows the list):

- iOS: You can move the model to the iOS Simulator, or to iCloud for a real device.
- Android: The selected file is copied or downloaded to the cache directory, so it may be slow.
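For instance, a picked file can be handed straight to `initLlama`. A minimal sketch, assuming `pickSingle` from react-native-document-picker and a directly usable `uri` (on Android a `content://` URI may first need to be copied to a file path, as the example app does):

```js
import DocumentPicker from 'react-native-document-picker'
import { initLlama } from 'llama.rn'

async function pickAndLoadModel() {
  // Let the user choose a GGUF model file
  const file = await DocumentPicker.pickSingle()

  // Hand the picked URI to llama.rn (may take a while for large models)
  return initLlama({
    model: file.uri,
    n_ctx: 2048,
  })
}
```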
## Grammar Sampling

GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or to speak only in emojis.

See the [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for more details.

`llama.rn` provides a built-in function to convert a JSON Schema to GBNF:

```js
import { initLlama, convertJsonSchemaToGrammar } from 'llama.rn'

const schema = {
  /* JSON Schema, see below */
}

const context = await initLlama({
  model: 'file://<path to gguf model>',
  use_mlock: true,
  n_ctx: 2048,
  n_gpu_layers: 1, // > 0: enable Metal on iOS
  // embedding: true, // use embedding
  grammar: convertJsonSchemaToGrammar({
    schema,
    propOrder: { function: 0, arguments: 1 },
  }),
})

const { text } = await context.completion({
  prompt: 'Schedule a birthday party on Aug 14th 2023 at 8pm.',
})
console.log('Result:', text)
// Example output:
// {"function": "create_event","arguments":{"date": "Aug 14th 2023", "time": "8pm", "title": "Birthday Party"}}
```
<details>
<summary>JSON Schema example (Define function get_current_weather / create_event / image_search)</summary>

```json5
{
  oneOf: [
    {
      type: 'object',
      name: 'get_current_weather',
      description: 'Get the current weather in a given location',
      properties: {
        function: {
          const: 'get_current_weather',
        },
        arguments: {
          type: 'object',
          properties: {
            location: {
              type: 'string',
              description: 'The city and state, e.g. San Francisco, CA',
            },
            unit: {
              type: 'string',
              enum: ['celsius', 'fahrenheit'],
            },
          },
          required: ['location'],
        },
      },
    },
    {
      type: 'object',
      name: 'create_event',
      description: 'Create a calendar event',
      properties: {
        function: {
          const: 'create_event',
        },
        arguments: {
          type: 'object',
          properties: {
            title: {
              type: 'string',
              description: 'The title of the event',
            },
            date: {
              type: 'string',
              description: 'The date of the event',
            },
            time: {
              type: 'string',
              description: 'The time of the event',
            },
          },
          required: ['title', 'date', 'time'],
        },
      },
    },
    {
      type: 'object',
      name: 'image_search',
      description: 'Search for an image',
      properties: {
        function: {
          const: 'image_search',
        },
        arguments: {
          type: 'object',
          properties: {
            query: {
              type: 'string',
              description: 'The search query',
            },
          },
          required: ['query'],
        },
      },
    },
  ],
}
```

</details>

<details>
<summary>Converted GBNF looks like</summary>

```bnf
space ::= " "?
0-function ::= "\"get_current_weather\""
string ::= "\"" (
  [^"\\] |
  "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
0-arguments-unit ::= "\"celsius\"" | "\"fahrenheit\""
0-arguments ::= "{" space "\"location\"" space ":" space string "," space "\"unit\"" space ":" space 0-arguments-unit "}" space
0 ::= "{" space "\"function\"" space ":" space 0-function "," space "\"arguments\"" space ":" space 0-arguments "}" space
1-function ::= "\"create_event\""
1-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
1 ::= "{" space "\"function\"" space ":" space 1-function "," space "\"arguments\"" space ":" space 1-arguments "}" space
2-function ::= "\"image_search\""
2-arguments ::= "{" space "\"query\"" space ":" space string "}" space
2 ::= "{" space "\"function\"" space ":" space 2-function "," space "\"arguments\"" space ":" space 2-arguments "}" space
root ::= 0 | 1 | 2
```

</details>
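You can also write GBNF by hand and pass it to the `grammar` parameter directly, without going through a JSON Schema. A minimal sketch with a deliberately trivial grammar:

```js
import { initLlama } from 'llama.rn'

// Constrain the model to answer with exactly "yes" or "no"
const yesNoGrammar = 'root ::= ("yes" | "no")'

const context = await initLlama({
  model: 'file://<path to gguf model>',
  n_ctx: 2048,
  grammar: yesNoGrammar,
})

const { text } = await context.completion({
  prompt: 'Is the sky blue? Answer yes or no: ',
  n_predict: 4,
})
console.log('Result:', text) // "yes" or "no"
```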
## Mock `llama.rn`

We provide a mock version of `llama.rn` for testing purposes that you can use with Jest:

```js
jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
```
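With the mock in place, a test can exercise the public API without loading a real model. A minimal sketch (what the mock actually returns is an assumption here, so it asserts on shape rather than content):

```js
jest.mock('llama.rn', () => require('llama.rn/jest/mock'))

import { initLlama } from 'llama.rn'

test('completion resolves against the mocked context', async () => {
  const context = await initLlama({ model: 'file://test.gguf' })
  const result = await context.completion({ prompt: 'Hello' }, () => {})
  expect(result).toHaveProperty('text')
  await context.release()
})
```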
## NOTE

iOS:

- Enabling the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended for your iOS project.
- Metal:
  - In our tests, some devices are unable to use Metal (`params.n_gpu_layers > 0`) because llama.cpp uses SIMD-scoped operations. You can check whether your device is supported in the [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf); an Apple7 GPU is the minimum requirement.
  - Metal is also not supported in the iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609): we use more than 14 constant buffers.

Android:

- Currently only the arm64-v8a / x86_64 platforms are supported, which means you can't initialize a context on other platforms. The 64-bit platforms are recommended because they can allocate more memory for the model.
- No GPU backend is integrated yet.

## Contributing

See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.

## License

MIT

---

Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)

---

<p align="center">
  <a href="https://bricks.tools">
    <img width="90px" src="https://avatars.githubusercontent.com/u/17320237?s=200&v=4">
  </a>
  <p align="center">
    Built and maintained by <a href="https://bricks.tools">BRICKS</a>.
  </p>
</p>
package/android/build.gradle
ADDED
@@ -0,0 +1,107 @@
buildscript {
  repositories {
    google()
    mavenCentral()
  }

  dependencies {
    classpath "com.android.tools.build:gradle:7.2.1"
  }
}

def isNewArchitectureEnabled() {
  return rootProject.hasProperty("newArchEnabled") && rootProject.getProperty("newArchEnabled") == "true"
}

apply plugin: "com.android.library"


def appProject = rootProject.allprojects.find { it.plugins.hasPlugin('com.android.application') }

if (isNewArchitectureEnabled()) {
  apply plugin: "com.facebook.react"
}

def getExtOrDefault(name) {
  return rootProject.ext.has(name) ? rootProject.ext.get(name) : project.properties["RNLlama_" + name]
}

def getExtOrIntegerDefault(name) {
  return rootProject.ext.has(name) ? rootProject.ext.get(name) : (project.properties["RNLlama_" + name]).toInteger()
}

def reactNativeArchitectures() {
  def value = project.getProperties().get("reactNativeArchitectures")
  def archs = value ? value.split(",") : ["x86_64", "arm64-v8a"]
  return archs.findAll { it != "armeabi-v7a" && it != "x86" } // Not building for 32-bit architectures
}

android {
  ndkVersion getExtOrDefault("ndkVersion")
  def ndkVersionMajor = ndkVersion.split("\\.")[0].toInteger()
  if (ndkVersionMajor < 24) {
    ndkVersion = project.properties["RNLlama_ndkversion"]
  }
  compileSdkVersion getExtOrIntegerDefault("compileSdkVersion")

  defaultConfig {
    minSdkVersion getExtOrIntegerDefault("minSdkVersion")
    targetSdkVersion getExtOrIntegerDefault("targetSdkVersion")
    buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString()
    externalNativeBuild {
      cmake {
        abiFilters (*reactNativeArchitectures())
      }
    }
  }
  externalNativeBuild {
    cmake {
      path = file('src/main/CMakeLists.txt')
    }
  }
  buildTypes {
    release {
      minifyEnabled false
    }
  }

  lintOptions {
    disable "GradleCompatible"
  }

  compileOptions {
    sourceCompatibility JavaVersion.VERSION_1_8
    targetCompatibility JavaVersion.VERSION_1_8
  }

  sourceSets {
    main {
      if (isNewArchitectureEnabled()) {
        java.srcDirs += ['src/newarch']
      } else {
        java.srcDirs += ['src/oldarch']
      }
    }
  }
}

repositories {
  mavenCentral()
  google()
}


dependencies {
  // For < 0.71, this will be from the local maven repo
  // For > 0.71, this will be replaced by `com.facebook.react:react-android:$version` by react gradle plugin
  //noinspection GradleDynamicVersion
  implementation "com.facebook.react:react-native:+"
}

if (isNewArchitectureEnabled()) {
  react {
    jsRootDir = file("../src/")
    libraryName = "RNLlama"
    codegenJavaPackageName = "com.rnllama"
  }
}
package/android/src/main/CMakeLists.txt
ADDED
@@ -0,0 +1,69 @@
cmake_minimum_required(VERSION 3.10)

project(llama.rn)

set(CMAKE_CXX_STANDARD 11)
set(RNLLAMA_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)

include_directories(${RNLLAMA_LIB_DIR})

set(
  SOURCE_FILES
  ${RNLLAMA_LIB_DIR}/ggml-aarch64.c
  ${RNLLAMA_LIB_DIR}/ggml-alloc.c
  ${RNLLAMA_LIB_DIR}/ggml-backend.c
  ${RNLLAMA_LIB_DIR}/ggml.c
  ${RNLLAMA_LIB_DIR}/ggml-quants.c
  ${RNLLAMA_LIB_DIR}/common.cpp
  ${RNLLAMA_LIB_DIR}/grammar-parser.cpp
  ${RNLLAMA_LIB_DIR}/json.hpp
  ${RNLLAMA_LIB_DIR}/json-schema-to-grammar.cpp
  ${RNLLAMA_LIB_DIR}/sampling.cpp
  ${RNLLAMA_LIB_DIR}/unicode-data.cpp
  ${RNLLAMA_LIB_DIR}/unicode.cpp
  ${RNLLAMA_LIB_DIR}/llama.cpp
  ${RNLLAMA_LIB_DIR}/sgemm.cpp
  ${RNLLAMA_LIB_DIR}/rn-llama.hpp
  ${CMAKE_SOURCE_DIR}/jni.cpp
)

find_library(LOG_LIB log)

function(build_library target_name)
  add_library(
    ${target_name}
    SHARED
    ${SOURCE_FILES}
  )

  target_link_libraries(${target_name} ${LOG_LIB} android)

  target_compile_options(${target_name} PRIVATE -pthread)

  if (${target_name} STREQUAL "rnllama_v8fp16_va")
    target_compile_options(${target_name} PRIVATE -march=armv8.4-a+fp16+dotprod)
  endif ()

  if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
    target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
  endif ()

  # NOTE: If you want to debug the native code, you can uncomment the if and endif
  # if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")

  target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
  target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
  target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)

  target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
  target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
  target_link_options(${target_name} PRIVATE -flto)

  # endif ()
endfunction()

build_library("rnllama") # Default target

if (${ANDROID_ABI} STREQUAL "arm64-v8a")
  build_library("rnllama_v8fp16_va")
endif ()