@fugood/llama.node 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/CMakeLists.txt +6 -3
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +3 -3
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  24. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  25. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  26. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  27. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  28. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  29. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  31. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  32. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  36. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  37. package/src/llama.cpp/CMakeLists.txt +91 -1245
  38. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  39. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  40. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  41. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  42. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  43. package/src/llama.cpp/common/common.cpp +1116 -877
  44. package/src/llama.cpp/common/common.h +191 -77
  45. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  46. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  47. package/src/llama.cpp/common/log.h +1 -1
  48. package/src/llama.cpp/common/ngram-cache.h +10 -3
  49. package/src/llama.cpp/common/sampling.cpp +19 -10
  50. package/src/llama.cpp/docs/build.md +353 -0
  51. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  52. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  54. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  56. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  58. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  60. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  61. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  62. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  63. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  64. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  65. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  66. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  67. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  68. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  69. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  71. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  72. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  73. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  75. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  76. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  77. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  79. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  80. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  87. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  88. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  90. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  91. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  92. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  94. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  95. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  96. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  97. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  98. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  99. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  100. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  102. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  103. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  104. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  105. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  106. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  107. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  108. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  110. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  111. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  112. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  113. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  114. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  115. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  116. package/src/llama.cpp/examples/main/main.cpp +98 -75
  117. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  118. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  119. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  120. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  121. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  122. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  123. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  124. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  125. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  126. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  127. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  128. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  129. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  130. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  131. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  132. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  133. package/src/llama.cpp/examples/server/server.cpp +274 -671
  134. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  135. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  136. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  137. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  138. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  139. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  140. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  141. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  142. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  143. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  144. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  145. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  146. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  147. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  148. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  149. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  150. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  151. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  152. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  153. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  154. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  155. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  156. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  157. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  159. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  160. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  161. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  162. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  163. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  178. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  179. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  180. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  181. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  182. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  183. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  184. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  208. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  209. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  210. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  211. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  212. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  214. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  215. package/src/llama.cpp/models/.editorconfig +1 -0
  216. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  217. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  221. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  230. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  233. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  249. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  252. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  255. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  258. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  259. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  260. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  261. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  263. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  264. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  265. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  266. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  267. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  268. package/src/llama.cpp/requirements.txt +5 -4
  269. package/src/llama.cpp/scripts/build-info.sh +30 -0
  270. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  271. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  272. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  273. package/src/llama.cpp/src/llama-grammar.h +39 -0
  274. package/src/llama.cpp/src/llama-impl.h +26 -0
  275. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  276. package/src/llama.cpp/src/llama-sampling.h +56 -0
  277. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  278. package/src/llama.cpp/src/llama-vocab.h +130 -0
  279. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  280. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  281. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  282. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  283. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  284. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  285. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  286. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  287. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  289. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  290. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  291. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  292. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  293. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  294. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  295. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  296. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  297. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  298. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  299. package/bin/darwin/arm64/default.metallib +0 -0
  300. package/bin/darwin/x64/default.metallib +0 -0
  301. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  302. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  303. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  304. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  305. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  306. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  307. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  308. package/src/llama.cpp/ggml-opencl.h +0 -36
  309. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  310. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  311. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  314. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  315. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  316. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  317. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  318. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  319. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts
@@ -0,0 +1,68 @@
+ plugins {
+ id("com.android.library")
+ id("org.jetbrains.kotlin.android")
+ }
+
+ android {
+ namespace = "android.llama.cpp"
+ compileSdk = 34
+
+ defaultConfig {
+ minSdk = 33
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ consumerProguardFiles("consumer-rules.pro")
+ ndk {
+ // Add NDK properties if wanted, e.g.
+ // abiFilters += listOf("arm64-v8a")
+ }
+ externalNativeBuild {
+ cmake {
+ arguments += "-DCMAKE_BUILD_TYPE=Release"
+ cppFlags += listOf()
+ arguments += listOf()
+
+ cppFlags("")
+ }
+ }
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ externalNativeBuild {
+ cmake {
+ path("src/main/cpp/CMakeLists.txt")
+ version = "3.22.1"
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+
+ packaging {
+ resources {
+ excludes += "/META-INF/{AL2.0,LGPL2.1}"
+ }
+ }
+ }
+
+ dependencies {
+
+ implementation("androidx.core:core-ktx:1.12.0")
+ implementation("androidx.appcompat:appcompat:1.6.1")
+ implementation("com.google.android.material:material:1.11.0")
+ testImplementation("junit:junit:4.13.2")
+ androidTestImplementation("androidx.test.ext:junit:1.1.5")
+ androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
+ }
package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt
@@ -11,15 +11,15 @@ cmake_minimum_required(VERSION 3.22.1)
  # build script scope).
  project("llama-android")

- include(FetchContent)
- FetchContent_Declare(
- llama
- GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
- GIT_TAG master
- )
+ #include(FetchContent)
+ #FetchContent_Declare(
+ # llama
+ # GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+ # GIT_TAG master
+ #)

  # Also provides "common"
- FetchContent_MakeAvailable(llama)
+ #FetchContent_MakeAvailable(llama)

  # Creates and names a library, sets it as either STATIC
  # or SHARED, and provides the relative paths to its source code.
@@ -30,6 +30,10 @@ FetchContent_MakeAvailable(llama)
  # the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
  # is preferred for the same purpose.
  #
+
+ #load local llama.cpp
+ add_subdirectory(../../../../../../ build-llama)
+
  # In order to load a library into your app from Java/Kotlin, you must call
  # System.loadLibrary() and pass the name of the library defined here;
  # for GameActivity/NativeActivity derived applications, the same library name must be
package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -5,7 +5,7 @@
  #include <string>
  #include <unistd.h>
  #include "llama.h"
- #include "common/common.h"
+ #include "common.h"

  // Write C++ code here.
  //
@@ -409,7 +409,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(

  const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
  if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
- return env->NewStringUTF("");
+ return nullptr;
  }

  auto new_token_chars = llama_token_to_piece(context, new_token_id);
package/src/llama.cpp/examples/llama.android/settings.gradle.kts
@@ -0,0 +1,18 @@
+ pluginManagement {
+ repositories {
+ google()
+ mavenCentral()
+ gradlePluginPortal()
+ }
+ }
+ dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+ }
+
+ rootProject.name = "LlamaAndroid"
+ include(":app")
+ include(":llama")
package/src/llama.cpp/examples/llava/CMakeLists.txt
@@ -30,8 +30,9 @@ if(TARGET BUILD_INFO)
  add_dependencies(llava BUILD_INFO)
  endif()

- set(TARGET llava-cli)
- add_executable(llava-cli llava-cli.cpp)
- install(TARGETS llava-cli RUNTIME)
- target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(llava PRIVATE cxx_std_11)
+ set(TARGET llama-llava-cli)
+ add_executable(${TARGET} llava-cli.cpp)
+ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli)
+ install(TARGETS ${TARGET} RUNTIME)
+ target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
package/src/llama.cpp/examples/llava/android/build_64.sh
@@ -0,0 +1,8 @@
+ #!/bin/bash
+ cmake ../../../../ \
+ -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DANDROID_ABI="arm64-v8a" \
+ -DANDROID_PLATFORM=android-23 $1
+
+ make -j4
package/src/llama.cpp/examples/llava/clip.cpp
@@ -16,6 +16,10 @@
  #include "ggml-metal.h"
  #endif

+ #ifdef GGML_USE_CANN
+ #include "ggml-cann.h"
+ #endif
+
  #define STB_IMAGE_IMPLEMENTATION
  #include "stb_image.h"

@@ -865,7 +869,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
  embeddings = peg_0;
  }
  else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
  }
  }

@@ -1001,6 +1005,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  LOG_TEE("%s: CLIP using Metal backend\n", __func__);
  #endif

+ #ifdef GGML_USE_CANN
+ new_clip->backend = ggml_backend_cann_init(0);
+ LOG_TEE("%s: CLIP using CANN backend\n", __func__);
+ #endif
+

  if (!new_clip->backend) {
  new_clip->backend = ggml_backend_cpu_init();
@@ -1121,20 +1130,20 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  }
  if (n < 32)
  hparams.image_grid_pinpoints[n] = 0;
- } catch (std::runtime_error & e) {
+ } catch (std::runtime_error & /*e*/) {
  hparams.image_grid_pinpoints[0]=0;
  }

  try {
  int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE);
  strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx));
- } catch (std::runtime_error & e) {
+ } catch (std::runtime_error & /*e*/) {
  strcpy(hparams.mm_patch_merge_type, "flat");
  }

  try {
  hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6
- } catch(const std::exception& e) {
+ } catch(const std::exception& /*e*/) {
  hparams.image_crop_resolution = hparams.image_size;
  }

@@ -1173,7 +1182,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  try {
  vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
  new_clip->has_class_embedding = true;
- } catch (const std::exception& e) {
+ } catch (const std::exception& /*e*/) {
  new_clip->has_class_embedding = false;
  }

@@ -1181,7 +1190,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
  vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
  new_clip->has_pre_norm = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_pre_norm = false;
  }

@@ -1189,21 +1198,21 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
  vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
  new_clip->has_post_norm = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_post_norm = false;
  }

  try {
  vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
  new_clip->has_patch_bias = true;
- } catch (std::exception & e) {
+ } catch (std::exception & /*e*/) {
  new_clip->has_patch_bias = false;
  }

  try {
  vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
  vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
- } catch(const std::exception& e) {
+ } catch(const std::exception& /*e*/) {
  LOG_TEE("%s: failed to load vision model tensors\n", __func__);
  }

@@ -1215,26 +1224,26 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
  // Yi-type llava
  vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight"));
  vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // missing in Yi-type llava
  vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
  vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // Yi-type llava
  vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight"));
  vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  // Yi-type llava
  vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight"));
  vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias"));
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  try {
  vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE);
  // LOG_TEE("%s: image_newline tensor (llava-1.6) found\n", __func__);
- } catch (std::runtime_error & e) { }
+ } catch (std::runtime_error & /*e*/) { }
  } else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) {
  // MobileVLM projection
  vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight"));
package/src/llama.cpp/examples/llava/llava-cli.cpp
@@ -112,9 +112,12 @@ struct llava_context {
  struct llama_model * model = NULL;
  };

- static void show_additional_info(int /*argc*/, char ** argv) {
- LOG_TEE("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
- LOG_TEE(" note: a lower temperature value like 0.1 is recommended for better quality.\n");
+ static void print_usage(int argc, char ** argv, const gpt_params & params) {
+ gpt_params_print_usage(argc, argv, params);
+
+ LOG_TEE("\n example usage:\n");
+ LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+ LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
  }

  static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_params * params, const std::string & fname) {
@@ -278,7 +281,7 @@ int main(int argc, char ** argv) {
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
- show_additional_info(argc, argv);
+ print_usage(argc, argv, params);
  return 1;
  }

@@ -290,8 +293,7 @@ int main(int argc, char ** argv) {
  #endif // LOG_DISABLE_LOGS

  if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
- gpt_params_print_usage(argc, argv, params);
- show_additional_info(argc, argv);
+ print_usage(argc, argv, {});
  return 1;
  }
  auto model = llava_init(&params);
package/src/llama.cpp/examples/llava/requirements.txt
@@ -1,3 +1,4 @@
- -r ../../requirements/requirements-convert.txt
+ -r ../../requirements/requirements-convert_legacy_llama.txt
+ --extra-index-url https://download.pytorch.org/whl/cpu
  pillow~=10.2.0
- torch~=2.1.1
+ torch~=2.2.1
package/src/llama.cpp/examples/lookahead/CMakeLists.txt
@@ -1,4 +1,4 @@
- set(TARGET lookahead)
+ set(TARGET llama-lookahead)
  add_executable(${TARGET} lookahead.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/lookahead/lookahead.cpp
@@ -37,7 +37,8 @@ struct ngram_container {
  int main(int argc, char ** argv) {
  gpt_params params;

- if (gpt_params_parse(argc, argv, params) == false) {
+ if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

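
Note: this parse-then-print-usage change is applied uniformly across the example programs in this diff (lookahead, lookup-create, lookup-stats, lookup, llava-cli): when gpt_params_parse() rejects the arguments, the example now prints the generated usage text via gpt_params_print_usage() before exiting. A condensed sketch of the resulting entry-point shape, assuming llama.cpp's common library is on the include path (this is not the full example source):

    #include "common.h"   // gpt_params, gpt_params_parse, gpt_params_print_usage (llama.cpp "common" library)

    int main(int argc, char ** argv) {
        gpt_params params;

        // On a failed parse, show the auto-generated usage text instead of exiting silently.
        if (!gpt_params_parse(argc, argv, params)) {
            gpt_params_print_usage(argc, argv, params);
            return 1;
        }

        // ... example-specific work (model init, tokenization, decoding) follows here ...
        return 0;
    }
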
package/src/llama.cpp/examples/lookup/CMakeLists.txt
@@ -1,22 +1,22 @@
- set(TARGET lookup)
+ set(TARGET llama-lookup)
  add_executable(${TARGET} lookup.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-create)
+ set(TARGET llama-lookup-create)
  add_executable(${TARGET} lookup-create.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-merge)
+ set(TARGET llama-lookup-merge)
  add_executable(${TARGET} lookup-merge.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
  target_compile_features(${TARGET} PRIVATE cxx_std_11)

- set(TARGET lookup-stats)
+ set(TARGET llama-lookup-stats)
  add_executable(${TARGET} lookup-stats.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/lookup/lookup-create.cpp
@@ -14,8 +14,10 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }
+
  // init llama.cpp
  llama_backend_init();
  llama_numa_init(params.numa);
package/src/llama.cpp/examples/lookup/lookup-merge.cpp
@@ -11,14 +11,14 @@
  #include <unordered_map>
  #include <vector>

- static void print_usage() {
+ static void print_usage(char* argv0) {
  fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
- fprintf(stderr, "Usage: lookup-merge [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n");
+ fprintf(stderr, "Usage: %s [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n", argv0);
  }

  int main(int argc, char ** argv){
  if (argc < 3) {
- print_usage();
+ print_usage(argv[0]);
  exit(1);
  }

@@ -27,7 +27,7 @@ int main(int argc, char ** argv){
  for (int i = 0; i < argc-1; ++i) {
  args[i] = argv[i+1];
  if (args[i] == "-h" || args[i] == "--help") {
- print_usage();
+ print_usage(argv[0]);
  exit(0);
  }
  }
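
Note: the lookup-merge change threads argv[0] into the usage printer rather than hard-coding the old lookup-merge name, presumably because the binaries gain a llama- prefix in this release (see the CMakeLists hunk above, llama-lookup-merge). A small self-contained sketch of the same pattern; the body is a placeholder, not the real merge logic:

    #include <cstdio>
    #include <string>

    // The usage text takes argv[0] so it always names the binary as it was invoked.
    static void print_usage(const char * argv0) {
        std::fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
        std::fprintf(stderr, "Usage: %s [--help] part_1.bin part_2.bin ... merged.bin\n", argv0);
    }

    int main(int argc, char ** argv) {
        if (argc < 3) {
            print_usage(argv[0]);
            return 1;
        }
        for (int i = 1; i < argc; ++i) {
            if (std::string(argv[i]) == "-h" || std::string(argv[i]) == "--help") {
                print_usage(argv[0]);
                return 0;
            }
        }
        // ... merging of the cache files would happen here ...
        return 0;
    }
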
package/src/llama.cpp/examples/lookup/lookup-stats.cpp
@@ -16,6 +16,7 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

@@ -30,7 +31,6 @@ int main(int argc, char ** argv){

  // load the model
  std::tie(model, ctx) = llama_init_from_gpt_params(params);
- GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

  // tokenize the prompt
  std::vector<llama_token> inp;
@@ -64,7 +64,7 @@ int main(int argc, char ** argv){
  }

  const int n_input = inp.size();
- const int n_ctx = params.n_ctx;
+ const int n_ctx = llama_n_ctx(ctx);

  int n_drafted = 0;
  int n_accept = 0;
package/src/llama.cpp/examples/lookup/lookup.cpp
@@ -15,6 +15,7 @@ int main(int argc, char ** argv){
  gpt_params params;

  if (!gpt_params_parse(argc, argv, params)) {
+ gpt_params_print_usage(argc, argv, params);
  return 1;
  }

@@ -38,7 +39,6 @@ int main(int argc, char ** argv){

  // load the model
  std::tie(model, ctx) = llama_init_from_gpt_params(params);
- GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

  // tokenize the prompt
  std::vector<llama_token> inp;
package/src/llama.cpp/examples/main/CMakeLists.txt
@@ -1,4 +1,4 @@
- set(TARGET main)
+ set(TARGET llama-cli)
  add_executable(${TARGET} main.cpp)
  install(TARGETS ${TARGET} RUNTIME)
  target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})