npm - node-llama-cpp - Versions diffs - 3.0.0-beta.14 → 3.0.0-beta.16 - Mend

node-llama-cpp 3.0.0-beta.14 → 3.0.0-beta.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (383) hide show

package/llama/addon.cpp CHANGED Viewed

@@ -3,13 +3,14 @@
 #include <algorithm>
 #include <sstream>
 #include <vector>
+#include <unordered_map>
 #include "common.h"
 #include "common/grammar-parser.h"
 #include "llama.h"
 #include "napi.h"
-#ifdef GPU_INFO_USE_CUBLAS
+#ifdef GPU_INFO_USE_CUDA
 #  include "gpuInfo/cuda-gpu-info.h"
 #endif
 #ifdef GPU_INFO_USE_VULKAN
@@ -121,7 +122,7 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
     return std::string(result.data(), result.size());
 }
-#ifdef GPU_INFO_USE_CUBLAS
+#ifdef GPU_INFO_USE_CUDA
 // Forwards a CUDA error message to the addon's log callback at error level,
 // prefixed with "CUDA error: ".
 void logCudaError(const char* message) {
     const std::string logLine = std::string("CUDA error: ") + std::string(message);
     addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, logLine.c_str(), nullptr);
 }
@@ -136,7 +137,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     uint64_t total = 0;
     uint64_t used = 0;
-#ifdef GPU_INFO_USE_CUBLAS
+#ifdef GPU_INFO_USE_CUDA
     size_t cudaDeviceTotal = 0;
     size_t cudaDeviceUsed = 0;
     bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
@@ -161,7 +162,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
 #ifdef GPU_INFO_USE_METAL
     uint64_t metalDeviceTotal = 0;
     uint64_t metalDeviceUsed = 0;
-    get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
+    getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
     total += metalDeviceTotal;
     used += metalDeviceUsed;
@@ -174,8 +175,34 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     return result;
 }
+// Gathers the device names reported by every GPU backend this addon was compiled with
+// (CUDA, Vulkan, Metal) and returns them to JS as `{ deviceNames: string[] }`.
+Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
+    Napi::Env env = info.Env();
+    std::vector<std::string> names;
+
+#ifdef GPU_INFO_USE_CUDA
+    gpuInfoGetCudaDeviceNames(&names, logCudaError);
+#endif
+
+#ifdef GPU_INFO_USE_VULKAN
+    gpuInfoGetVulkanDeviceNames(&names, logVulkanWarning);
+#endif
+
+#ifdef GPU_INFO_USE_METAL
+    getMetalGpuDeviceNames(&names);
+#endif
+
+    // copy the collected names into a JS array, keeping their order
+    Napi::Array namesArray = Napi::Array::New(env, names.size());
+    uint32_t index = 0;
+    for (const std::string& name : names) {
+        namesArray.Set(index++, Napi::String::New(env, name));
+    }
+
+    Napi::Object result = Napi::Object::New(env);
+    result.Set("deviceNames", namesArray);
+    return result;
+}
 Napi::Value getGpuType(const Napi::CallbackInfo& info) {
-#ifdef GPU_INFO_USE_CUBLAS
+#ifdef GPU_INFO_USE_CUDA
     return Napi::String::New(info.Env(), "cuda");
 #endif
@@ -507,6 +534,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return Napi::Number::From(info.Env(), int32_t(tokenType));
         }
+        // Returns the value of llama_vocab_type(model) as a JS number.
+        // Throws a JS error and returns `undefined` when the model has been disposed.
+        Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
+            Napi::Env env = info.Env();
+
+            if (disposed) {
+                Napi::Error::New(env, "Model is disposed").ThrowAsJavaScriptException();
+                return env.Undefined();
+            }
+
+            const int32_t vocabularyType = int32_t(llama_vocab_type(model));
+            return Napi::Number::From(env, vocabularyType);
+        }
         Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
             const int addBos = llama_add_bos_token(model);
@@ -515,6 +552,10 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return Napi::Boolean::New(info.Env(), shouldPrependBos);
         }
+        // Returns the value of llama_model_size(model) as a JS number.
+        // Throws a JS error and returns `undefined` instead of reading `model` once the
+        // model has been disposed, mirroring the guard in GetVocabularyType.
+        Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
+            if (disposed) {
+                Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+                return info.Env().Undefined();
+            }
+
+            return Napi::Number::From(info.Env(), llama_model_size(model));
+        }
         static void init(Napi::Object exports) {
             exports.Set(
                 "AddonModel",
@@ -540,7 +581,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
                         InstanceMethod("eotToken", &AddonModel::EotToken),
                         InstanceMethod("getTokenString", &AddonModel::GetTokenString),
                         InstanceMethod("getTokenType", &AddonModel::GetTokenType),
+                        InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
                         InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
+                        InstanceMethod("getModelSize", &AddonModel::GetModelSize),
                         InstanceMethod("dispose", &AddonModel::Dispose),
                     }
                 )
@@ -822,6 +865,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
                     context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
                 }
+                if (options.Has("sequences")) {
+                    context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
+                }
                 if (options.Has("embeddings")) {
                     context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
                 }
@@ -1039,6 +1086,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
             return result;
         }
+        // Returns the value of llama_state_get_size(ctx) as a JS number.
+        // Throws a JS error and returns `undefined` when the context has been disposed.
+        Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
+            Napi::Env env = info.Env();
+
+            if (!disposed) {
+                const auto stateSize = llama_state_get_size(ctx);
+                return Napi::Number::From(env, stateSize);
+            }
+
+            Napi::Error::New(env, "Context is disposed").ThrowAsJavaScriptException();
+            return env.Undefined();
+        }
         Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
             llama_print_timings(ctx);
             llama_reset_timings(ctx);
@@ -1063,6 +1119,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
                         InstanceMethod("sampleToken", &AddonContext::SampleToken),
                         InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
                         InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
+                        InstanceMethod("getStateSize", &AddonContext::GetStateSize),
                         InstanceMethod("printTimings", &AddonContext::PrintTimings),
                         InstanceMethod("dispose", &AddonContext::Dispose),
                     }
@@ -1163,7 +1220,7 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
         }
         void OnOK() {
             if (context->contextLoaded) {
-                uint64_t contextMemorySize = llama_get_state_size(context->ctx);
+                uint64_t contextMemorySize = llama_state_get_size(context->ctx);
                 adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
                 context->loadedContextMemorySize = contextMemorySize;
             }
@@ -1278,6 +1335,8 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
         float repeat_penalty_presence_penalty = 0.00f;  // 0.0 = disabled
         float repeat_penalty_frequency_penalty = 0.00f;  // 0.0 = disabled
         std::vector<llama_token> repeat_penalty_tokens;
+        std::unordered_map<llama_token, float> tokenBiases;
+        bool useTokenBiases = false;
         bool use_repeat_penalty = false;
         AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
@@ -1322,6 +1381,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
                     use_repeat_penalty = true;
                 }
+                if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
+                    Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
+                    Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
+                    if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
+                        for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
+                            tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
+                        }
+                        useTokenBiases = true;
+                    }
+                }
                 if (options.Has("repeatPenaltyPresencePenalty")) {
                     repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
                 }
@@ -1370,18 +1442,33 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
             // Select the best prediction.
             auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
             auto n_vocab = llama_n_vocab(ctx->model->model);
+            auto eos_token = llama_token_eos(ctx->model->model);
             std::vector<llama_token_data> candidates;
             candidates.reserve(n_vocab);
             for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-                candidates.emplace_back(llama_token_data { token_id, logits[token_id], 0.0f });
+                auto logit = logits[token_id];
+                if (useTokenBiases) {
+                    bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
+                    if (hasTokenBias) {
+                        auto logitBias = tokenBiases.at(token_id);
+                        if (logitBias == -INFINITY || logitBias < -INFINITY) {
+                            if (token_id != eos_token) {
+                                logit = -INFINITY;
+                            }
+                        } else {
+                            logit += logitBias;
+                        }
+                    }
+                }
+                candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
             }
             llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-            auto eos_token = llama_token_eos(ctx->model->model);
             if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
                 llama_sample_repetition_penalties(
                     ctx->ctx,
@@ -1396,6 +1483,13 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
             if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
                 llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
+                if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
+                    // logit biases caused grammar sampling to fail, so sampling again without logit biases
+                    useTokenBiases = false;
+                    SampleToken();
+                    return;
+                }
             }
             if (temperature <= 0) {
@@ -1444,6 +1538,55 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
     return Napi::String::From(info.Env(), llama_print_system_info());
 }
+// Exposes the result of llama_supports_gpu_offload() to JS as a boolean.
+Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
+    const bool supportsGpuOffload = llama_supports_gpu_offload();
+    return Napi::Boolean::New(info.Env(), supportsGpuOffload);
+}
+// Exposes the result of llama_supports_mmap() to JS as a boolean.
+Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
+    const bool supportsMmap = llama_supports_mmap();
+    return Napi::Boolean::New(info.Env(), supportsMmap);
+}
+// Exposes the result of llama_supports_mlock() to JS as a boolean.
+Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
+    const bool supportsMlock = llama_supports_mlock();
+    return Napi::Boolean::New(info.Env(), supportsMlock);
+}
+// Returns ggml_blck_size() for the ggml type id passed as the first argument,
+// or `undefined` when the id is outside the range of `ggml_type` values.
+Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
+    const int ggmlType = info[0].As<Napi::Number>().Int32Value();
+
+    // GGML_TYPE_COUNT is the number of types, not a type itself, so the valid ids are
+    // [0, GGML_TYPE_COUNT); letting GGML_TYPE_COUNT through would query a nonexistent type
+    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
+        return info.Env().Undefined();
+    }
+
+    const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
+    return Napi::Number::New(info.Env(), blockSize);
+}
+// Returns ggml_type_size() for the ggml type id passed as the first argument,
+// or `undefined` when the id is outside the range of `ggml_type` values.
+Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
+    const int ggmlType = info[0].As<Napi::Number>().Int32Value();
+
+    // GGML_TYPE_COUNT is the number of types, not a type itself, so the valid ids are
+    // [0, GGML_TYPE_COUNT); letting GGML_TYPE_COUNT through would query a nonexistent type
+    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
+        return info.Env().Undefined();
+    }
+
+    const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
+    return Napi::Number::New(info.Env(), typeSize);
+}
+// Builds a JS object exposing constants and type sizes taken from the linked
+// ggml / llama.cpp headers, each stored as a JS number.
+Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
+    Napi::Env env = info.Env();
+    Napi::Object consts = Napi::Object::New(env);
+
+    // every entry is a plain numeric value, so funnel them through one helper
+    const auto setNumber = [&](const char* key, double value) {
+        consts.Set(key, Napi::Number::New(env, value));
+    };
+
+    setNumber("ggmlMaxDims", GGML_MAX_DIMS);
+    setNumber("ggmlTypeF16Size", ggml_type_size(GGML_TYPE_F16));
+    setNumber("ggmlTypeF32Size", ggml_type_size(GGML_TYPE_F32));
+    setNumber("ggmlTensorOverhead", ggml_tensor_overhead());
+    setNumber("llamaMaxRngState", LLAMA_MAX_RNG_STATE);
+    setNumber("llamaPosSize", sizeof(llama_pos));
+    setNumber("llamaSeqIdSize", sizeof(llama_seq_id));
+
+    return consts;
+}
 int addonGetGgmlLogLevelNumber(ggml_log_level level) {
     switch (level) {
         case GGML_LOG_LEVEL_ERROR: return 2;
@@ -1693,9 +1836,16 @@ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
     exports.DefineProperties({
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
+        Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
+        Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
+        Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
+        Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
+        Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
+        Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
         Napi::PropertyDescriptor::Function("setLogger", setLogger),
         Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
         Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
+        Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
         Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
         Napi::PropertyDescriptor::Function("init", addonInit),
         Napi::PropertyDescriptor::Function("dispose", addonDispose),

package/llama/binariesGithubRelease.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-    "release": "b2440"
+    "release": "b2665"
 }

package/llama/gitRelease.bundle CHANGED Viewed

Binary file

package/llama/gpuInfo/cuda-gpu-info.cu CHANGED Viewed

@@ -1,4 +1,6 @@
 #include <stddef.h>
+#include <vector>
+#include <string>
 #if defined(GPU_INFO_USE_HIPBLAS)
 #include <hip/hip_runtime.h>
@@ -97,3 +99,22 @@ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaEr
     return true;
 }
+// Appends the name of every CUDA device whose properties can be queried to `deviceNames`.
+// A device whose properties cannot be read is reported through `errorLogCallback` and skipped.
+void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
+    const int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
+
+    // a negative count leaves this loop with nothing to do, so no names are added
+    for (int deviceIndex = 0; deviceIndex < deviceCount; ++deviceIndex) {
+        cudaDeviceProp deviceProperties;
+        const auto status = cudaGetDeviceProperties(&deviceProperties, deviceIndex);
+
+        if (status == cudaSuccess) {
+            deviceNames->push_back(std::string(deviceProperties.name));
+            continue;
+        }
+
+        errorLogCallback(cudaGetErrorString(status));
+    }
+}

package/llama/gpuInfo/cuda-gpu-info.h CHANGED Viewed

@@ -1,7 +1,10 @@
 #pragma once
 #include <stddef.h>
+#include <vector>
+#include <string>
 typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
 bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
+void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback);

package/llama/gpuInfo/metal-gpu-info.h CHANGED Viewed

@@ -1,5 +1,8 @@
 #pragma once
 #include <stdint.h>
+#include <string>
+#include <vector>
-void get_metal_gpu_info(uint64_t * total, uint64_t * used);
+void getMetalGpuInfo(uint64_t * total, uint64_t * used);
+void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames);

package/llama/gpuInfo/metal-gpu-info.mm CHANGED Viewed

@@ -1,7 +1,9 @@
 #include <stdint.h>
+#include <vector>
+#include <string>
 #import <Metal/Metal.h>
-void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
+void getMetalGpuInfo(uint64_t * total, uint64_t * used) {
     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
     if (device) {
@@ -15,3 +17,14 @@ void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
     [device release];
     device = nil;
 }
+// Appends the name of every Metal device returned by MTLCopyAllDevices() to `deviceNames`.
+void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames) {
+    NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
+
+    for (id<MTLDevice> metalDevice in allDevices) {
+        NSString *nameCopy = [NSString stringWithUTF8String:metalDevice.name.UTF8String];
+        deviceNames->push_back(std::string(nameCopy.UTF8String));
+    }
+
+    // the device array is released manually once the names have been copied out
+    [allDevices release];
+    allDevices = nil;
+}

package/llama/gpuInfo/vulkan-gpu-info.cpp CHANGED Viewed

@@ -1,10 +1,11 @@
 #include <stddef.h>
+#include <vector>
 #include <vulkan/vulkan.hpp>
 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
-bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
+static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
     vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
     vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
     vk::Instance instance = vk::createInstance(createInfo);
@@ -41,8 +42,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
             for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
                 if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
-                    totalMem += memProps.memoryHeaps[i].size;
+                    const auto size = memProps.memoryHeaps[i].size;
+                    totalMem += size;
                     usedMem += memoryBudgetProperties.heapUsage[i];
+                    if (size > 0 && addDeviceNames) {
+                        (*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
+                    }
                     break;
                 }
             }
@@ -63,3 +70,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
     *used = usedMem;
     return true;
 }
+// Fills `total` and `used` through enumerateVulkanDevices with device-name collection turned off.
+// Returns whatever enumerateVulkanDevices returns.
+bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
+    const bool collectDeviceNames = false;
+    return enumerateVulkanDevices(total, used, collectDeviceNames, nullptr, warningLogCallback);
+}
+// Collects Vulkan device names into `deviceNames` through enumerateVulkanDevices.
+// Returns whatever enumerateVulkanDevices returns.
+bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
+    // enumerateVulkanDevices always writes the memory totals; they are discarded here
+    size_t ignoredTotal = 0;
+    size_t ignoredUsed = 0;
+    const bool collectDeviceNames = true;
+
+    return enumerateVulkanDevices(&ignoredTotal, &ignoredUsed, collectDeviceNames, deviceNames, warningLogCallback);
+}

package/llama/gpuInfo/vulkan-gpu-info.h CHANGED Viewed

@@ -1,7 +1,9 @@
 #pragma once
 #include <stddef.h>
+#include <vector>
 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
 bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
+bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);

package/llama/grammars/README.md CHANGED Viewed

@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
 ```
 ./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
 ```
+## Troubleshooting
+Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
+### Efficient optional repetitions
+A common pattern is to allow repetitions of a pattern `x` up to N times.
+While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).

package/llama/llama.cpp.info.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-    "tag": "b2440",
+    "tag": "b2665",
     "llamaCppGithubRepo": "ggerganov/llama.cpp"
 }

package/llama/toolchains/win32.host-x64.target-arm64.cmake ADDED Viewed

@@ -0,0 +1,41 @@
+# Toolchain file: x64 Windows host cross-compiling for Windows on ARM64.
+set(CMAKE_SYSTEM_NAME Windows)
+set(CMAKE_SYSTEM_PROCESSOR ARM64)
+
+# Candidate Visual Studio installation roots: the ProgramFiles locations from the
+# environment first, then the conventional hard-coded ones.
+set(PROGRAMFILES "$ENV{ProgramFiles}")
+set(PROGRAMFILES_X86 "$ENV{ProgramFiles\(x86\)}")
+set(VS_INSTALL_PATHS
+    "${PROGRAMFILES_X86}/Microsoft Visual Studio"
+    "${PROGRAMFILES}/Microsoft Visual Studio"
+    "C:/Program Files (x86)/Microsoft Visual Studio"
+    "C:/Program Files/Microsoft Visual Studio"
+)
+
+# Look for the x64-hosted ARM64 cl.exe under each root, trying the shallow pattern
+# before the deeper one, and keep the first match.
+foreach(VS_ROOT IN LISTS VS_INSTALL_PATHS)
+    # stop as soon as a compiler path is known (found earlier or already set)
+    if(CL_EXE_PATH)
+        break()
+    endif()
+
+    foreach(CL_EXE_GLOB IN ITEMS
+        "*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe"
+        "**/*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe"
+    )
+        file(GLOB_RECURSE FOUND_CL_EXE "${VS_ROOT}/${CL_EXE_GLOB}")
+
+        if(FOUND_CL_EXE)
+            list(GET FOUND_CL_EXE 0 CL_EXE_PATH)
+            break()
+        endif()
+    endforeach()
+endforeach()
+
+# Use the located cl.exe for both C and C++, or abort configuration when none was found.
+if(CL_EXE_PATH)
+    set(CMAKE_C_COMPILER "${CL_EXE_PATH}")
+    set(CMAKE_CXX_COMPILER "${CL_EXE_PATH}")
+else()
+    message(FATAL_ERROR "cl.exe not found for ARM architecture.")
+endif()

package/llamaBins/linux-arm64/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/linux-arm64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-armv7l/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/linux-armv7l/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-x64/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/linux-x64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/linux-x64-cuda/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/linux-x64-vulkan/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/mac-arm64-metal/default.metallib CHANGED Viewed

Binary file

package/llamaBins/mac-arm64-metal/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/mac-x64/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/mac-x64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/win-arm64/_nlcBuildMetadata.json ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/win-arm64/llama-addon.exp ADDED Viewed

Binary file

package/llamaBins/win-arm64/llama-addon.lib ADDED Viewed

Binary file

package/llamaBins/win-arm64/llama-addon.node ADDED Viewed

Binary file

package/llamaBins/win-x64/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/win-x64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/win-x64-cuda/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"~~b2440~~"}}}
1	+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}

package/llamaBins/win-x64-vulkan/llama-addon.node CHANGED Viewed

Binary file