@fugood/llama.node 1.4.13 → 1.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +23 -2
- package/lib/index.js +2 -1
- package/lib/index.ts +8 -1
- package/lib/parallel.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +9 -12
- package/src/LlamaContext.cpp +16 -4
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +3 -34
- package/src/llama.cpp/common/arg.cpp +183 -60
- package/src/llama.cpp/common/arg.h +0 -8
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +67 -0
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +2 -1
- package/src/llama.cpp/common/common.h +12 -7
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +88 -369
- package/src/llama.cpp/common/download.h +32 -5
- package/src/llama.cpp/common/preset.cpp +87 -2
- package/src/llama.cpp/common/preset.h +10 -1
- package/src/llama.cpp/ggml/include/ggml.h +5 -0
- package/src/llama.cpp/include/llama.h +5 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-mmap.cpp +78 -42
- package/src/llama.cpp/src/llama-mmap.h +5 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +17 -5
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +225 -101
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +63 -27
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
package/lib/binding.ts
CHANGED
@@ -112,7 +112,7 @@ export type CompletionResponseFormat = {
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
   jinja?: boolean
-  reasoning_format?:
+  reasoning_format?: 'none' | 'auto' | 'deepseek'
   chat_template?: string
   response_format?: CompletionResponseFormat
   tools?: Tool[]
@@ -200,6 +200,13 @@ export type LlamaParallelCompletionOptions = LlamaCompletionOptions & {
    */
   save_state_path?: string
 
+  /**
+   * File path to save prompt-only state to after prompt processing.
+   * Useful for fast prompt reuse (especially for recurrent/hybrid models).
+   * Example: `'/path/to/prompt_state.bin'` or `'file:///path/to/prompt_state.bin'`
+   */
+  save_prompt_state_path?: string
+
   /**
    * Number of tokens to load when loading state.
    * If not specified or <= 0, all tokens from the state file will be loaded.
@@ -363,6 +370,8 @@ export type ModelInfo = {
   nEmbd: number
   nParams: number
   size: number
+  is_recurrent: boolean
+  is_hybrid: boolean
   chatTemplates: {
     llamaChat: boolean
     minja: {
@@ -475,6 +484,7 @@ export interface LlamaContext {
     parallel_tool_calls?: boolean
     tool_choice?: string
     enable_thinking?: boolean
+    reasoning_format?: 'none' | 'auto' | 'deepseek'
     add_generation_prompt?: boolean
     now?: string | number
     chat_template_kwargs?: Record<string, string>
@@ -505,9 +515,20 @@ export interface LlamaContext {
   /**
    * Initialize multimodal support with a mmproj file
    * @param options Object containing path and optional use_gpu flag
+   * @param options.path Path to the multimodal projector model file (mmproj)
+   * @param options.use_gpu Whether to use GPU for multimodal processing (default: true)
+   * @param options.image_min_tokens Minimum number of tokens for image input (for dynamic resolution models)
+   * @param options.image_max_tokens Maximum number of tokens for image input (for dynamic resolution models).
+   * Lower values reduce memory usage and improve speed for high-resolution images.
+   * Recommended: 256-512 for faster inference, up to 4096 for maximum detail.
   * @returns boolean indicating if initialization was successful
   */
-  initMultimodal(options: {
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+    image_min_tokens?: number
+    image_max_tokens?: number
+  }): boolean
 
  /**
   * Check if multimodal support is enabled
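For orientation, here is a minimal TypeScript sketch of the typing changes above. It assumes a native `LlamaContext` instance is already available (the loader API is not part of this diff) and that `getModelInfo()` is exposed on it as it is used in `lib/index.js` below; the `ChatMessage` shape in the example is an assumption, only the option and field names come from this diff.

import type { LlamaContext, LlamaCompletionOptions } from './binding'

// `ctx` stands in for a context created by the package's loader API,
// which is not shown in this diff.
declare const ctx: LlamaContext

const info = ctx.getModelInfo()
// New ModelInfo fields in 1.4.15: recurrence/hybrid flags next to nEmbd/nParams/size.
console.log('recurrent:', info.is_recurrent, 'hybrid:', info.is_hybrid)

// reasoning_format is now a closed union instead of an open-ended field.
const completionOptions: LlamaCompletionOptions = {
  messages: [{ role: 'user', content: 'Hello' }], // assumed ChatMessage shape
  jinja: true,
  reasoning_format: 'auto', // 'none' | 'auto' | 'deepseek'
}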
package/lib/index.js
CHANGED
@@ -87,7 +87,7 @@ class LlamaContextWrapper {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
     getFormattedChat(messages, template, params) {
-        var _a, _b;
+        var _a, _b, _c;
         const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
         const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
         let tmpl;
@@ -100,6 +100,7 @@ class LlamaContextWrapper {
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
             enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
+            reasoning_format: (_c = params === null || params === void 0 ? void 0 : params.reasoning_format) !== null && _c !== void 0 ? _c : 'none',
             add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
             now: params === null || params === void 0 ? void 0 : params.now,
             chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts
CHANGED
@@ -118,6 +118,7 @@ class LlamaContextWrapper {
       parallel_tool_calls?: boolean
       tool_choice?: string
       enable_thinking?: boolean
+      reasoning_format?: 'none' | 'auto' | 'deepseek'
       add_generation_prompt?: boolean
       now?: string | number
       chat_template_kwargs?: Record<string, string>
@@ -136,6 +137,7 @@ class LlamaContextWrapper {
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,
       enable_thinking: params?.enable_thinking ?? true,
+      reasoning_format: params?.reasoning_format ?? 'none',
       add_generation_prompt: params?.add_generation_prompt,
       now: params?.now,
       chat_template_kwargs: params?.chat_template_kwargs
@@ -252,7 +254,12 @@ class LlamaContextWrapper {
     return this.ctx.getLoadedLoraAdapters()
   }
 
-  initMultimodal(options: {
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+    image_min_tokens?: number
+    image_max_tokens?: number
+  }): boolean {
     return this.ctx.initMultimodal(options)
   }
 
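A short, hedged sketch of how the new `reasoning_format` parameter flows through the wrapper's `getFormattedChat` shown above: the wrapper applies a `'none'` default and forwards the value to the native call. The message shape and the standalone method declaration below are assumptions made for illustration; only the parameter names, the union type, and the default come from this diff.

// Only the method under discussion is typed here; the full wrapper class in
// lib/index.ts has more members and is constructed elsewhere.
type GetFormattedChat = (
  messages: { role: string; content: string }[], // assumed ChatMessage shape
  template?: string,
  params?: {
    jinja?: boolean
    enable_thinking?: boolean
    reasoning_format?: 'none' | 'auto' | 'deepseek'
  },
) => unknown

declare const getFormattedChat: GetFormattedChat // wrapper method, bound elsewhere

// Explicit value: forwarded as-is to the native call.
getFormattedChat(
  [{ role: 'user', content: 'Summarize the changes.' }],
  undefined, // use the model's built-in chat template
  { jinja: true, reasoning_format: 'deepseek' },
)

// Omitted value: the wrapper substitutes 'none' (params?.reasoning_format ?? 'none').
getFormattedChat([{ role: 'user', content: 'Hi' }])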
package/lib/parallel.ts
CHANGED
@@ -1,10 +1,10 @@
 // Parallel decoding API implementation for llama.node
 import type {
   LlamaContext,
-  LlamaCompletionOptions,
   LlamaCompletionToken,
   RerankParams,
   ParallelStatus,
+  LlamaParallelCompletionOptions,
 } from './binding'
 import { formatMediaChat } from './utils'
 
@@ -68,7 +68,7 @@ export class LlamaParallelAPI {
   * @returns Object with requestId, promise for result, and stop function
   */
  async completion(
-    options:
+    options: LlamaParallelCompletionOptions,
    onToken?: (requestId: number, data: LlamaCompletionToken) => void,
  ): Promise<{
    requestId: number
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.13",
+  "version": "1.4.15",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.
-    "@fugood/node-llama-darwin-x64": "1.4.
-    "@fugood/node-llama-linux-arm64": "1.4.
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.
-    "@fugood/node-llama-linux-x64": "1.4.
-    "@fugood/node-llama-linux-x64-cuda": "1.4.
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.
-    "@fugood/node-llama-win32-arm64": "1.4.
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.
-    "@fugood/node-llama-win32-x64": "1.4.
-    "@fugood/node-llama-win32-x64-cuda": "1.4.
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.
+    "@fugood/node-llama-darwin-arm64": "1.4.15",
+    "@fugood/node-llama-darwin-x64": "1.4.15",
+    "@fugood/node-llama-linux-arm64": "1.4.15",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.15",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.15",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.15",
+    "@fugood/node-llama-linux-x64": "1.4.15",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.15",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.15",
+    "@fugood/node-llama-win32-arm64": "1.4.15",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.15",
+    "@fugood/node-llama-win32-x64": "1.4.15",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.15",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.15"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index
+index 723973ed7..e4b2c6537 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -146,4 +146,11 @@ if (LLAMA_LLGUIDANCE)
   set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

@@ -13,11 +13,8 @@ index f7b99159e..fa37fed19 100644
 +else()
 +  set(LLAMA_COMMON_WIN_LIBS "")
 +endif()
-
++
 +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
-#
-# copy the license files
 diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
 index 1bcba9cd8..b7cd68734 100644
 --- a/src/llama.cpp/common/chat-peg-parser.cpp
@@ -32,7 +29,7 @@ index 1bcba9cd8..b7cd68734 100644
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
      int count = 0;
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index
+index d531388bc..e6712b368 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -7,9 +7,6 @@
@@ -62,7 +59,7 @@ index 22e527bab..c3d0affca 100644
  struct templates_params {
      json messages;
      json tools;
-@@ -
+@@ -753,7 +740,7 @@ static std::string apply(
      tmpl_inputs.extra_context.merge_patch(*additional_context);
      }
      // TODO: add flag to control date/time, if only for testing purposes.
@@ -72,7 +69,7 @@ index 22e527bab..c3d0affca 100644
      minja::chat_template_options tmpl_opts;
      // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index
+index 454085e90..e01390cf9 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
 @@ -10,7 +10,18 @@
@@ -96,7 +93,7 @@ index 8bd4a325f..333b3301f 100644
  struct common_chat_tool_call {
      std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index 744f0b4ee..04fcebb9e 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
 @@ -1361,6 +1361,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
@@ -108,10 +105,10 @@ index 41b2b6833..fe9ba05aa 100644
      mparams.main_gpu = params.main_gpu;
      mparams.split_mode = params.split_mode;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index e60087dea..c21797cd8 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -311,6 +311,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
package/src/LlamaContext.cpp
CHANGED
@@ -595,6 +595,8 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
+  details.Set("is_recurrent", llama_model_is_recurrent(model));
+  details.Set("is_hybrid", llama_model_is_hybrid(model));
 
   Napi::Object chatTemplates = Napi::Object::New(info.Env());
   chatTemplates.Set("llamaChat", _rn_ctx->validateModelChatTemplate(false, nullptr));
@@ -703,6 +705,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
       get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
+  auto reasoning_format = get_option<std::string>(params, "reasoning_format", "none");
   auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(params, "now", "");
 
@@ -721,7 +724,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         messages, chat_template, json_schema_str, tools_str,
-        parallel_tool_calls, tool_choice, enable_thinking,
+        parallel_tool_calls, tool_choice, enable_thinking, reasoning_format,
         add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const nlohmann::json_abi_v3_12_0::detail::parse_error& e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
@@ -962,7 +965,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         json_stringify(messages), chat_template,
-        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking,
+        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking, reasoning_format,
         add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const std::exception &e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
@@ -1330,7 +1333,7 @@ extern "C" void cleanup_logging() {
 }
 
 
-// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
+// initMultimodal(options: { path: string, use_gpu?: boolean, image_min_tokens?: number, image_max_tokens?: number }): boolean
 Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
 
@@ -1342,6 +1345,15 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
   auto options = info[0].As<Napi::Object>();
   auto mmproj_path = options.Get("path").ToString().Utf8Value();
   auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
+  int image_min_tokens = -1;
+  int image_max_tokens = -1;
+
+  if (options.Has("image_min_tokens") && options.Get("image_min_tokens").IsNumber()) {
+    image_min_tokens = options.Get("image_min_tokens").ToNumber().Int32Value();
+  }
+  if (options.Has("image_max_tokens") && options.Get("image_max_tokens").IsNumber()) {
+    image_max_tokens = options.Get("image_max_tokens").ToNumber().Int32Value();
+  }
 
   if (mmproj_path.empty()) {
     Napi::TypeError::New(env, "mmproj path is required")
@@ -1357,7 +1369,7 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
 
   // Disable ctx_shift before initializing multimodal
   _rn_ctx->params.ctx_shift = false;
-  bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu);
+  bool result = _rn_ctx->initMultimodal(mmproj_path, use_gpu, image_min_tokens, image_max_tokens);
   if (!result) {
     Napi::Error::New(env, "Failed to initialize multimodal context")
         .ThrowAsJavaScriptException();
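On the JavaScript side, the expanded native parsing above maps directly onto the widened `initMultimodal` options: numeric fields that are omitted stay at the native default of -1. A minimal, hedged sketch; the mmproj path is illustrative and the context creation is not part of this diff.

import type { LlamaContext } from './binding'

declare const ctx: LlamaContext // created elsewhere; loader API not shown in this diff

// Cap image token usage for dynamic-resolution multimodal models.
// Omitting image_min_tokens / image_max_tokens leaves the native values at -1.
const ok = ctx.initMultimodal({
  path: '/models/mmproj.gguf', // illustrative path
  use_gpu: true,
  image_max_tokens: 512, // 256-512 recommended for faster inference (per the doc comment)
})

if (!ok) {
  throw new Error('Failed to initialize multimodal context')
}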
package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -111,11 +111,16 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
 
 # 3rd party libs
-option(
-option(
-option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
+option(LLAMA_HTTPLIB "llama: httplib for downloading functionality" ON)
+option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
+# deprecated
+option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+if (LLAMA_CURL)
+    message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
+endif()
+
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
@@ -182,6 +187,9 @@ if (NOT MSVC)
     endif()
 endif()
 
+include("cmake/license.cmake")
+license_add_file("llama.cpp" "LICENSE")
+
 #
 # 3rd-party
 #
@@ -209,11 +217,6 @@ add_subdirectory(src)
 # utils, programs, examples and tests
 #
 
-if (NOT LLAMA_BUILD_COMMON)
-    message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
-    set(LLAMA_CURL OFF)
-endif()
-
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
     if (LLAMA_HTTPLIB)
@@ -235,6 +238,19 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
     add_subdirectory(tools)
 endif()
 
+# Automatically add all files from the 'licenses' directory
+file(GLOB EXTRA_LICENSES "${CMAKE_SOURCE_DIR}/licenses/LICENSE-*")
+
+foreach(FILE_PATH ${EXTRA_LICENSES})
+    get_filename_component(FILE_NAME "${FILE_PATH}" NAME)
+    string(REGEX REPLACE "^LICENSE-" "" NAME "${FILE_NAME}")
+    license_add_file("${NAME}" "${FILE_PATH}")
+endforeach()
+
+if (LLAMA_BUILD_COMMON)
+    license_generate(common)
+endif()
+
 #
 # install
 #
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
     common.h
     console.cpp
     console.h
+    debug.cpp
+    debug.h
     download.cpp
     download.h
     http.h
@@ -95,17 +97,7 @@ endif()
 # TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
 set(LLAMA_COMMON_EXTRA_LIBS build_info)
 
-if (
-    # Use curl to download model url
-    find_package(CURL)
-    if (NOT CURL_FOUND)
-        message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
-    endif()
-    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
-    include_directories(${CURL_INCLUDE_DIRS})
-    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-elseif (LLAMA_HTTPLIB)
-    # otherwise, use cpp-httplib
+if (LLAMA_HTTPLIB)
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
 endif()
@@ -162,26 +154,3 @@ else()
 endif()
 
 target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
-#
-# copy the license files
-#
-
-# Check if running in GitHub Actions
-if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
-    message(STATUS "Running inside GitHub Actions - copying license files")
-
-    # Copy all files from licenses/ to build/bin/
-    file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
-    foreach(LICENSE_FILE ${LICENSE_FILES})
-        get_filename_component(FILENAME ${LICENSE_FILE} NAME)
-        add_custom_command(
-            POST_BUILD
-            TARGET ${TARGET}
-            COMMAND ${CMAKE_COMMAND} -E copy_if_different
-                "${LICENSE_FILE}"
-                "$<TARGET_FILE_DIR:llama>/${FILENAME}"
-            COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
-        message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
-    endforeach()
-endif()