@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/lib/binding.ts +1 -1
  2. package/package.json +14 -14
  3. package/scripts/llama.cpp.patch +27 -26
  4. package/src/LlamaCompletionWorker.cpp +21 -4
  5. package/src/LlamaCompletionWorker.h +2 -0
  6. package/src/LlamaContext.cpp +3 -12
  7. package/src/common.hpp +6 -5
  8. package/src/llama.cpp/CMakeLists.txt +15 -4
  9. package/src/llama.cpp/common/CMakeLists.txt +15 -24
  10. package/src/llama.cpp/common/arg.cpp +172 -110
  11. package/src/llama.cpp/common/chat-parser.cpp +385 -0
  12. package/src/llama.cpp/common/chat-parser.h +120 -0
  13. package/src/llama.cpp/common/chat.cpp +726 -596
  14. package/src/llama.cpp/common/chat.h +74 -8
  15. package/src/llama.cpp/common/common.cpp +56 -38
  16. package/src/llama.cpp/common/common.h +9 -3
  17. package/src/llama.cpp/common/json-partial.cpp +256 -0
  18. package/src/llama.cpp/common/json-partial.h +38 -0
  19. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
  20. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
  21. package/src/llama.cpp/common/sampling.cpp +7 -8
  22. package/src/llama.cpp/common/speculative.cpp +6 -4
  23. package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
  24. package/src/llama.cpp/ggml/include/ggml.h +22 -3
  25. package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
  26. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
  27. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  28. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
  29. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  30. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  31. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  32. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  33. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  37. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  39. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  40. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
  41. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  42. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
  43. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
  44. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
  45. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
  46. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  47. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  48. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
  49. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
  51. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  52. package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  53. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  54. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  55. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  56. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
  57. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  58. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  59. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
  60. package/src/llama.cpp/include/llama.h +145 -40
  61. package/src/llama.cpp/src/CMakeLists.txt +5 -1
  62. package/src/llama.cpp/src/llama-arch.cpp +99 -3
  63. package/src/llama.cpp/src/llama-arch.h +10 -1
  64. package/src/llama.cpp/src/llama-batch.cpp +728 -272
  65. package/src/llama.cpp/src/llama-batch.h +112 -54
  66. package/src/llama.cpp/src/llama-chat.cpp +19 -2
  67. package/src/llama.cpp/src/llama-chat.h +1 -0
  68. package/src/llama.cpp/src/llama-context.cpp +525 -339
  69. package/src/llama.cpp/src/llama-context.h +38 -17
  70. package/src/llama.cpp/src/llama-cparams.cpp +4 -0
  71. package/src/llama.cpp/src/llama-cparams.h +2 -0
  72. package/src/llama.cpp/src/llama-grammar.cpp +12 -2
  73. package/src/llama.cpp/src/llama-graph.cpp +413 -353
  74. package/src/llama.cpp/src/llama-graph.h +112 -56
  75. package/src/llama.cpp/src/llama-hparams.cpp +10 -2
  76. package/src/llama.cpp/src/llama-hparams.h +13 -2
  77. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
  79. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
  80. package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
  81. package/src/llama.cpp/src/llama-kv-cells.h +415 -0
  82. package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  83. package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
  84. package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
  85. package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
  86. package/src/llama.cpp/src/llama-memory.cpp +41 -0
  87. package/src/llama.cpp/src/llama-memory.h +86 -5
  88. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  89. package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
  90. package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
  91. package/src/llama.cpp/src/llama-model.cpp +1137 -528
  92. package/src/llama.cpp/src/llama-model.h +4 -0
  93. package/src/llama.cpp/src/llama-quant.cpp +2 -1
  94. package/src/llama.cpp/src/llama-sampling.cpp +2 -2
  95. package/src/llama.cpp/src/llama-vocab.cpp +69 -32
  96. package/src/llama.cpp/src/llama-vocab.h +1 -0
  97. package/src/llama.cpp/src/llama.cpp +11 -7
  98. package/src/llama.cpp/src/unicode.cpp +5 -0
  99. package/src/tts_utils.h +1 -1
  100. package/src/llama.cpp/common/json.hpp +0 -24766
  101. package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
  102. package/src/llama.cpp/common/minja/minja.hpp +0 -2974
  103. package/src/llama.cpp/common/stb_image.h +0 -7988
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  105. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
  106. package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
  107. package/src/llama.cpp/src/llama-kv-cache.h +0 -515
  108. /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  109. /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  110. /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
package/lib/binding.ts CHANGED
@@ -22,7 +22,6 @@ export type ChatMessage = {
  export type LlamaModelOptions = {
  model: string
  chat_template?: string
- reasoning_format?: string
  embedding?: boolean
  embd_normalize?: number
  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -74,6 +73,7 @@ export type CompletionResponseFormat = {
  export type LlamaCompletionOptions = {
  messages?: ChatMessage[]
  jinja?: boolean
+ reasoning_format?: string
  chat_template?: string
  response_format?: CompletionResponseFormat
  tools?: object
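
Note: as the binding.ts hunks above and the LlamaContext.cpp hunks below show, reasoning_format is no longer accepted at model load and is instead passed per completion call, with 'deepseek', 'deepseek-legacy', or 'none' recognized. A minimal usage sketch follows; only the option shapes come from this diff, while the loadModel/completion entry points are assumed from earlier 1.0.0 betas.

  // Hypothetical sketch — entry-point names are assumptions, not part of this diff.
  import { loadModel } from '@fugood/llama.node'

  const context = await loadModel({
    model: 'path/to/model.gguf', // LlamaModelOptions: reasoning_format removed in beta.6
  })

  const result = await context.completion({
    messages: [{ role: 'user', content: 'Hello' }],
    jinja: true,
    reasoning_format: 'deepseek', // now a LlamaCompletionOptions field
  })
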
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.0.0-beta.5",
+ "version": "1.0.0-beta.6",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -70,19 +70,19 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.0.0-beta.5",
- "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.5",
- "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.5",
- "@fugood/node-llama-linux-arm64": "1.0.0-beta.5",
- "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.5",
- "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.5",
- "@fugood/node-llama-win32-x64": "1.0.0-beta.5",
- "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.5",
- "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.5",
- "@fugood/node-llama-win32-arm64": "1.0.0-beta.5",
- "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.5",
- "@fugood/node-llama-darwin-x64": "1.0.0-beta.5",
- "@fugood/node-llama-darwin-arm64": "1.0.0-beta.5"
+ "@fugood/node-llama-linux-x64": "1.0.0-beta.6",
+ "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.6",
+ "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.6",
+ "@fugood/node-llama-linux-arm64": "1.0.0-beta.6",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.6",
+ "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.6",
+ "@fugood/node-llama-win32-x64": "1.0.0-beta.6",
+ "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.6",
+ "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.6",
+ "@fugood/node-llama-win32-arm64": "1.0.0-beta.6",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.6",
+ "@fugood/node-llama-darwin-x64": "1.0.0-beta.6",
+ "@fugood/node-llama-darwin-arm64": "1.0.0-beta.6"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,18 +1,19 @@
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index f138c7bc..e177fe92 100644
+ index 7d9aaeb1..a7b68d4a 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
- @@ -1,8 +1,6 @@
- #include "chat.h"
- #include "json-schema-to-grammar.h"
+ @@ -6,9 +6,6 @@
  #include "log.h"
- -#include "minja/chat-template.hpp"
- -#include "minja/minja.hpp"
+ #include "regex-partial.h"

- #include <optional>
-
- @@ -15,14 +13,6 @@ static std::string format_time(const std::chrono::system_clock::time_point & now
- return res;
+ -#include <minja/chat-template.hpp>
+ -#include <minja/minja.hpp>
+ -
+ #include <cstdio>
+ #include <exception>
+ #include <iostream>
+ @@ -121,14 +118,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+ return diffs;
  }

  -typedef minja::chat_template common_chat_template;
@@ -27,17 +28,17 @@ index f138c7bc..e177fe92 100644
  json messages;
  json tools;
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
- index d26a09c2..cb92721a 100644
+ index 9f59e6b0..9b7fe724 100644
  --- a/src/llama.cpp/common/chat.h
  +++ b/src/llama.cpp/common/chat.h
- @@ -6,8 +6,16 @@
- #include <chrono>
+ @@ -8,7 +8,16 @@
  #include <string>
  #include <vector>
- +#include "minja/chat-template.hpp"
- +#include "minja/minja.hpp"

  -struct common_chat_templates;
+ +#include <minja/chat-template.hpp>
+ +#include <minja/minja.hpp>
+ +
  +typedef minja::chat_template common_chat_template;
  +
  +struct common_chat_templates {
@@ -49,10 +50,10 @@ index d26a09c2..cb92721a 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index 94f545f8..a55df8aa 100644
+ index e4e71ad1..091ddda4 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1062,6 +1062,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1101,6 +1101,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
  mparams.n_gpu_layers = params.n_gpu_layers;
  }

@@ -61,10 +62,10 @@ index 94f545f8..a55df8aa 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index 0a9dc059..996afcd8 100644
+ index e08a59ea..d120b67d 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -217,6 +217,7 @@ enum common_reasoning_format {
+ @@ -223,6 +223,7 @@ enum common_reasoning_format {
  };

  struct common_params {
@@ -73,11 +74,11 @@ index 0a9dc059..996afcd8 100644
  int32_t n_ctx = 4096; // context size
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index 9a3085be..8218cc16 100644
+ index 71b1d67b..093cd6f9 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- @@ -90,7 +90,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
- message(STATUS "ARM detected")
+ @@ -104,7 +104,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+ )

  if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
  - message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
@@ -86,10 +87,10 @@ index 9a3085be..8218cc16 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
- index 662f1377..f9f99698 100644
+ index 39f022f3..7ae9047e 100644
  --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
- @@ -122,7 +122,7 @@ if (Vulkan_FOUND)
+ @@ -110,7 +110,7 @@ if (Vulkan_FOUND)
  endif()

  # Set up toolchain for host compilation whether cross-compiling or not
@@ -98,10 +99,10 @@ index 662f1377..f9f99698 100644
  if (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN)
  set(HOST_CMAKE_TOOLCHAIN_FILE ${GGML_VULKAN_SHADERS_GEN_TOOLCHAIN})
  else()
- @@ -144,7 +144,7 @@ if (Vulkan_FOUND)
+ @@ -130,7 +130,7 @@ if (Vulkan_FOUND)
+
  include(ExternalProject)

- # Add toolchain file if cross-compiling
  - if (CMAKE_CROSSCOMPILING)
  + if (CMAKE_CROSSCOMPILING OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR)
  list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${HOST_CMAKE_TOOLCHAIN_FILE})
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -25,12 +25,16 @@ size_t findStoppingStrings(const std::string &text,

  LlamaCompletionWorker::LlamaCompletionWorker(
  const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
- Napi::Function callback, common_params params,
- std::vector<std::string> stop_words, int32_t chat_format,
+ Napi::Function callback,
+ common_params params,
+ std::vector<std::string> stop_words,
+ int32_t chat_format,
+ std::string reasoning_format,
  const std::vector<std::string> &media_paths,
  const std::vector<llama_token> &guide_tokens)
  : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
  _params(params), _stop_words(stop_words), _chat_format(chat_format),
+ _reasoning_format(reasoning_format),
  _media_paths(media_paths), _guide_tokens(guide_tokens) {
  if (!callback.IsEmpty()) {
  _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
@@ -65,7 +69,7 @@ void LlamaCompletionWorker::Execute() {

  // Process media if any are provided
  if (!_media_paths.empty()) {
- const auto *mtmd_ctx = _sess->get_mtmd_ctx();
+ auto *mtmd_ctx = _sess->get_mtmd_ctx();

  if (mtmd_ctx != nullptr) {
  // Process the media and get the tokens
@@ -234,8 +238,21 @@ void LlamaCompletionWorker::OnOK() {
  std::string content;
  if (!_stop) {
  try {
+ common_chat_syntax chat_syntax;
+ chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+
+ if (_reasoning_format == "deepseek") {
+ chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+ } else if (_reasoning_format == "deepseek-legacy") {
+ chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+ } else {
+ chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+ }
  common_chat_msg message = common_chat_parse(
- _result.text, static_cast<common_chat_format>(_chat_format));
+ _result.text,
+ false,
+ chat_syntax
+ );
  if (!message.reasoning_content.empty()) {
  reasoning_content = message.reasoning_content;
  }
package/src/LlamaCompletionWorker.h CHANGED
@@ -20,6 +20,7 @@ public:
  Napi::Function callback, common_params params,
  std::vector<std::string> stop_words,
  int32_t chat_format,
+ std::string reasoning_format,
  const std::vector<std::string> &media_paths = {},
  const std::vector<llama_token> &guide_tokens = {});

@@ -41,6 +42,7 @@ private:
  common_params _params;
  std::vector<std::string> _stop_words;
  int32_t _chat_format;
+ std::string _reasoning_format;
  std::vector<std::string> _media_paths;
  std::vector<llama_token> _guide_tokens;
  std::function<void()> _onComplete;
package/src/LlamaContext.cpp CHANGED
@@ -10,7 +10,7 @@
  #include "ggml.h"
  #include "gguf.h"
  #include "json-schema-to-grammar.h"
- #include "json.hpp"
+ #include <nlohmann/json.hpp>
  #include "llama-impl.h"

  #include <atomic>
@@ -223,14 +223,6 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

  params.chat_template = get_option<std::string>(options, "chat_template", "");

- std::string reasoning_format =
- get_option<std::string>(options, "reasoning_format", "none");
- if (reasoning_format == "deepseek") {
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- } else {
- params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
- }
-
  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
  params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -521,8 +513,6 @@ common_chat_params getFormattedChatWithJinja(
  if (!json_schema.empty()) {
  inputs.json_schema = json::parse(json_schema);
  }
- inputs.extract_reasoning =
- sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;

  // If chat_template is provided, create new one and use it (probably slow)
  if (!chat_template.empty()) {
@@ -695,6 +685,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }

  int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+ std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");

  common_params params = _sess->params();
  auto grammar_from_params = get_option<std::string>(options, "grammar", "");
@@ -904,7 +895,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

  auto *worker =
  new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
- chat_format, media_paths, guide_tokens);
+ chat_format, reasoning_format, media_paths, guide_tokens);
  worker->Queue();
  _wip = worker;
  worker->OnComplete([this]() { _wip = nullptr; });
package/src/common.hpp CHANGED
@@ -6,6 +6,7 @@
  #include "llama.h"
  #include "tools/mtmd/clip.h"
  #include "tools/mtmd/mtmd.h"
+ #include "tools/mtmd/mtmd-helper.h"
  #include <memory>
  #include <mutex>
  #include <napi.h>
@@ -97,7 +98,7 @@ public:
  inline std::mutex &get_mutex() { return mutex; }

  // Getter for the multimodal context
- inline const mtmd_context *get_mtmd_ctx() const { return _mtmd_ctx; }
+ inline mtmd_context *get_mtmd_ctx() { return _mtmd_ctx; }

  // Setter for the multimodal context
  inline void set_mtmd_ctx(mtmd_context *ctx) { _mtmd_ctx = ctx; }
@@ -219,7 +220,7 @@ struct TokenizeResult {
  };

  static TokenizeResult
- tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
+ tokenizeWithMedia(mtmd_context *mtmd_ctx, const std::string &prompt,
  const std::vector<std::string> &media_paths) {
  if (mtmd_ctx == nullptr) {
  throw std::runtime_error("Multimodal context is not initialized");
@@ -263,7 +264,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
  std::vector<uint8_t> media_data = base64_decode(base64_data);

  // Load bitmap from memory buffer using direct initialization
- mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(),
+ mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(mtmd_ctx, media_data.data(),
  media_data.size()));
  if (!bmp.ptr) {
  bitmaps.entries.clear();
@@ -300,7 +301,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
  fclose(file);

  // Create bitmap directly
- mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
+ mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(mtmd_ctx, media_path.c_str()));
  if (!bmp.ptr) {
  bitmaps.entries.clear();
  throw std::runtime_error("Failed to load media");
@@ -388,7 +389,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,

  // Process media and add them to the tokenized input
  static llama_pos
- processMediaPrompt(llama_context *ctx, const mtmd_context *mtmd_ctx,
+ processMediaPrompt(llama_context *ctx, mtmd_context *mtmd_ctx,
  LlamaSessionPtr sess, const common_params &params,
  const std::vector<std::string> &media_paths) {
  if (mtmd_ctx == nullptr) {
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
  include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
  include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)

+ if (NOT DEFINED LLAMA_BUILD_NUMBER)
+ set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+ endif()
+ if (NOT DEFINED LLAMA_BUILD_COMMIT)
+ set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+ endif()
+ set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
  # override ggml options
  set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
  set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -155,10 +163,17 @@ if (LLAMA_USE_SYSTEM_GGML)
  endif()

  if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+ set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
+ set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
  add_subdirectory(ggml)
  # ... otherwise assume ggml is added by a parent CMakeLists.txt
  endif()

+ if (MINGW)
+ # Target Windows 8 for PrefetchVirtualMemory
+ add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+ endif()
+
  #
  # build the library
  #
@@ -199,10 +214,6 @@ endif()
  include(GNUInstallDirs)
  include(CMakePackageConfigHelpers)

- set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
- set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
- set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-
  set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
  set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
  set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -7,8 +7,8 @@ llama_add_compile_flags()
  # Build info header
  #

- if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
- set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
+ if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
+ set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")

  # Is git submodule
  if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,36 +18,26 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
  if (SLASH_POS EQUAL 0)
  set(GIT_DIR "${REAL_GIT_DIR}")
  else()
- set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
+ set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
  endif()
  endif()

  if(EXISTS "${GIT_DIR}/index")
- set(GIT_INDEX "${GIT_DIR}/index")
+ # For build-info.cpp below
+ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${GIT_DIR}/index")
  else()
  message(WARNING "Git index not found in git repository.")
- set(GIT_INDEX "")
  endif()
  else()
  message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
- set(GIT_INDEX "")
  endif()

- # Add a custom command to rebuild build-info.cpp when .git/index changes
- add_custom_command(
- OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp"
- COMMENT "Generating build details from Git"
- COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
- -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
- -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
- -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
- -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
- WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
- DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
- VERBATIM
- )
+ set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
+ set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
+ configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+
  set(TARGET build_info)
- add_library(${TARGET} OBJECT build-info.cpp)
+ add_library(${TARGET} OBJECT ${OUTPUT_FILE})
  if (BUILD_SHARED_LIBS)
  set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
  endif()
@@ -58,19 +48,20 @@ add_library(${TARGET} STATIC
  arg.cpp
  arg.h
  base64.hpp
+ chat-parser.cpp
+ chat-parser.h
  chat.cpp
  chat.h
  common.cpp
  common.h
  console.cpp
  console.h
+ json-partial.cpp
+ json-partial.h
  json-schema-to-grammar.cpp
- json.hpp
  llguidance.cpp
  log.cpp
  log.h
- minja/chat-template.hpp
- minja/minja.hpp
  ngram-cache.cpp
  ngram-cache.h
  regex-partial.cpp
@@ -143,7 +134,7 @@ if (LLAMA_LLGUIDANCE)
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

- target_include_directories(${TARGET} PUBLIC .)
+ target_include_directories(${TARGET} PUBLIC . ../vendor)
  target_compile_features (${TARGET} PUBLIC cxx_std_17)
  target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)