node-llama-cpp 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/dist/bindings/AddonTypes.d.ts +7 -3
  2. package/dist/bindings/Llama.d.ts +1 -1
  3. package/dist/bindings/Llama.js +17 -2
  4. package/dist/bindings/Llama.js.map +1 -1
  5. package/dist/bindings/getLlama.d.ts +4 -1
  6. package/dist/bindings/getLlama.js +42 -2
  7. package/dist/bindings/getLlama.js.map +1 -1
  8. package/dist/bindings/types.d.ts +3 -1
  9. package/dist/bindings/types.js +2 -0
  10. package/dist/bindings/types.js.map +1 -1
  11. package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -1
  12. package/dist/bindings/utils/compileLLamaCpp.js +41 -2
  13. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  14. package/dist/bindings/utils/testBindingBinary.d.ts +2 -1
  15. package/dist/bindings/utils/testBindingBinary.js +122 -37
  16. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  17. package/dist/chatWrappers/Llama3_1ChatWrapper.js +3 -2
  18. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
  19. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.d.ts +35 -0
  20. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +253 -0
  21. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -0
  22. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +13 -0
  23. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +30 -0
  24. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
  25. package/dist/chatWrappers/utils/getModelLinageNames.d.ts +2 -0
  26. package/dist/chatWrappers/utils/getModelLinageNames.js +18 -0
  27. package/dist/chatWrappers/utils/getModelLinageNames.js.map +1 -0
  28. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.d.ts +2 -0
  29. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js +9 -0
  30. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js.map +1 -0
  31. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
  32. package/dist/chatWrappers/utils/resolveChatWrapper.js +9 -31
  33. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  34. package/dist/cli/commands/ChatCommand.js +1 -1
  35. package/dist/cli/commands/ChatCommand.js.map +1 -1
  36. package/dist/cli/commands/InitCommand.d.ts +2 -1
  37. package/dist/cli/commands/InitCommand.js +32 -13
  38. package/dist/cli/commands/InitCommand.js.map +1 -1
  39. package/dist/cli/recommendedModels.js +11 -0
  40. package/dist/cli/recommendedModels.js.map +1 -1
  41. package/dist/evaluator/LlamaChat/LlamaChat.js +25 -12
  42. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  43. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +140 -132
  44. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  45. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +1 -1
  46. package/dist/evaluator/LlamaCompletion.js +9 -10
  47. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  48. package/dist/evaluator/LlamaEmbeddingContext.d.ts +2 -0
  49. package/dist/evaluator/LlamaEmbeddingContext.js +13 -1
  50. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  51. package/dist/evaluator/LlamaGrammar.js +7 -0
  52. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  53. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -2
  54. package/dist/evaluator/LlamaJsonSchemaGrammar.js +3 -0
  55. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  56. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +20 -0
  57. package/dist/evaluator/LlamaModel/LlamaModel.js +60 -0
  58. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  59. package/dist/gguf/fileReaders/GgufFileReader.d.ts +1 -1
  60. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +1 -1
  61. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +1 -1
  62. package/dist/gguf/insights/GgufInsights.d.ts +2 -0
  63. package/dist/gguf/insights/GgufInsights.js +15 -0
  64. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  65. package/dist/gguf/types/GgufMetadataTypes.d.ts +19 -2
  66. package/dist/gguf/types/GgufMetadataTypes.js +5 -0
  67. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  68. package/dist/index.d.ts +3 -2
  69. package/dist/index.js +2 -1
  70. package/dist/index.js.map +1 -1
  71. package/dist/tsconfig.tsbuildinfo +1 -1
  72. package/dist/types.d.ts +2 -2
  73. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +5 -0
  74. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +15 -0
  75. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  76. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +3 -0
  77. package/dist/utils/gbnfJson/GbnfTerminal.js +25 -3
  78. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  79. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +1 -1
  80. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  81. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.d.ts +9 -0
  82. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js +53 -0
  83. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js.map +1 -0
  84. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +11 -2
  85. package/dist/utils/gbnfJson/terminals/GbnfArray.js +64 -15
  86. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  87. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +3 -2
  88. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js +13 -7
  89. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  90. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.d.ts +11 -0
  91. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js +28 -0
  92. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js.map +1 -0
  93. package/dist/utils/gbnfJson/terminals/GbnfFormatString.d.ts +11 -0
  94. package/dist/utils/gbnfJson/terminals/GbnfFormatString.js +90 -0
  95. package/dist/utils/gbnfJson/terminals/GbnfFormatString.js.map +1 -0
  96. package/dist/utils/gbnfJson/terminals/GbnfGrammar.d.ts +4 -1
  97. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js +8 -1
  98. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  99. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.d.ts +5 -0
  100. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js +24 -0
  101. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js.map +1 -0
  102. package/dist/utils/gbnfJson/terminals/GbnfNumber.js +4 -3
  103. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  104. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.d.ts +3 -1
  105. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js +8 -2
  106. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  107. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +14 -5
  108. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +67 -9
  109. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  110. package/dist/utils/gbnfJson/terminals/GbnfOr.d.ts +2 -1
  111. package/dist/utils/gbnfJson/terminals/GbnfOr.js +6 -2
  112. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  113. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +7 -1
  114. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +69 -20
  115. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -1
  116. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +8 -1
  117. package/dist/utils/gbnfJson/terminals/GbnfString.js +31 -21
  118. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  119. package/dist/utils/gbnfJson/terminals/GbnfStringValue.d.ts +2 -0
  120. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js +7 -4
  121. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  122. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +2 -1
  123. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +24 -5
  124. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  125. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +16 -1
  126. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +47 -1
  127. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  128. package/dist/utils/gbnfJson/types.d.ts +164 -17
  129. package/dist/utils/gbnfJson/types.js +6 -0
  130. package/dist/utils/gbnfJson/types.js.map +1 -1
  131. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +61 -9
  132. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  133. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +99 -8
  134. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  135. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +125 -16
  136. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  137. package/dist/utils/includesText.d.ts +1 -0
  138. package/dist/utils/includesText.js +12 -0
  139. package/dist/utils/includesText.js.map +1 -0
  140. package/dist/utils/tokenizeInput.d.ts +1 -1
  141. package/dist/utils/tokenizeInput.js +3 -1
  142. package/dist/utils/tokenizeInput.js.map +1 -1
  143. package/dist/utils/tokenizerUtils.d.ts +12 -0
  144. package/dist/utils/tokenizerUtils.js +28 -0
  145. package/dist/utils/tokenizerUtils.js.map +1 -0
  146. package/dist/utils/wrapAbortSignal.d.ts +1 -1
  147. package/dist/utils/wrapAbortSignal.js +9 -4
  148. package/dist/utils/wrapAbortSignal.js.map +1 -1
  149. package/llama/CMakeLists.txt +10 -90
  150. package/llama/addon/AddonContext.cpp +2 -1
  151. package/llama/addon/AddonGrammar.cpp +49 -1
  152. package/llama/addon/AddonGrammar.h +3 -0
  153. package/llama/addon/AddonModel.cpp +26 -4
  154. package/llama/addon/AddonModel.h +3 -0
  155. package/llama/addon/addon.cpp +17 -0
  156. package/llama/addon/globals/getGpuInfo.cpp +54 -52
  157. package/llama/binariesGithubRelease.json +1 -1
  158. package/llama/gitRelease.bundle +0 -0
  159. package/llama/gpuInfo/vulkan-gpu-info.cpp +0 -8
  160. package/llama/gpuInfo/vulkan-gpu-info.h +0 -1
  161. package/llama/grammars/README.md +2 -2
  162. package/llama/grammars/english.gbnf +6 -0
  163. package/llama/llama.cpp.info.json +1 -1
  164. package/package.json +49 -45
  165. package/templates/README.md +6 -0
  166. package/templates/packed/electron-typescript-react.json +1 -1
  167. package/templates/packed/node-typescript.json +1 -1
  168. package/llama/gpuInfo/cuda-gpu-info.cu +0 -120
  169. package/llama/gpuInfo/cuda-gpu-info.h +0 -10
  170. package/llama/gpuInfo/metal-gpu-info.h +0 -8
  171. package/llama/gpuInfo/metal-gpu-info.mm +0 -37
@@ -28,6 +28,16 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Ap
28
28
  add_compile_options(-Wno-c++17-extensions)
29
29
  endif()
30
30
 
31
+ if(APPLE)
32
+ set(CMAKE_SKIP_BUILD_RPATH FALSE)
33
+ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
34
+ set(CMAKE_BUILD_RPATH "@loader_path")
35
+ set(CMAKE_INSTALL_RPATH "@loader_path")
36
+ set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
37
+ else()
38
+ set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
39
+ endif()
40
+
31
41
  include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
32
42
 
33
43
  add_subdirectory("llama.cpp")
@@ -39,41 +49,6 @@ unset(GPU_INFO_HEADERS)
39
49
  unset(GPU_INFO_SOURCES)
40
50
  unset(GPU_INFO_EXTRA_LIBS)
41
51
 
42
- if (GGML_CUDA)
43
- cmake_minimum_required(VERSION 3.17)
44
-
45
- find_package(CUDAToolkit)
46
- if (CUDAToolkit_FOUND)
47
- message(STATUS "Using CUDA for GPU info")
48
-
49
- enable_language(CUDA)
50
-
51
- list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h)
52
- list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu)
53
-
54
- add_compile_definitions(GPU_INFO_USE_CUDA)
55
-
56
- if (GGML_STATIC)
57
- list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static)
58
- else()
59
- list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart)
60
- endif()
61
-
62
- list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver)
63
-
64
- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
65
- # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
66
- if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
67
- set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
68
- else()
69
- set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
70
- endif()
71
- endif()
72
- else()
73
- message(FATAL_ERROR "CUDA was not found")
74
- endif()
75
- endif()
76
-
77
52
  if (GGML_VULKAN OR GGML_KOMPUTE)
78
53
  find_package(Vulkan)
79
54
  if (Vulkan_FOUND)
@@ -94,67 +69,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE)
94
69
  endif()
95
70
  endif()
96
71
 
97
- if (GGML_HIPBLAS)
98
- list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
99
-
100
- if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
101
- message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
102
- endif()
103
- if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
104
- message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
105
- endif()
106
-
107
- find_package(hip)
108
- find_package(hipblas)
109
- find_package(rocblas)
110
-
111
- if (${hipblas_FOUND} AND ${hip_FOUND})
112
- message(STATUS "Using HIP and hipBLAS for GPU info")
113
- add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
114
- add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
115
- set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
116
- target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
117
-
118
- list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm)
119
- else()
120
- message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
121
- endif()
122
- endif()
123
-
124
- if (GGML_METAL)
125
- find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
126
- find_library(METAL_FRAMEWORK Metal REQUIRED)
127
- find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
128
-
129
- message(STATUS "Using Metal for GPU info")
130
- list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h)
131
- list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm)
132
-
133
- add_compile_definitions(GPU_INFO_USE_METAL)
134
-
135
- list(APPEND GPU_INFO_EXTRA_LIBS
136
- ${FOUNDATION_LIBRARY}
137
- ${METAL_FRAMEWORK}
138
- ${METALKIT_FRAMEWORK}
139
- )
140
- endif()
141
-
142
72
  list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
143
73
  list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
144
74
  list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)
145
75
 
146
76
  file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})
147
77
 
148
- if(APPLE)
149
- set(CMAKE_SKIP_BUILD_RPATH FALSE)
150
- set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
151
- set(CMAKE_BUILD_RPATH "@loader_path")
152
- set(CMAKE_INSTALL_RPATH "@loader_path")
153
- set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
154
- else()
155
- set(CMAKE_BUILD_RPATH_USE_ORIGIN ON)
156
- endif()
157
-
158
78
  add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
159
79
  set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
160
80
  target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
@@ -531,7 +531,8 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
531
531
  }
532
532
 
533
533
  const int n_embd = llama_n_embd(model->model);
534
- const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
534
+ const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
535
+ const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
535
536
  if (embeddings == NULL) {
536
537
  embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
537
538
 
@@ -34,6 +34,54 @@ AddonGrammar::~AddonGrammar() {
34
34
  }
35
35
  }
36
36
 
37
+ Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
38
+ const std::string testText = info[0].As<Napi::String>().Utf8Value();
39
+
40
+ auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str());
41
+
42
+ // will be empty if there are parse errors
43
+ if (parsed_grammar == nullptr) {
44
+ Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
45
+ return Napi::Boolean::New(info.Env(), false);
46
+ }
47
+
48
+ const auto cpts = unicode_cpts_from_utf8(testText);
49
+ const llama_grammar_rules & rules = llama_grammar_get_rules(parsed_grammar);
50
+ llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
51
+
52
+ for (const auto & cpt : cpts) {
53
+ const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(parsed_grammar);
54
+
55
+ llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
56
+
57
+ if (stacks_cur.empty()) {
58
+ // no stacks means that the grammar failed to match at this point
59
+ llama_grammar_free_impl(parsed_grammar);
60
+ return Napi::Boolean::New(info.Env(), false);
61
+ }
62
+ }
63
+
64
+ for (const auto & stack : stacks_cur) {
65
+ if (stack.empty()) {
66
+ // an empty stack means that the grammar has been completed
67
+ llama_grammar_free_impl(parsed_grammar);
68
+ return Napi::Boolean::New(info.Env(), true);
69
+ }
70
+ }
71
+
72
+ llama_grammar_free_impl(parsed_grammar);
73
+ return Napi::Boolean::New(info.Env(), false);
74
+ }
75
+
37
76
  void AddonGrammar::init(Napi::Object exports) {
38
- exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
77
+ exports.Set(
78
+ "AddonGrammar",
79
+ DefineClass(
80
+ exports.Env(),
81
+ "AddonGrammar",
82
+ {
83
+ InstanceMethod("isTextCompatible", &AddonGrammar::isTextCompatible),
84
+ }
85
+ )
86
+ );
39
87
  }
@@ -2,6 +2,7 @@
2
2
  #include "llama.h"
3
3
  #include "common/common.h"
4
4
  #include "llama-grammar.h"
5
+ #include "unicode.h"
5
6
  #include "napi.h"
6
7
  #include "addonGlobals.h"
7
8
 
@@ -15,5 +16,7 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
15
16
  AddonGrammar(const Napi::CallbackInfo& info);
16
17
  ~AddonGrammar();
17
18
 
19
+ Napi::Value isTextCompatible(const Napi::CallbackInfo& info);
20
+
18
21
  static void init(Napi::Object exports);
19
22
  };
@@ -9,7 +9,7 @@
9
9
  #include "AddonModelLora.h"
10
10
 
11
11
  static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
12
- if (token < 0) {
12
+ if (token < 0 || token == LLAMA_TOKEN_NULL) {
13
13
  return Napi::Number::From(info.Env(), -1);
14
14
  }
15
15
 
@@ -565,6 +565,22 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
565
565
 
566
566
  return getNapiToken(info, model, llama_token_eot(model));
567
567
  }
568
+ Napi::Value AddonModel::ClsToken(const Napi::CallbackInfo& info) {
569
+ if (disposed) {
570
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
571
+ return info.Env().Undefined();
572
+ }
573
+
574
+ return getNapiToken(info, model, llama_token_cls(model));
575
+ }
576
+ Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
577
+ if (disposed) {
578
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
579
+ return info.Env().Undefined();
580
+ }
581
+
582
+ return getNapiToken(info, model, llama_token_sep(model));
583
+ }
568
584
  Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
569
585
  if (disposed) {
570
586
  Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
@@ -624,11 +640,14 @@ Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
624
640
  return Napi::Number::From(info.Env(), int32_t(vocabularyType));
625
641
  }
626
642
  Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
627
- const int addBos = llama_add_bos_token(model);
643
+ const bool addBos = llama_add_bos_token(model);
628
644
 
629
- bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
645
+ return Napi::Boolean::New(info.Env(), addBos);
646
+ }
647
+ Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
648
+ const bool addEos = llama_add_eos_token(model);
630
649
 
631
- return Napi::Boolean::New(info.Env(), shouldPrependBos);
650
+ return Napi::Boolean::New(info.Env(), addEos);
632
651
  }
633
652
 
634
653
  Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
@@ -659,11 +678,14 @@ void AddonModel::init(Napi::Object exports) {
659
678
  InstanceMethod("middleToken", &AddonModel::MiddleToken),
660
679
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
661
680
  InstanceMethod("eotToken", &AddonModel::EotToken),
681
+ InstanceMethod("clsToken", &AddonModel::ClsToken),
682
+ InstanceMethod("sepToken", &AddonModel::SepToken),
662
683
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
663
684
  InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
664
685
  InstanceMethod("isEogToken", &AddonModel::IsEogToken),
665
686
  InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
666
687
  InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
688
+ InstanceMethod("shouldAppendEosToken", &AddonModel::ShouldAppendEosToken),
667
689
  InstanceMethod("getModelSize", &AddonModel::GetModelSize),
668
690
  InstanceMethod("dispose", &AddonModel::Dispose),
669
691
  }
@@ -49,12 +49,15 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
49
49
  Napi::Value MiddleToken(const Napi::CallbackInfo& info);
50
50
  Napi::Value SuffixToken(const Napi::CallbackInfo& info);
51
51
  Napi::Value EotToken(const Napi::CallbackInfo& info);
52
+ Napi::Value ClsToken(const Napi::CallbackInfo& info);
53
+ Napi::Value SepToken(const Napi::CallbackInfo& info);
52
54
  Napi::Value GetTokenString(const Napi::CallbackInfo& info);
53
55
 
54
56
  Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info);
55
57
  Napi::Value IsEogToken(const Napi::CallbackInfo& info);
56
58
  Napi::Value GetVocabularyType(const Napi::CallbackInfo& info);
57
59
  Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info);
60
+ Napi::Value ShouldAppendEosToken(const Napi::CallbackInfo& info);
58
61
  Napi::Value GetModelSize(const Napi::CallbackInfo& info);
59
62
 
60
63
  static void init(Napi::Object exports);
@@ -151,6 +151,22 @@ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
151
151
  }
152
152
  };
153
153
 
154
+ Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
155
+ const bool forceLoadLibraries = info.Length() == 0
156
+ ? false
157
+ : info[0].IsBoolean()
158
+ ? info[0].As<Napi::Boolean>().Value()
159
+ : false;
160
+
161
+ ggml_backend_reg_count();
162
+
163
+ if (forceLoadLibraries) {
164
+ ggml_backend_load_all();
165
+ }
166
+
167
+ return info.Env().Undefined();
168
+ }
169
+
154
170
  Napi::Value addonInit(const Napi::CallbackInfo& info) {
155
171
  if (backendInitialized) {
156
172
  Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
@@ -205,6 +221,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
205
221
  Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
206
222
  Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
207
223
  Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
224
+ Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
208
225
  Napi::PropertyDescriptor::Function("init", addonInit),
209
226
  Napi::PropertyDescriptor::Function("dispose", addonDispose),
210
227
  });
@@ -1,22 +1,15 @@
1
1
  #include "getGpuInfo.h"
2
2
  #include "addonLog.h"
3
3
 
4
- #ifdef GPU_INFO_USE_CUDA
5
- # include "../../gpuInfo/cuda-gpu-info.h"
4
+ #ifdef __APPLE__
5
+ #include <TargetConditionals.h>
6
6
  #endif
7
+
7
8
  #ifdef GPU_INFO_USE_VULKAN
8
9
  # include "../../gpuInfo/vulkan-gpu-info.h"
9
10
  #endif
10
- #ifdef GPU_INFO_USE_METAL
11
- # include "../../gpuInfo/metal-gpu-info.h"
12
- #endif
13
11
 
14
12
 
15
- #ifdef GPU_INFO_USE_CUDA
16
- void logCudaError(const char* message) {
17
- addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
18
- }
19
- #endif
20
13
  #ifdef GPU_INFO_USE_VULKAN
21
14
  void logVulkanWarning(const char* message) {
22
15
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
@@ -24,20 +17,31 @@ void logVulkanWarning(const char* message) {
24
17
  #endif
25
18
 
26
19
  Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
20
+ ggml_backend_dev_t device = NULL;
21
+ size_t deviceTotal = 0;
22
+ size_t deviceFree = 0;
23
+
27
24
  uint64_t total = 0;
28
25
  uint64_t used = 0;
29
26
  uint64_t unifiedVramSize = 0;
30
27
 
31
- #ifdef GPU_INFO_USE_CUDA
32
- size_t cudaDeviceTotal = 0;
33
- size_t cudaDeviceUsed = 0;
34
- bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
28
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
29
+ device = ggml_backend_dev_get(i);
30
+ if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
31
+ deviceTotal = 0;
32
+ deviceFree = 0;
33
+ ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
35
34
 
36
- if (cudeGetInfoSuccess) {
37
- total += cudaDeviceTotal;
38
- used += cudaDeviceUsed;
39
- }
35
+ total += deviceTotal;
36
+ used += deviceTotal - deviceFree;
37
+
38
+ #if defined(__arm64__) || defined(__aarch64__)
39
+ if (std::string(ggml_backend_dev_name(device)) == "Metal") {
40
+ unifiedVramSize += deviceTotal;
41
+ }
40
42
  #endif
43
+ }
44
+ }
41
45
 
42
46
  #ifdef GPU_INFO_USE_VULKAN
43
47
  uint64_t vulkanDeviceTotal = 0;
@@ -46,23 +50,15 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
46
50
  const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
47
51
 
48
52
  if (vulkanDeviceSupportsMemoryBudgetExtension) {
49
- total += vulkanDeviceTotal;
50
- used += vulkanDeviceUsed;
53
+ if (vulkanDeviceUnifiedVramSize > total) {
54
+ // this means that we counted memory from devices that aren't used by llama.cpp
55
+ vulkanDeviceUnifiedVramSize = 0;
56
+ }
57
+
51
58
  unifiedVramSize += vulkanDeviceUnifiedVramSize;
52
59
  }
53
60
  #endif
54
61
 
55
- #ifdef GPU_INFO_USE_METAL
56
- uint64_t metalDeviceTotal = 0;
57
- uint64_t metalDeviceUsed = 0;
58
- uint64_t metalDeviceUnifiedVramSize = 0;
59
- getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed, &metalDeviceUnifiedVramSize);
60
-
61
- total += metalDeviceTotal;
62
- used += metalDeviceUsed;
63
- unifiedVramSize += metalDeviceUnifiedVramSize;
64
- #endif
65
-
66
62
  Napi::Object result = Napi::Object::New(info.Env());
67
63
  result.Set("total", Napi::Number::From(info.Env(), total));
68
64
  result.Set("used", Napi::Number::From(info.Env(), used));
@@ -74,17 +70,13 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
74
70
  Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
75
71
  std::vector<std::string> deviceNames;
76
72
 
77
- #ifdef GPU_INFO_USE_CUDA
78
- gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
79
- #endif
73
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
74
+ ggml_backend_dev_t device = ggml_backend_dev_get(i);
75
+ if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
80
76
 
81
- #ifdef GPU_INFO_USE_VULKAN
82
- gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
83
- #endif
84
-
85
- #ifdef GPU_INFO_USE_METAL
86
- getMetalGpuDeviceNames(&deviceNames);
87
- #endif
77
+ deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
78
+ }
79
+ }
88
80
 
89
81
  Napi::Object result = Napi::Object::New(info.Env());
90
82
 
@@ -98,17 +90,27 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
98
90
  }
99
91
 
100
92
  Napi::Value getGpuType(const Napi::CallbackInfo& info) {
101
- #ifdef GPU_INFO_USE_CUDA
102
- return Napi::String::New(info.Env(), "cuda");
103
- #endif
104
-
105
- #ifdef GPU_INFO_USE_VULKAN
106
- return Napi::String::New(info.Env(), "vulkan");
107
- #endif
93
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
94
+ ggml_backend_dev_t device = ggml_backend_dev_get(i);
95
+ const auto deviceName = std::string(ggml_backend_dev_name(device));
96
+
97
+ if (deviceName == "Metal") {
98
+ return Napi::String::New(info.Env(), "metal");
99
+ } else if (std::string(deviceName).find("Vulkan") == 0) {
100
+ return Napi::String::New(info.Env(), "vulkan");
101
+ } else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
102
+ return Napi::String::New(info.Env(), "cuda");
103
+ }
104
+ }
108
105
 
109
- #ifdef GPU_INFO_USE_METAL
110
- return Napi::String::New(info.Env(), "metal");
111
- #endif
106
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
107
+ ggml_backend_dev_t device = ggml_backend_dev_get(i);
108
+ const auto deviceName = std::string(ggml_backend_dev_name(device));
109
+
110
+ if (deviceName == "CPU") {
111
+ return Napi::Boolean::New(info.Env(), false);
112
+ }
113
+ }
112
114
 
113
115
  return info.Env().Undefined();
114
- }
116
+ }
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b3995"
2
+ "release": "b4291"
3
3
  }
Binary file (package/llama/gitRelease.bundle): contents not shown
@@ -80,11 +80,3 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM
80
80
  bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
81
81
  return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback);
82
82
  }
83
-
84
- bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
85
- size_t vulkanDeviceTotal = 0;
86
- size_t vulkanDeviceUsed = 0;
87
- size_t unifiedMemorySize = 0;
88
-
89
- return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, &unifiedMemorySize, true, deviceNames, warningLogCallback);
90
- }
@@ -6,4 +6,3 @@
6
6
  typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
7
7
 
8
8
  bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
9
- bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
@@ -46,7 +46,7 @@ Terminals support the full range of Unicode. Unicode characters can be specified
46
46
 
47
47
  Character ranges can be negated with `^`:
48
48
  ```
49
- single-line ::= [^\n]+ "\n"`
49
+ single-line ::= [^\n]+ "\n"
50
50
  ```
51
51
 
52
52
  ## Sequences and Alternatives
@@ -124,7 +124,7 @@ You can use GBNF grammars:
124
124
  - In [llama-cli](../examples/main), passed as the `--json` / `-j` flag
125
125
  - To convert to a grammar ahead of time:
126
126
  - in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
127
- - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
127
+ - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
128
128
 
129
129
  Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggerganov/llama.cpp/pull/5978, https://github.com/ggerganov/llama.cpp/pull/6659 & https://github.com/ggerganov/llama.cpp/pull/6555).
130
130
 
@@ -0,0 +1,6 @@
1
+ # note: this might be incomplete, mostly an example
2
+ root ::= en-char+ ([ \t\n] en-char+)*
3
+ en-char ::= letter | digit | punctuation
4
+ letter ::= [a-zA-Z]
5
+ digit ::= [0-9]
6
+ punctuation ::= [!"#$%&'()*+,-./:;<=>?@[\\\]^_`{|}~]
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b3995",
2
+ "tag": "b4291",
3
3
  "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
4
  }