@fugood/llama.node 0.3.8 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +52 -8
  19. package/lib/index.ts +3 -1
  20. package/package.json +8 -1
  21. package/src/LlamaCompletionWorker.cpp +33 -6
  22. package/src/LlamaCompletionWorker.h +3 -1
  23. package/src/LlamaContext.cpp +387 -28
  24. package/src/LlamaContext.h +5 -0
  25. package/src/common.hpp +19 -2
  26. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  27. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  28. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  29. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  30. package/src/llama.cpp/CMakeLists.txt +10 -19
  31. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  32. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  33. package/src/llama.cpp/common/arg.cpp +66 -16
  34. package/src/llama.cpp/common/chat-template.hpp +515 -0
  35. package/src/llama.cpp/common/chat.cpp +966 -0
  36. package/src/llama.cpp/common/chat.hpp +52 -0
  37. package/src/llama.cpp/common/common.cpp +159 -36
  38. package/src/llama.cpp/common/common.h +56 -14
  39. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  40. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  41. package/src/llama.cpp/common/llguidance.cpp +270 -0
  42. package/src/llama.cpp/common/log.cpp +1 -10
  43. package/src/llama.cpp/common/log.h +10 -0
  44. package/src/llama.cpp/common/minja.hpp +2868 -0
  45. package/src/llama.cpp/common/sampling.cpp +22 -1
  46. package/src/llama.cpp/common/sampling.h +3 -0
  47. package/src/llama.cpp/docs/build.md +54 -9
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  49. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  50. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  51. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  52. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  53. package/src/llama.cpp/examples/llava/clip.h +2 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  55. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  56. package/src/llama.cpp/examples/main/main.cpp +26 -25
  57. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  59. package/src/llama.cpp/examples/run/run.cpp +224 -69
  60. package/src/llama.cpp/examples/server/server.cpp +252 -81
  61. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  62. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  63. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  65. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  68. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  71. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  73. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  74. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  76. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  79. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  81. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  82. package/src/llama.cpp/include/llama.h +14 -1
  83. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  85. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  87. package/src/llama.cpp/src/llama-arch.h +3 -1
  88. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  89. package/src/llama.cpp/src/llama-chat.h +1 -0
  90. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  91. package/src/llama.cpp/src/llama-grammar.h +22 -1
  92. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  93. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  94. package/src/llama.cpp/src/llama-model.cpp +76 -6
  95. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  96. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  97. package/src/llama.cpp/src/llama.cpp +181 -123
  98. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  99. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  100. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  101. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  102. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  103. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  104. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  105. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/lib/binding.js CHANGED
@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant && variant !== 'default') {
             setupEnv(variant);
-            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+            return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
         }
     }
     catch (_a) { } // ignore errors and try the common path
     setupEnv();
-    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+    return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
 });
 exports.loadModule = loadModule;
package/lib/binding.ts CHANGED
@@ -2,11 +2,12 @@ import * as path from 'path'
 
 export type ChatMessage = {
   role: string
-  text: string
+  content: string
 }
 
 export type LlamaModelOptions = {
   model: string
+  chat_template?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -37,12 +38,29 @@ export type LlamaModelOptions = {
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
+  lora?: string
+  lora_scaled?: number
+  lora_list?: { path: string; scaled: number }[]
+}
+
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
 }
 
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
+  jinja?: boolean
+  chat_template?: string
+  response_format?: CompletionResponseFormat
+  tools?: object
+  parallel_tool_calls?: boolean
+  tool_choice?: string
   prompt?: string
-  n_samples?: number
   temperature?: number
   top_k?: number
   top_p?: number
@@ -67,6 +85,9 @@ export type LlamaCompletionOptions = {
   seed?: number
   stop?: string[]
   grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: { word: string; at_start: boolean }[]
+  preserved_tokens?: string[]
 }
 
 export type LlamaCompletionResult = {
@@ -102,8 +123,21 @@ export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   getModelInfo(): object
-  getFormattedChat(messages: ChatMessage[]): string
-  completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
+  getFormattedChat(
+    messages: ChatMessage[],
+    chat_template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object | string
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult>
   stopCompletion(): void
   tokenize(text: string): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
@@ -111,6 +145,9 @@ export interface LlamaContext {
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
+  removeLoraAdapters(adapters: { path: string }[]): void
+  getLoadedLoraAdapters(): { path: string; scaled: number }[]
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
 }
@@ -123,7 +160,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
 
 const setupEnv = (variant?: string) => {
   const postfix = variant ? `-${variant}` : ''
-  const binPath = path.resolve(__dirname, `../bin/${process.platform}${postfix}/${process.arch}/`)
+  const binPath = path.resolve(
+    __dirname,
+    `../bin/${process.platform}${postfix}/${process.arch}/`,
+  )
   const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
   if (!systemPathEnv.includes(binPath)) {
     if (process.platform === 'win32') {
@@ -138,9 +178,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   try {
     if (variant && variant !== 'default') {
       setupEnv(variant)
-      return await import(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
+      return (await import(
+        `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+      )) as Module
     }
   } catch {} // ignore errors and try the common path
   setupEnv()
-  return await import(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
- }
+  return (await import(
+    `../bin/${process.platform}/${process.arch}/llama-node.node`
+  )) as Module
+}
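Taken together, the binding.ts changes rename ChatMessage.text to content and add Jinja chat templating, response_format, tool calling, lazy-grammar, and LoRA options. Below is a minimal usage sketch of the new completion options, assuming the loadModel helper exported from lib/index.ts; the model path, JSON schema, and get_weather tool are illustrative placeholders, not part of the package.

```ts
// Hedged sketch of the new completion options (jinja, response_format, tools,
// tool_choice). The model path and the get_weather tool are placeholders.
import { loadModel } from '@fugood/llama.node'

const run = async () => {
  const context = await loadModel({ model: './model.gguf' })

  const result = await context.completion({
    jinja: true, // use the Jinja (minja-based) chat-template path
    messages: [
      // ChatMessage now carries `content` instead of `text`
      { role: 'user', content: 'What is the weather in Tokyo?' },
    ],
    // Constrain the output to JSON via the new response_format option
    response_format: {
      type: 'json_schema',
      json_schema: {
        strict: true,
        schema: {
          type: 'object',
          properties: { answer: { type: 'string' } },
          required: ['answer'],
        },
      },
    },
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather',
          description: 'Look up current weather for a city',
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      },
    ],
    tool_choice: 'auto',
  })

  console.log(result.text)
  await context.release()
}

run()
```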
package/lib/index.ts CHANGED
@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
-export const loadModel = async (options: LlamaModelOptionsExtended): Promise<LlamaContext> => {
+export const loadModel = async (
+  options: LlamaModelOptionsExtended,
+): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
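loadModel picks the native binary for options.lib_variant (loadModule falls back to the default build when the variant binary cannot be imported) and constructs a LlamaContext. The sketch below combines that with the new LoRA fields and adapter methods from binding.ts; the model and adapter paths are illustrative placeholders.

```ts
// Sketch of loadModel with lib_variant plus the new LoRA options and the
// runtime adapter methods. Paths are placeholders.
import { loadModel } from '@fugood/llama.node'

const run = async () => {
  const context = await loadModel({
    model: './model.gguf',
    lib_variant: 'vulkan', // falls back to the default binary if this variant fails to load
    lora_list: [{ path: './adapter-a.gguf', scaled: 1.0 }],
  })

  // Adapters can also be managed at runtime via the new LlamaContext methods
  context.applyLoraAdapters([{ path: './adapter-b.gguf', scaled: 0.5 }])
  console.log(context.getLoadedLoraAdapters())
  context.removeLoraAdapters([{ path: './adapter-a.gguf' }])

  await context.release()
}

run()
```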
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.8",
+  "version": "0.3.10",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -77,5 +77,12 @@
     "testMatch": [
       "**/*.test.ts"
     ]
+  },
+  "prettier": {
+    "trailingComma": "all",
+    "tabWidth": 2,
+    "semi": false,
+    "singleQuote": true,
+    "printWidth": 80
   }
 }
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
-    std::vector<std::string> stop_words)
+    std::vector<std::string> stop_words,
+    int32_t chat_format)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words) {
+      _params(params), _stop_words(stop_words), _chat_format(chat_format) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
 }
 
 void LlamaCompletionWorker::OnOK() {
-  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
-  result.Set("tokens_evaluated", Napi::Number::New(Napi::AsyncWorker::Env(),
+  auto env = Napi::AsyncWorker::Env();
+  auto result = Napi::Object::New(env);
+  result.Set("tokens_evaluated", Napi::Number::New(env,
                                                    _result.tokens_evaluated));
   result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                    _result.tokens_predicted));
   result.Set("truncated",
-             Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
+             Napi::Boolean::New(env, _result.truncated));
   result.Set("text",
-             Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+             Napi::String::New(env, _result.text.c_str()));
+
+  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  if (!_stop) {
+    try {
+      common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      for (size_t i = 0; i < message.tool_calls.size(); i++) {
+        const auto &tc = message.tool_calls[i];
+        Napi::Object tool_call = Napi::Object::New(env);
+        tool_call.Set("type", "function");
+        Napi::Object function = Napi::Object::New(env);
+        function.Set("name", tc.name);
+        function.Set("arguments", tc.arguments);
+        tool_call.Set("function", function);
+        if (!tc.id.empty()) {
+          tool_call.Set("id", tc.id);
+        }
+        tool_calls.Set(i, tool_call);
+      }
+    } catch (const std::exception &e) {
+      // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+    }
+  }
+  if (tool_calls.Length() > 0) {
+    result.Set("tool_calls", tool_calls);
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
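With this change, OnOK() runs the finished text through common_chat_parse using the chat format captured at prompt time and, when tool calls are found, attaches a tool_calls array to the JS result object. Below is a hedged sketch of consuming that array from TypeScript; runToolCalls and handleToolCall are hypothetical names, and the cast is defensive because this diff does not show tool_calls being added to the LlamaCompletionResult type.

```ts
// Sketch of reading the tool_calls array produced by the native side.
// `tools` and the model/context follow the earlier loadModel example.
import { loadModel } from '@fugood/llama.node'

type Ctx = Awaited<ReturnType<typeof loadModel>>

// Hypothetical application-side dispatcher; replace with real tool handling.
const handleToolCall = async (name: string, args: unknown, id?: string) => {
  console.log('tool call', name, args, id)
}

export const runToolCalls = async (context: Ctx, tools: object) => {
  const result = await context.completion({
    jinja: true,
    messages: [{ role: 'user', content: 'Look up the weather in Tokyo.' }],
    tools,
    tool_choice: 'auto',
  })

  // Each entry mirrors the Napi object built in OnOK():
  //   { type: 'function', function: { name, arguments }, id? }
  for (const call of (result as any).tool_calls ?? []) {
    const args = JSON.parse(call.function.arguments) // arguments is a JSON string
    await handleToolCall(call.function.name, args, call.id)
  }
}
```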
package/src/LlamaCompletionWorker.h CHANGED
@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words = {});
+                        std::vector<std::string> stop_words = {},
+                        int32_t chat_format = 0);
 
   ~LlamaCompletionWorker();
 
@@ -30,6 +31,7 @@ private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
+  int32_t _chat_format;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
   bool _stop = false;