@fugood/llama.node 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +46 -8
  19. package/lib/index.ts +3 -1
  20. package/package.json +8 -1
  21. package/src/LlamaCompletionWorker.cpp +33 -6
  22. package/src/LlamaCompletionWorker.h +3 -1
  23. package/src/LlamaContext.cpp +292 -28
  24. package/src/LlamaContext.h +1 -0
  25. package/src/common.hpp +19 -2
  26. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  27. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  28. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  29. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  30. package/src/llama.cpp/CMakeLists.txt +10 -19
  31. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  32. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  33. package/src/llama.cpp/common/arg.cpp +66 -16
  34. package/src/llama.cpp/common/chat-template.hpp +515 -0
  35. package/src/llama.cpp/common/chat.cpp +966 -0
  36. package/src/llama.cpp/common/chat.hpp +52 -0
  37. package/src/llama.cpp/common/common.cpp +159 -36
  38. package/src/llama.cpp/common/common.h +56 -14
  39. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  40. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  41. package/src/llama.cpp/common/llguidance.cpp +270 -0
  42. package/src/llama.cpp/common/log.cpp +1 -10
  43. package/src/llama.cpp/common/log.h +10 -0
  44. package/src/llama.cpp/common/minja.hpp +2868 -0
  45. package/src/llama.cpp/common/sampling.cpp +22 -1
  46. package/src/llama.cpp/common/sampling.h +3 -0
  47. package/src/llama.cpp/docs/build.md +54 -9
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  49. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  50. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  51. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  52. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  53. package/src/llama.cpp/examples/llava/clip.h +2 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  55. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  56. package/src/llama.cpp/examples/main/main.cpp +26 -25
  57. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  59. package/src/llama.cpp/examples/run/run.cpp +224 -69
  60. package/src/llama.cpp/examples/server/server.cpp +252 -81
  61. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  62. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  63. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  65. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  68. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  71. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  73. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  74. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  76. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  79. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  81. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  82. package/src/llama.cpp/include/llama.h +14 -1
  83. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  85. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  87. package/src/llama.cpp/src/llama-arch.h +3 -1
  88. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  89. package/src/llama.cpp/src/llama-chat.h +1 -0
  90. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  91. package/src/llama.cpp/src/llama-grammar.h +22 -1
  92. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  93. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  94. package/src/llama.cpp/src/llama-model.cpp +76 -6
  95. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  96. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  97. package/src/llama.cpp/src/llama.cpp +181 -123
  98. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  99. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  100. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  101. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  102. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  103. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  104. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  105. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
Binary files (prebuilt llama-node.node / node.lib) changed; contents not shown.
package/lib/binding.js CHANGED
@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant && variant !== 'default') {
             setupEnv(variant);
-            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+            return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
         }
     }
     catch (_a) { } // ignore errors and try the common path
     setupEnv();
-    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+    return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
 });
 exports.loadModule = loadModule;
package/lib/binding.ts CHANGED
@@ -2,11 +2,12 @@ import * as path from 'path'
 
 export type ChatMessage = {
   role: string
-  text: string
+  content: string
 }
 
 export type LlamaModelOptions = {
   model: string
+  chat_template?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -42,10 +43,24 @@ export type LlamaModelOptions = {
   lora_list?: { path: string; scaled: number }[]
 }
 
+export type CompletionResponseFormat = {
+  type: 'text' | 'json_object' | 'json_schema'
+  json_schema?: {
+    strict?: boolean
+    schema: object
+  }
+  schema?: object // for json_object type
+}
+
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
+  jinja?: boolean
+  chat_template?: string
+  response_format?: CompletionResponseFormat
+  tools?: object
+  parallel_tool_calls?: boolean
+  tool_choice?: string
   prompt?: string
-  n_samples?: number
   temperature?: number
   top_k?: number
   top_p?: number
@@ -70,6 +85,9 @@ export type LlamaCompletionOptions = {
   seed?: number
   stop?: string[]
   grammar?: string
+  grammar_lazy?: boolean
+  grammar_triggers?: { word: string; at_start: boolean }[]
+  preserved_tokens?: string[]
 }
 
 export type LlamaCompletionResult = {
@@ -105,8 +123,21 @@ export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   getModelInfo(): object
-  getFormattedChat(messages: ChatMessage[]): string
-  completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
+  getFormattedChat(
+    messages: ChatMessage[],
+    chat_template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object | string
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult>
   stopCompletion(): void
   tokenize(text: string): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
@@ -129,7 +160,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
 
 const setupEnv = (variant?: string) => {
   const postfix = variant ? `-${variant}` : ''
-  const binPath = path.resolve(__dirname, `../bin/${process.platform}${postfix}/${process.arch}/`)
+  const binPath = path.resolve(
+    __dirname,
+    `../bin/${process.platform}${postfix}/${process.arch}/`,
+  )
   const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
   if (!systemPathEnv.includes(binPath)) {
     if (process.platform === 'win32') {
@@ -144,9 +178,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   try {
     if (variant && variant !== 'default') {
       setupEnv(variant)
-      return await import(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
+      return (await import(
+        `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+      )) as Module
     }
   } catch {} // ignore errors and try the common path
   setupEnv()
-  return await import(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
-}
+  return (await import(
+    `../bin/${process.platform}/${process.arch}/llama-node.node`
+  )) as Module
+}
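The new CompletionResponseFormat type and the extra LlamaCompletionOptions fields (jinja, chat_template, response_format, tools, tool_choice, and the lazy-grammar options) correspond to the OpenAI-compatible chat features handled by the newly vendored common/chat.cpp and minja.hpp. The following is a minimal sketch of how these fields compose; the deep import path, the tool name, and all values are illustrative and not part of this diff:

```ts
// Illustrative only: assumes the declarations above are importable from
// lib/binding; adjust the path to wherever the types are actually re-exported.
import type {
  LlamaCompletionOptions,
  CompletionResponseFormat,
} from '@fugood/llama.node/lib/binding'

const response_format: CompletionResponseFormat = {
  type: 'json_schema',
  json_schema: {
    strict: true,
    schema: {
      type: 'object',
      properties: { answer: { type: 'string' } },
      required: ['answer'],
    },
  },
}

const options: LlamaCompletionOptions = {
  messages: [{ role: 'user', content: 'Answer in JSON.' }], // note: `content`, not `text`
  jinja: true, // opt in to the minja/Jinja chat-template path added in this release
  response_format,
  tools: [
    {
      type: 'function',
      function: {
        name: 'get_weather', // hypothetical tool, not defined by this package
        parameters: {
          type: 'object',
          properties: { city: { type: 'string' } },
          required: ['city'],
        },
      },
    },
  ],
  tool_choice: 'auto',
  parallel_tool_calls: false,
}
```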
package/lib/index.ts CHANGED
@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
 
 const mods: { [key: string]: Module } = {}
 
-export const loadModel = async (options: LlamaModelOptionsExtended): Promise<LlamaContext> => {
+export const loadModel = async (
+  options: LlamaModelOptionsExtended,
+): Promise<LlamaContext> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
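loadModel still memoizes one native module per lib_variant, and per loadModule in binding.ts above, a variant whose binary fails to import falls back to the default build. A short usage sketch, with a placeholder model path:

```ts
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // lib_variant selects the prebuilt binary directory, e.g. bin/linux-vulkan/x64;
  // if that import fails, loadModule silently falls back to the default binary.
  const ctx = await loadModel({
    model: './model.gguf', // placeholder path
    lib_variant: 'vulkan', // 'default' | 'vulkan' | 'cuda'
  })
  console.log(ctx.getSystemInfo())
}

main()
```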
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.9",
+  "version": "0.3.10",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -77,5 +77,12 @@
     "testMatch": [
       "**/*.test.ts"
     ]
+  },
+  "prettier": {
+    "trailingComma": "all",
+    "tabWidth": 2,
+    "semi": false,
+    "singleQuote": true,
+    "printWidth": 80
   }
 }
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
-    std::vector<std::string> stop_words)
+    std::vector<std::string> stop_words,
+    int32_t chat_format)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words) {
+      _params(params), _stop_words(stop_words), _chat_format(chat_format) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
 }
 
 void LlamaCompletionWorker::OnOK() {
-  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
-  result.Set("tokens_evaluated", Napi::Number::New(Napi::AsyncWorker::Env(),
+  auto env = Napi::AsyncWorker::Env();
+  auto result = Napi::Object::New(env);
+  result.Set("tokens_evaluated", Napi::Number::New(env,
                                                    _result.tokens_evaluated));
   result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                    _result.tokens_predicted));
   result.Set("truncated",
-             Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
+             Napi::Boolean::New(env, _result.truncated));
   result.Set("text",
-             Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+             Napi::String::New(env, _result.text.c_str()));
+
+  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  if (!_stop) {
+    try {
+      common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      for (size_t i = 0; i < message.tool_calls.size(); i++) {
+        const auto &tc = message.tool_calls[i];
+        Napi::Object tool_call = Napi::Object::New(env);
+        tool_call.Set("type", "function");
+        Napi::Object function = Napi::Object::New(env);
+        function.Set("name", tc.name);
+        function.Set("arguments", tc.arguments);
+        tool_call.Set("function", function);
+        if (!tc.id.empty()) {
+          tool_call.Set("id", tc.id);
+        }
+        tool_calls.Set(i, tool_call);
+      }
+    } catch (const std::exception &e) {
+      // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+    }
+  }
+  if (tool_calls.Length() > 0) {
+    result.Set("tool_calls", tool_calls);
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
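On the JavaScript side, the result object now carries a tool_calls array in the shape built above ({ type: 'function', function: { name, arguments }, id? }), with function.arguments forwarded as a string. A consumption sketch; it casts the result because the typed LlamaCompletionResult shown in binding.ts does not (in this diff) declare the new field:

```ts
import type { LlamaContext } from '@fugood/llama.node/lib/binding'

const callWithTools = async (context: LlamaContext) => {
  const result = await context.completion({
    messages: [{ role: 'user', content: "What's the weather in Taipei?" }],
    jinja: true,
    tools: [/* tool definitions as in the earlier sketch */],
  })

  // tool_calls is only set when the parsed output actually contains calls
  const calls = ((result as any).tool_calls ?? []) as Array<{
    type: string
    id?: string
    function: { name: string; arguments: string }
  }>

  for (const call of calls) {
    const args = JSON.parse(call.function.arguments) // arguments arrives as a JSON string
    console.log('tool requested:', call.function.name, args)
  }
}
```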
package/src/LlamaCompletionWorker.h CHANGED
@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words = {});
+                        std::vector<std::string> stop_words = {},
+                        int32_t chat_format = 0);
 
   ~LlamaCompletionWorker();
 
@@ -30,6 +31,7 @@ private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
+  int32_t _chat_format;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
   bool _stop = false;