@fugood/llama.node 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +47 -8
  19. package/lib/index.js +21 -1
  20. package/lib/index.ts +31 -1
  21. package/package.json +12 -3
  22. package/src/LlamaCompletionWorker.cpp +33 -6
  23. package/src/LlamaCompletionWorker.h +3 -1
  24. package/src/LlamaContext.cpp +336 -28
  25. package/src/LlamaContext.h +2 -0
  26. package/src/common.hpp +19 -2
  27. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  29. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  31. package/src/llama.cpp/CMakeLists.txt +10 -19
  32. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  33. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  34. package/src/llama.cpp/common/arg.cpp +66 -16
  35. package/src/llama.cpp/common/chat-template.hpp +515 -0
  36. package/src/llama.cpp/common/chat.cpp +966 -0
  37. package/src/llama.cpp/common/chat.hpp +52 -0
  38. package/src/llama.cpp/common/common.cpp +159 -36
  39. package/src/llama.cpp/common/common.h +56 -14
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  41. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  42. package/src/llama.cpp/common/llguidance.cpp +270 -0
  43. package/src/llama.cpp/common/log.cpp +1 -10
  44. package/src/llama.cpp/common/log.h +10 -0
  45. package/src/llama.cpp/common/minja.hpp +2868 -0
  46. package/src/llama.cpp/common/sampling.cpp +22 -1
  47. package/src/llama.cpp/common/sampling.h +3 -0
  48. package/src/llama.cpp/docs/build.md +54 -9
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  50. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  51. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  52. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  54. package/src/llama.cpp/examples/llava/clip.h +2 -0
  55. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  56. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  57. package/src/llama.cpp/examples/main/main.cpp +26 -25
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  59. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  60. package/src/llama.cpp/examples/run/run.cpp +224 -69
  61. package/src/llama.cpp/examples/server/server.cpp +252 -81
  62. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  63. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  64. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  65. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  66. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  67. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  68. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  71. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  73. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  74. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  75. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  77. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  81. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  82. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  83. package/src/llama.cpp/include/llama.h +14 -1
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  85. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  86. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  87. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  88. package/src/llama.cpp/src/llama-arch.h +3 -1
  89. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  90. package/src/llama.cpp/src/llama-chat.h +1 -0
  91. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  92. package/src/llama.cpp/src/llama-grammar.h +22 -1
  93. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  94. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  95. package/src/llama.cpp/src/llama-model.cpp +76 -6
  96. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  97. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  98. package/src/llama.cpp/src/llama.cpp +181 -123
  99. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  100. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  101. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  102. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  103. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  104. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  105. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  106. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/lib/binding.js CHANGED
@@ -52,11 +52,11 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
      try {
          if (variant && variant !== 'default') {
              setupEnv(variant);
-             return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+             return (yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
          }
      }
      catch (_a) { } // ignore errors and try the common path
      setupEnv();
-     return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+     return (yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s))));
  });
  exports.loadModule = loadModule;
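
Note: the change above only parenthesizes the compiled dynamic require used for variant-specific binaries; behavior is unchanged. A minimal sketch of exercising that variant fallback through the package's public API (the 'vulkan' value comes from the LibVariant type in binding.ts below; the model path is a placeholder assumption):

import { loadModule } from '@fugood/llama.node'

async function main() {
  // Request the 'vulkan' build; if its binary fails to load, loadModule
  // silently falls back to the default build for this platform/arch.
  const mod = await loadModule('vulkan')
  const ctx = new mod.LlamaContext({ model: './models/model.gguf' }) // placeholder path
  console.log(ctx.getSystemInfo())
}

main().catch(console.error)
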
package/lib/binding.ts CHANGED
@@ -2,11 +2,12 @@ import * as path from 'path'
  
  export type ChatMessage = {
    role: string
-   text: string
+   content: string
  }
  
  export type LlamaModelOptions = {
    model: string
+   chat_template?: string
    embedding?: boolean
    embd_normalize?: number
    pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -42,10 +43,24 @@ export type LlamaModelOptions = {
    lora_list?: { path: string; scaled: number }[]
  }
  
+ export type CompletionResponseFormat = {
+   type: 'text' | 'json_object' | 'json_schema'
+   json_schema?: {
+     strict?: boolean
+     schema: object
+   }
+   schema?: object // for json_object type
+ }
+
  export type LlamaCompletionOptions = {
    messages?: ChatMessage[]
+   jinja?: boolean
+   chat_template?: string
+   response_format?: CompletionResponseFormat
+   tools?: object
+   parallel_tool_calls?: boolean
+   tool_choice?: string
    prompt?: string
-   n_samples?: number
    temperature?: number
    top_k?: number
    top_p?: number
@@ -70,6 +85,9 @@ export type LlamaCompletionOptions = {
    seed?: number
    stop?: string[]
    grammar?: string
+   grammar_lazy?: boolean
+   grammar_triggers?: { word: string; at_start: boolean }[]
+   preserved_tokens?: string[]
  }
  
  export type LlamaCompletionResult = {
@@ -105,8 +123,21 @@ export interface LlamaContext {
    new (options: LlamaModelOptions): LlamaContext
    getSystemInfo(): string
    getModelInfo(): object
-   getFormattedChat(messages: ChatMessage[]): string
-   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
+   getFormattedChat(
+     messages: ChatMessage[],
+     chat_template?: string,
+     params?: {
+       jinja?: boolean
+       response_format?: CompletionResponseFormat
+       tools?: object
+       parallel_tool_calls?: object
+       tool_choice?: string
+     },
+   ): object | string
+   completion(
+     options: LlamaCompletionOptions,
+     callback?: (token: LlamaCompletionToken) => void,
+   ): Promise<LlamaCompletionResult>
    stopCompletion(): void
    tokenize(text: string): Promise<TokenizeResult>
    detokenize(tokens: number[]): Promise<string>
@@ -119,6 +150,7 @@ export interface LlamaContext {
    getLoadedLoraAdapters(): { path: string; scaled: number }[]
    // static
    loadModelInfo(path: string, skip: string[]): Promise<Object>
+   toggleNativeLog(enable: boolean, callback: (level: string, text: string) => void): void
  }
  
  export interface Module {
@@ -129,7 +161,10 @@ export type LibVariant = 'default' | 'vulkan' | 'cuda'
  
  const setupEnv = (variant?: string) => {
    const postfix = variant ? `-${variant}` : ''
-   const binPath = path.resolve(__dirname, `../bin/${process.platform}${postfix}/${process.arch}/`)
+   const binPath = path.resolve(
+     __dirname,
+     `../bin/${process.platform}${postfix}/${process.arch}/`,
+   )
    const systemPathEnv = process.env.PATH ?? process.env.Path ?? ''
    if (!systemPathEnv.includes(binPath)) {
      if (process.platform === 'win32') {
@@ -144,9 +179,13 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
    try {
      if (variant && variant !== 'default') {
        setupEnv(variant)
-       return await import(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
+       return (await import(
+         `../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`
+       )) as Module
      }
    } catch {} // ignore errors and try the common path
    setupEnv()
-   return await import(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
- }
+   return (await import(
+     `../bin/${process.platform}/${process.arch}/llama-node.node`
+   )) as Module
+ }
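
The typings above add a Jinja-based chat path (jinja, chat_template), an OpenAI-style response_format, and tool-related options, and rename ChatMessage.text to content. A hedged sketch of the JSON-schema-constrained path, assuming a local GGUF model (the path and schema are illustrative, not part of this package):

import { loadModel } from '@fugood/llama.node'

async function main() {
  const ctx = await loadModel({ model: './models/model.gguf' }) // placeholder path

  const result = await ctx.completion({
    jinja: true, // route through the minja/Jinja chat-template path added in this release range
    messages: [
      { role: 'system', content: 'Answer with compact JSON.' },
      { role: 'user', content: 'List three GGML backends.' },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        strict: true,
        schema: {
          type: 'object',
          properties: { backends: { type: 'array', items: { type: 'string' } } },
          required: ['backends'],
        },
      },
    },
    temperature: 0.2,
  })

  console.log(result.text) // expected to be constrained to the schema
}

main().catch(console.error)
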
package/lib/index.js CHANGED
@@ -23,7 +23,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
      });
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
+ exports.toggleNativeLog = exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
+ exports.addNativeLogListener = addNativeLogListener;
  const binding_1 = require("./binding");
  __exportStar(require("./binding"), exports);
  const mods = {};
@@ -49,3 +50,22 @@ const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function*
      return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
  });
  exports.loadLlamaModelInfo = loadLlamaModelInfo;
+ const logListeners = [];
+ const logCallback = (level, text) => {
+     logListeners.forEach((listener) => listener(level, text));
+ };
+ const toggleNativeLog = (enable, options) => __awaiter(void 0, void 0, void 0, function* () {
+     var _a, _b;
+     const v = (_a = options === null || options === void 0 ? void 0 : options.variant) !== null && _a !== void 0 ? _a : 'default';
+     (_b = mods[v]) !== null && _b !== void 0 ? _b : (mods[v] = yield (0, binding_1.loadModule)(v));
+     return mods[v].LlamaContext.toggleNativeLog(enable, logCallback);
+ });
+ exports.toggleNativeLog = toggleNativeLog;
+ function addNativeLogListener(listener) {
+     logListeners.push(listener);
+     return {
+         remove: () => {
+             logListeners.splice(logListeners.indexOf(listener), 1);
+         },
+     };
+ }
package/lib/index.ts CHANGED
@@ -9,7 +9,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
  
  const mods: { [key: string]: Module } = {}
  
- export const loadModel = async (options: LlamaModelOptionsExtended): Promise<LlamaContext> => {
+ export const loadModel = async (
+   options: LlamaModelOptionsExtended,
+ ): Promise<LlamaContext> => {
    const variant = options.lib_variant ?? 'default'
    mods[variant] ??= await loadModule(options.lib_variant)
    return new mods[variant].LlamaContext(options)
@@ -30,3 +32,31 @@ export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
    mods[variant] ??= await loadModule(variant)
    return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
  }
+
+ const logListeners: Array<(level: string, text: string) => void> = []
+
+ const logCallback = (level: string, text: string) => {
+   logListeners.forEach((listener) => listener(level, text))
+ }
+
+ export const toggleNativeLog = async (
+   enable: boolean,
+   options?: {
+     variant?: LibVariant
+   },
+ ) => {
+   const v = options?.variant ?? 'default'
+   mods[v] ??= await loadModule(v)
+   return mods[v].LlamaContext.toggleNativeLog(enable, logCallback)
+ }
+
+ export function addNativeLogListener(
+   listener: (level: string, text: string) => void,
+ ): { remove: () => void } {
+   logListeners.push(listener)
+   return {
+     remove: () => {
+       logListeners.splice(logListeners.indexOf(listener), 1)
+     },
+   }
+ }
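
The new exports above wire llama.cpp's native logging into JS listeners. A minimal usage sketch (mirroring lines to the console is an assumption here, not something the package prescribes):

import { toggleNativeLog, addNativeLogListener } from '@fugood/llama.node'

async function enableNativeLogging() {
  // Enable forwarding of native llama.cpp log lines to registered JS listeners.
  await toggleNativeLog(true)

  const subscription = addNativeLogListener((level, text) => {
    console.log(`[llama][${level}] ${text.trimEnd()}`)
  })

  // Call subscription.remove() to stop this listener;
  // toggleNativeLog(false) disables native forwarding entirely.
  return subscription
}

enableNativeLogging().catch(console.error)
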
package/package.json CHANGED
@@ -1,8 +1,8 @@
  {
    "name": "@fugood/llama.node",
    "access": "public",
-   "version": "0.3.9",
-   "description": "Llama.cpp for Node.js",
+   "version": "0.3.11",
+   "description": "An another Node binding of llama.cpp",
    "main": "lib/index.js",
    "scripts": {
      "test": "jest",
@@ -22,7 +22,9 @@
      "llama",
      "llm",
      "ai",
-     "genai"
+     "genai",
+     "Local LLM",
+     "llama.cpp"
    ],
    "author": "Hans <hans.chen@bricks.tools>",
    "license": "MIT",
@@ -77,5 +79,12 @@
      "testMatch": [
        "**/*.test.ts"
      ]
+   },
+   "prettier": {
+     "trailingComma": "all",
+     "tabWidth": 2,
+     "semi": false,
+     "singleQuote": true,
+     "printWidth": 80
    }
  }
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -35,9 +35,10 @@ size_t findStoppingStrings(const std::string &text,
  LlamaCompletionWorker::LlamaCompletionWorker(
      const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
      Napi::Function callback, common_params params,
-     std::vector<std::string> stop_words)
+     std::vector<std::string> stop_words,
+     int32_t chat_format)
      : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-       _params(params), _stop_words(stop_words) {
+       _params(params), _stop_words(stop_words), _chat_format(chat_format) {
    if (!callback.IsEmpty()) {
      _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                            "LlamaCompletionCallback", 0, 1);
@@ -152,15 +153,41 @@ void LlamaCompletionWorker::Execute() {
  }
  
  void LlamaCompletionWorker::OnOK() {
-   auto result = Napi::Object::New(Napi::AsyncWorker::Env());
-   result.Set("tokens_evaluated", Napi::Number::New(Napi::AsyncWorker::Env(),
+   auto env = Napi::AsyncWorker::Env();
+   auto result = Napi::Object::New(env);
+   result.Set("tokens_evaluated", Napi::Number::New(env,
                                                     _result.tokens_evaluated));
    result.Set("tokens_predicted", Napi::Number::New(Napi::AsyncWorker::Env(),
                                                     _result.tokens_predicted));
    result.Set("truncated",
-              Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
+              Napi::Boolean::New(env, _result.truncated));
    result.Set("text",
-              Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+              Napi::String::New(env, _result.text.c_str()));
+
+   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+   if (!_stop) {
+     try {
+       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+       for (size_t i = 0; i < message.tool_calls.size(); i++) {
+         const auto &tc = message.tool_calls[i];
+         Napi::Object tool_call = Napi::Object::New(env);
+         tool_call.Set("type", "function");
+         Napi::Object function = Napi::Object::New(env);
+         function.Set("name", tc.name);
+         function.Set("arguments", tc.arguments);
+         tool_call.Set("function", function);
+         if (!tc.id.empty()) {
+           tool_call.Set("id", tc.id);
+         }
+         tool_calls.Set(i, tool_call);
+       }
+     } catch (const std::exception &e) {
+       // console_log(env, "Error parsing tool calls: " + std::string(e.what()));
+     }
+   }
+   if (tool_calls.Length() > 0) {
+     result.Set("tool_calls", tool_calls);
+   }
  
    auto ctx = _sess->context();
    const auto timings_token = llama_perf_context(ctx);
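
On the JS side, the tool_calls array assembled in OnOK() above is attached to the completion result. A hedged consumption sketch, assuming a hypothetical get_weather tool and treating function.arguments as a JSON string (both are assumptions drawn from this diff, not documented API):

import { loadModel } from '@fugood/llama.node'

async function main() {
  const ctx = await loadModel({ model: './models/model.gguf' }) // placeholder path

  const result = await ctx.completion({
    jinja: true,
    messages: [{ role: 'user', content: 'What is the weather in Tokyo?' }],
    tools: [
      {
        type: 'function',
        function: {
          name: 'get_weather', // hypothetical tool
          description: 'Look up current weather for a city',
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      },
    ],
    tool_choice: 'auto',
  })

  // tool_calls is not in the published result typings shown in this diff,
  // so it is read loosely; the shape mirrors what OnOK() builds:
  // { type: 'function', function: { name, arguments }, id? }
  const toolCalls = (result as any).tool_calls ?? []
  for (const call of toolCalls) {
    const args = JSON.parse(call.function.arguments) // arguments arrive as a JSON string
    console.log(`model requested ${call.function.name}`, args)
  }
}

main().catch(console.error)
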
package/src/LlamaCompletionWorker.h CHANGED
@@ -13,7 +13,8 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
  public:
    LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                          Napi::Function callback, common_params params,
-                         std::vector<std::string> stop_words = {});
+                         std::vector<std::string> stop_words = {},
+                         int32_t chat_format = 0);
  
    ~LlamaCompletionWorker();
  
@@ -30,6 +31,7 @@ private:
    LlamaSessionPtr _sess;
    common_params _params;
    std::vector<std::string> _stop_words;
+   int32_t _chat_format;
    Napi::ThreadSafeFunction _tsfn;
    bool _has_callback = false;
    bool _stop = false;