@fugood/llama.node 1.3.8 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/lib/binding.js +25 -18
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +17 -17
  6. package/scripts/llama.cpp.patch +53 -4
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/LlamaContext.cpp +6 -1
  9. package/src/llama.cpp/common/arg.cpp +1 -1
  10. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  11. package/src/llama.cpp/common/chat.cpp +0 -952
  12. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  13. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  14. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  15. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
  18. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
  21. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  22. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  23. package/src/llama.cpp/src/llama-arch.cpp +48 -3
  24. package/src/llama.cpp/src/llama-arch.h +2 -0
  25. package/src/llama.cpp/src/llama-context.cpp +6 -2
  26. package/src/llama.cpp/src/llama-hparams.h +1 -1
  27. package/src/llama.cpp/src/llama-model.cpp +102 -5
  28. package/src/llama.cpp/src/llama-model.h +4 -0
  29. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  30. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  31. package/src/llama.cpp/src/models/models.h +51 -1
  32. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
package/lib/binding.js CHANGED
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
  }) : function(o, v) {
  o["default"] = v;
  });
- var __importStar = (this && this.__importStar) || (function () {
- var ownKeys = function(o) {
- ownKeys = Object.getOwnPropertyNames || function (o) {
- var ar = [];
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
- return ar;
- };
- return ownKeys(o);
- };
- return function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
- __setModuleDefault(result, mod);
- return result;
- };
- })();
+ var __importStar = (this && this.__importStar) || function (mod) {
+ if (mod && mod.__esModule) return mod;
+ var result = {};
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+ __setModuleDefault(result, mod);
+ return result;
+ };
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -41,8 +31,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
  step((generator = generator.apply(thisArg, _arguments || [])).next());
  });
  };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.isLibVariantAvailable = exports.loadModule = void 0;
+ const path_1 = __importDefault(require("path"));
  const getPlatformPackageName = (variant) => {
  const platform = process.platform;
  const arch = process.arch;
@@ -58,7 +52,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
  }
  });
  const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
- let module = yield loadPlatformPackage(getPlatformPackageName(variant));
+ const packageName = getPlatformPackageName(variant);
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+ }
+ catch (_a) {
+ /* no-op */
+ }
+ }
+ }
+ let module = yield loadPlatformPackage(packageName);
  if (module) {
  return module;
  }
package/lib/binding.ts CHANGED
@@ -1,3 +1,5 @@
+ import path from 'path'
+
  export type MessagePart = {
  type: string
  text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
  }

  export const loadModule = async (variant?: LibVariant): Promise<Module> => {
- let module = await loadPlatformPackage(getPlatformPackageName(variant))
+ const packageName = getPlatformPackageName(variant)
+
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path.dirname(
+ require.resolve(packageName),
+ )
+ } catch {
+ /* no-op */
+ }
+ }
+ }
+
+ let module = await loadPlatformPackage(packageName)
  if (module) {
  return module
  }
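As this hunk and the compiled binding.js change above show, loading the snapdragon variant in 1.4.x now sets ADSP_LIBRARY_PATH to the directory of the resolved platform package (e.g. @fugood/node-llama-linux-arm64-snapdragon) when the variable is not already defined, so the Hexagon HTP libraries shipped with the prebuilt binary can be located at runtime. A minimal usage sketch, assuming loadModule is imported from the package's lib/binding module and that an explicit ADSP_LIBRARY_PATH in the environment still takes precedence:

```ts
import { loadModule } from '@fugood/llama.node/lib/binding'

async function main() {
  // Optional explicit override; if unset, 1.4.x derives the path from
  // require.resolve() of the snapdragon platform package.
  // process.env.ADSP_LIBRARY_PATH = '/opt/htp-libs'

  const mod = await loadModule('snapdragon')
  console.log('ADSP_LIBRARY_PATH =', process.env.ADSP_LIBRARY_PATH)
  return mod
}

main()
```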
package/lib/index.js CHANGED
@@ -87,9 +87,9 @@ class LlamaContextWrapper {
  return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
  }
  getFormattedChat(messages, template, params) {
- var _a;
+ var _a, _b;
  const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
- const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+ const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
  let tmpl;
  if (template)
  tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
  tools: params === null || params === void 0 ? void 0 : params.tools,
  parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
  tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
- enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
+ enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
  add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
  now: params === null || params === void 0 ? void 0 : params.now,
  chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts CHANGED
@@ -124,7 +124,7 @@ class LlamaContextWrapper {
  ): FormattedChatResult {
  const { messages: chat, has_media, media_paths } = formatMediaChat(messages)

- const useJinja = this.isJinjaSupported() && params?.jinja
+ const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
  let tmpl
  if (template) tmpl = template // Force replace if provided

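The practical effect of this one-line change is that getFormattedChat now applies Jinja chat templating by default whenever the model's template supports it; before 1.4.x, Jinja had to be requested explicitly via params.jinja. A hedged sketch of the behavioral difference, where ctx stands for a LlamaContextWrapper instance obtained from this package (the declared type below is a simplified assumption, not the package's full signature):

```ts
// Simplified, assumed shape of the wrapper, for illustration only.
declare const ctx: {
  getFormattedChat(
    messages: { role: string; content: string }[],
    template?: string,
    params?: { jinja?: boolean },
  ): unknown
}

const messages = [{ role: 'user', content: 'Hello' }]

// 1.3.x behaviour: Jinja templating only when explicitly requested.
ctx.getFormattedChat(messages, undefined, { jinja: true })

// 1.4.x behaviour: Jinja is the default whenever the model template
// supports it; opt out explicitly to keep the legacy llama-chat path.
ctx.getFormattedChat(messages)
ctx.getFormattedChat(messages, undefined, { jinja: false })
```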
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.8",
+ "version": "1.4.1",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.8",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-x64-cuda": "1.3.8",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
- "@fugood/node-llama-linux-arm64": "1.3.8",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.8",
- "@fugood/node-llama-win32-x64": "1.3.8",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.8",
- "@fugood/node-llama-win32-x64-cuda": "1.3.8",
- "@fugood/node-llama-win32-arm64": "1.3.8",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-darwin-x64": "1.3.8",
- "@fugood/node-llama-darwin-arm64": "1.3.8"
+ "@fugood/node-llama-darwin-arm64": "1.4.1",
+ "@fugood/node-llama-darwin-x64": "1.4.1",
+ "@fugood/node-llama-linux-arm64": "1.4.1",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.1",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.1",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.1",
+ "@fugood/node-llama-linux-x64": "1.4.1",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.1",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.1",
+ "@fugood/node-llama-win32-arm64": "1.4.1",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.1",
+ "@fugood/node-llama-win32-x64": "1.4.1",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.1",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.1"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -100,7 +100,7 @@
  "jest": "^29.7.0",
  "node-addon-api": "^8.0.0",
  "node-wav": "^0.0.2",
- "release-it": "^17.7.0",
+ "release-it": "^19.0.6",
  "rimraf": "^6.0.1",
  "typescript": "^5.4.5",
  "wait-for-expect": "^3.0.2"
@@ -130,4 +130,4 @@
  "singleQuote": true,
  "printWidth": 80
  }
- }
+ }
package/scripts/llama.cpp.patch CHANGED
@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 6fa05a604..87dfa7a8b 100644
+ index b4a0f985e..2383d2ea9 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -817,7 +804,7 @@ static std::string apply(
+ @@ -709,7 +696,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -122,10 +122,59 @@ index 7e53a57b7..a328d4db4 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- index 72a82a891..7869ad323 100644
+ index 72a82a891..1b681f4dd 100644
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- @@ -3417,6 +3417,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+ GGML_UNUSED(dev);
+ }
+
+ +
+ +// ~2GB per session for now
+ +#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024)
+ +// Max to 3.5GB
+ +#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024)
+ +
+ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+ - // ~2GB per session for now
+ - *free = 2ULL * 1024 * 1024 * 1024;
+ - *total = *free;
+ + const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY");
+ + if (str_mem) {
+ + *free = std::stoull(str_mem);
+ + if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
+ + } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_MAX;
+ + }
+ + } else {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
+ + }
+
+ + *total = *free;
+ GGML_UNUSED(dev);
+ }
+
+ @@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ }
+ }
+
+ +#if defined(__ANDROID__)
+ if(opt_arch < 75) {
+ opt_ndev = 1;
+ - GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
+ + GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n");
+ + }
+ +#else
+ + if(opt_arch < 73) {
+ + opt_ndev = 1;
+ + GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n");
+ }
+ +#endif
+
+ GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
+
+ @@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
  devices[i].context = nullptr;
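The ggml-hexagon portion of the patch replaces the hard-coded ~2 GB per-session memory report with a GGML_HEXAGON_SESSION_MEMORY environment variable, clamped between the 2 GB default and a 3.5 GB ceiling. A hedged sketch of driving it from Node, under the assumption that the variable only needs to be present in the process environment before the Hexagon backend queries device memory:

```ts
// Request 3 GiB per Hexagon session. Values below 2 GiB or above 3.5 GiB
// are clamped by the patched backend, which parses the value with
// std::stoull, so a plain decimal byte count is expected.
process.env.GGML_HEXAGON_SESSION_MEMORY = String(3n * 1024n * 1024n * 1024n)
```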
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
  const auto &prob = probs[i];
  Napi::Object token_obj = Napi::Object::New(env);

- std::string token_str = common_token_to_piece(ctx, prob.tok);
+ std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
  token_obj.Set("content", Napi::String::New(env, token_str));

  Napi::Array token_probs = Napi::Array::New(env);
  for (size_t j = 0; j < prob.probs.size(); j++) {
  const auto &p = prob.probs[j];
  Napi::Object prob_obj = Napi::Object::New(env);
- std::string tok_str = common_token_to_piece(ctx, p.tok);
+ std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
  prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
  prob_obj.Set("prob", Napi::Number::New(env, p.prob));
  token_probs.Set(j, prob_obj);
package/src/LlamaContext.cpp CHANGED
@@ -321,15 +321,20 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

  // Parse devices array
  if (options.Has("devices") && options.Get("devices").IsArray()) {
+ std::vector<ggml_backend_dev_t> devs;
  auto devices_array = options.Get("devices").As<Napi::Array>();
  for (size_t i = 0; i < devices_array.Length(); i++) {
  auto device_name = devices_array.Get(i).ToString().Utf8Value();
  auto * dev = ggml_backend_dev_by_name(device_name.c_str());
  if (dev) {
- params.devices.push_back(dev);
+ devs.push_back(dev);
  }
  // Skip invalid device names silently
  }
+ if (!devs.empty()) {
+ params.devices = devs;
+ params.devices.push_back(nullptr); // nullptr terminator required by llama.cpp
+ }
  }

  std::vector<common_adapter_lora_info> lora;
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params) {
  params.kv_unified = true;
  }
- ).set_env("LLAMA_ARG_KV_SPLIT"));
+ ).set_env("LLAMA_ARG_KV_UNIFIED"));
  add_opt(common_arg(
  {"--no-context-shift"},
  string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),