@fugood/llama.node 1.3.8 → 1.4.0

Files changed (31)
  1. package/lib/binding.js +18 -1
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +15 -15
  6. package/scripts/llama.cpp.patch +2 -2
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/llama.cpp/common/arg.cpp +1 -1
  9. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  10. package/src/llama.cpp/common/chat.cpp +0 -952
  11. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  12. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  13. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  14. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
  15. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
  18. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
  19. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  21. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  22. package/src/llama.cpp/src/llama-arch.cpp +48 -3
  23. package/src/llama.cpp/src/llama-arch.h +2 -0
  24. package/src/llama.cpp/src/llama-context.cpp +6 -2
  25. package/src/llama.cpp/src/llama-hparams.h +1 -1
  26. package/src/llama.cpp/src/llama-model.cpp +102 -5
  27. package/src/llama.cpp/src/llama-model.h +4 -0
  28. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  29. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  30. package/src/llama.cpp/src/models/models.h +51 -1
  31. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
package/lib/binding.js CHANGED
@@ -41,8 +41,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
  step((generator = generator.apply(thisArg, _arguments || [])).next());
  });
  };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.isLibVariantAvailable = exports.loadModule = void 0;
+ const path_1 = __importDefault(require("path"));
  const getPlatformPackageName = (variant) => {
  const platform = process.platform;
  const arch = process.arch;
@@ -58,7 +62,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
  }
  });
  const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
- let module = yield loadPlatformPackage(getPlatformPackageName(variant));
+ const packageName = getPlatformPackageName(variant);
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+ }
+ catch (_a) {
+ /* no-op */
+ }
+ }
+ }
+ let module = yield loadPlatformPackage(packageName);
  if (module) {
  return module;
  }
package/lib/binding.ts CHANGED
@@ -1,3 +1,5 @@
+ import path from 'path'
+
  export type MessagePart = {
  type: string
  text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
  }

  export const loadModule = async (variant?: LibVariant): Promise<Module> => {
- let module = await loadPlatformPackage(getPlatformPackageName(variant))
+ const packageName = getPlatformPackageName(variant)
+
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path.dirname(
+ require.resolve(packageName),
+ )
+ } catch {
+ /* no-op */
+ }
+ }
+ }
+
+ let module = await loadPlatformPackage(packageName)
  if (module) {
  return module
  }
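For the `snapdragon` variant, `loadModule` now points Qualcomm's FastRPC runtime at the bundled Hexagon (HTP) libraries by setting `ADSP_LIBRARY_PATH` to the directory of the resolved platform package, but only when the variable is not already set, so a caller can still supply its own search path. A minimal sketch, assuming the `lib/binding` entry is importable at the path shown:

```ts
// Usage sketch: the loader above only sets ADSP_LIBRARY_PATH when it is
// unset, so a value exported beforehand always wins.
import { loadModule } from '@fugood/llama.node/lib/binding'

async function initSnapdragon() {
  // '/opt/qcom/htp-libs' is a hypothetical override path, not a documented one.
  process.env.ADSP_LIBRARY_PATH ??= '/opt/qcom/htp-libs'
  return loadModule('snapdragon')
}
```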
package/lib/index.js CHANGED
@@ -87,9 +87,9 @@ class LlamaContextWrapper {
  return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
  }
  getFormattedChat(messages, template, params) {
- var _a;
+ var _a, _b;
  const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
- const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+ const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
  let tmpl;
  if (template)
  tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
  tools: params === null || params === void 0 ? void 0 : params.tools,
  parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
  tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
- enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
+ enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
  add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
  now: params === null || params === void 0 ? void 0 : params.now,
  chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts CHANGED
@@ -124,7 +124,7 @@ class LlamaContextWrapper {
  ): FormattedChatResult {
  const { messages: chat, has_media, media_paths } = formatMediaChat(messages)

- const useJinja = this.isJinjaSupported() && params?.jinja
+ const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
  let tmpl
  if (template) tmpl = template // Force replace if provided

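This is the behavior change behind the minor-version bump: when the model's chat template supports it, `getFormattedChat` now applies jinja templating by default, where previously it required an explicit `jinja: true`. A self-contained sketch of the new gating logic (an illustration of the semantics, not the library's actual code):

```ts
// Before: `params?.jinja` gated on truthiness, so omitting the flag disabled jinja.
// After: `params?.jinja ?? true` only falls back when the flag is null/undefined.
type FormatParams = { jinja?: boolean } // hypothetical subset of the real params type

const useJinja = (jinjaSupported: boolean, params?: FormatParams): boolean =>
  jinjaSupported && (params?.jinja ?? true)

console.log(useJinja(true))                   // true  — new default
console.log(useJinja(true, { jinja: false })) // false — explicit opt-out, matches 1.3.x default
console.log(useJinja(true, { jinja: true }))  // true
```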
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.8",
+ "version": "1.4.0",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.8",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-x64-cuda": "1.3.8",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
- "@fugood/node-llama-linux-arm64": "1.3.8",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.8",
- "@fugood/node-llama-win32-x64": "1.3.8",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.8",
- "@fugood/node-llama-win32-x64-cuda": "1.3.8",
- "@fugood/node-llama-win32-arm64": "1.3.8",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-darwin-x64": "1.3.8",
- "@fugood/node-llama-darwin-arm64": "1.3.8"
+ "@fugood/node-llama-linux-x64": "1.4.0",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.0",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.0",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.0",
+ "@fugood/node-llama-linux-arm64": "1.4.0",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.0",
+ "@fugood/node-llama-win32-x64": "1.4.0",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.0",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.0",
+ "@fugood/node-llama-win32-arm64": "1.4.0",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.0",
+ "@fugood/node-llama-darwin-x64": "1.4.0",
+ "@fugood/node-llama-darwin-arm64": "1.4.0"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644

  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 6fa05a604..87dfa7a8b 100644
+ index b4a0f985e..2383d2ea9 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -817,7 +804,7 @@ static std::string apply(
+ @@ -709,7 +696,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
  const auto &prob = probs[i];
  Napi::Object token_obj = Napi::Object::New(env);

- std::string token_str = common_token_to_piece(ctx, prob.tok);
+ std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
  token_obj.Set("content", Napi::String::New(env, token_str));

  Napi::Array token_probs = Napi::Array::New(env);
  for (size_t j = 0; j < prob.probs.size(); j++) {
  const auto &p = prob.probs[j];
  Napi::Object prob_obj = Napi::Object::New(env);
- std::string tok_str = common_token_to_piece(ctx, p.tok);
+ std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
  prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
  prob_obj.Set("prob", Napi::Number::New(env, p.prob));
  token_probs.Set(j, prob_obj);
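Token strings in probability output now go through `rnllama::tokens_to_output_formatted_string` rather than raw `common_token_to_piece`; judging by the helper's counterpart in llama.cpp's server utilities, this renders a lone partial UTF-8 byte as an escaped placeholder instead of leaking invalid bytes into the resulting JS string. The per-token objects built here have roughly the following shape (`content`, `tok_str`, and `prob` are taken from the `Set()` calls above; the wrapper type name and the `probs` key are assumptions for illustration):

```ts
// Sketch of the objects produced by TokenProbsToArray, not a published type.
type TokenProbEntry = {
  content: string // token text, now passed through the UTF-8-safe formatter
  probs: Array<{
    tok_str: string // candidate token text, same formatting applied
    prob: number    // candidate probability
  }>
}
```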
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params) {
  params.kv_unified = true;
  }
- ).set_env("LLAMA_ARG_KV_SPLIT"));
+ ).set_env("LLAMA_ARG_KV_UNIFIED"));
  add_opt(common_arg(
  {"--no-context-shift"},
  string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
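The environment variable attached to the unified-KV-cache option is renamed from `LLAMA_ARG_KV_SPLIT` to `LLAMA_ARG_KV_UNIFIED`, matching the `params.kv_unified` field it actually sets; scripts that export the old name are silently ignored from this version on. A hedged sketch of updating a Node-side launcher (the binary and model paths below are placeholders, not part of this package):

```ts
import { spawn } from 'node:child_process'

// Enable the unified KV cache via the renamed variable when spawning a
// llama.cpp tool; LLAMA_ARG_KV_SPLIT is no longer recognized.
spawn('./llama-server', ['-m', './model.gguf'], {
  env: { ...process.env, LLAMA_ARG_KV_UNIFIED: '1' },
  stdio: 'inherit',
})
```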