@fugood/llama.node 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/binding.js CHANGED
@@ -15,13 +15,23 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) || function (mod) {
-    if (mod && mod.__esModule) return mod;
-    var result = {};
-    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
-    __setModuleDefault(result, mod);
-    return result;
-};
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
package/lib/binding.ts CHANGED
@@ -129,6 +129,7 @@ export type LlamaCompletionResult = {
   text: string
   reasoning_content?: string
   content?: string
+  chat_format: number
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
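
The binding.ts change above adds a chat_format field to LlamaCompletionResult, exposing the numeric chat-template format id used for the completion. A minimal usage sketch follows; the loadModel/completion call shapes are assumptions about the package's typical API and are not part of this diff, while the result fields come from binding.ts above.

// Hypothetical sketch, not part of the diff: loadModel() and the completion()
// parameter shape are assumed; only result.text, result.chat_format and
// result.tokens_predicted are taken from the LlamaCompletionResult type above.
import { loadModel } from '@fugood/llama.node'

async function main() {
  const context = await loadModel({ model: './model.gguf' })
  const result = await context.completion({
    messages: [{ role: 'user', content: 'Hello' }],
    n_predict: 64,
  })
  // chat_format reports which chat template format produced the output
  console.log(result.text, result.chat_format, result.tokens_predicted)
}

main()
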
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.1.0",
+  "version": "1.1.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -71,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.1.0",
-    "@fugood/node-llama-linux-x64-vulkan": "1.1.0",
-    "@fugood/node-llama-linux-x64-cuda": "1.1.0",
-    "@fugood/node-llama-linux-arm64": "1.1.0",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.1.0",
-    "@fugood/node-llama-linux-arm64-cuda": "1.1.0",
-    "@fugood/node-llama-win32-x64": "1.1.0",
-    "@fugood/node-llama-win32-x64-vulkan": "1.1.0",
-    "@fugood/node-llama-win32-x64-cuda": "1.1.0",
-    "@fugood/node-llama-win32-arm64": "1.1.0",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.1.0",
-    "@fugood/node-llama-darwin-x64": "1.1.0",
-    "@fugood/node-llama-darwin-arm64": "1.1.0"
+    "@fugood/node-llama-linux-x64": "1.1.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.1",
+    "@fugood/node-llama-linux-arm64": "1.1.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-x64": "1.1.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-arm64": "1.1.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-darwin-x64": "1.1.1",
+    "@fugood/node-llama-darwin-arm64": "1.1.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -157,10 +157,26 @@ void LlamaCompletionWorker::Execute() {
   // For multimodal input, n_past might already be set
   // Only decode text tokens if we have any input left
   if (n_input > 0) {
-    int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
-    if (ret < 0) {
-      SetError("Failed to decode token, code: " + std::to_string(ret));
-      break;
+    // Decode tokens in batches using n_batch as chunk size
+    int n_past_batch = n_cur;
+    int n_remaining = n_input;
+
+    while (n_remaining > 0) {
+      int n_eval = n_remaining;
+      if (n_eval > _params.n_batch) {
+        n_eval = _params.n_batch;
+      }
+
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
+      if (ret < 0) {
+        SetError("Failed to decode token batch, code: " + std::to_string(ret) +
+                 ", n_eval: " + std::to_string(n_eval) +
+                 ", n_past_batch: " + std::to_string(n_past_batch));
+        break;
+      }
+
+      n_past_batch += n_eval;
+      n_remaining -= n_eval;
     }
   }
 
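
The Execute() change above replaces the single llama_decode call with a loop that feeds the prompt in chunks of at most _params.n_batch tokens, so prompts longer than the configured batch size are evaluated piecewise instead of in one oversized batch. On the JavaScript side this only interacts with the context's batch-size setting; a hedged sketch follows, where the n_batch and n_ctx option names and the loadModel signature are assumptions, not part of this diff.

// Hypothetical sketch: choosing a batch size. With the batched decode above,
// a prompt longer than n_batch is evaluated in n_batch-sized chunks rather
// than a single llama_decode call.
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({
  model: './model.gguf',
  n_ctx: 4096,   // context window (assumed option name)
  n_batch: 512,  // decode chunk size used by the loop above (assumed option name)
})
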
@@ -255,6 +271,8 @@ void LlamaCompletionWorker::OnOK() {
   try {
     common_chat_syntax chat_syntax;
     chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+    result.Set("chat_format", Napi::Number::New(env, _chat_format));
+
     chat_syntax.thinking_forced_open = _thinking_forced_open;
 
     if (_reasoning_format == "deepseek") {
@@ -247,7 +247,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.cache_type_v = kv_cache_type_from_str(
       get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
-  params.kv_unified = get_option<bool>(options, "kv_unified", true);
+  params.kv_unified = get_option<bool>(options, "kv_unified", false);
 
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
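
The LlamaContext constructor change above flips the default of the kv_unified option from true to false, so newly created contexts no longer request a unified KV cache unless the caller opts back in. A hedged sketch of restoring the 1.1.0 behavior follows; the kv_unified option name comes from the diff, while the loadModel signature is assumed.

// Hypothetical sketch: explicitly re-enabling the unified KV cache now that
// the default is false. Only the "kv_unified" option itself comes from the diff.
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({
  model: './model.gguf',
  kv_unified: true, // matches the old 1.1.0 default
})
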