@fugood/llama.node 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/binding.ts CHANGED
@@ -309,6 +309,45 @@ export type BackendDeviceInfo = {
   metadata?: Record<string, any>
 }

+export type BenchResult = {
+  /** Maximum KV cache size */
+  nKvMax: number
+  /** Batch size */
+  nBatch: number
+  /** Micro-batch size */
+  nUBatch: number
+  /** Flash attention type (0=disabled, 1=enabled, 2=auto) */
+  flashAttn: number
+  /** Whether prompt processing is shared */
+  isPpShared: boolean
+  /** Number of GPU layers */
+  nGpuLayers: number
+  /** Number of threads */
+  nThreads: number
+  /** Number of threads for batch processing */
+  nThreadsBatch: number
+  /** Prompt processing tokens count */
+  pp: number
+  /** Text generation tokens count */
+  tg: number
+  /** Parallel level */
+  pl: number
+  /** KV cache used */
+  nKv: number
+  /** Time for prompt processing (ms) */
+  tPp: number
+  /** Speed of prompt processing (tokens/sec) */
+  speedPp: number
+  /** Time for text generation (ms) */
+  tTg: number
+  /** Speed of text generation (tokens/sec) */
+  speedTg: number
+  /** Total time (ms) */
+  t: number
+  /** Overall speed (tokens/sec) */
+  speed: number
+}
+
 export type ModelInfo = {
   desc: string
   nEmbd: number
@@ -573,6 +612,16 @@ export interface LlamaContext {
    */
   clearCache(clearData?: boolean): void

+  /**
+   * Run a benchmark to measure model performance
+   * @param pp Number of tokens to process for prompt processing benchmark
+   * @param tg Number of tokens to generate for text generation benchmark
+   * @param pl Parallel level (number of sequences)
+   * @param nr Number of repetitions
+   * @returns Benchmark results
+   */
+  bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<GGUFModelInfo>
   toggleNativeLog(
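The new `bench(pp, tg, pl, nr)` method closely resembles llama.cpp's batched-bench parameters: process `pp` prompt tokens, generate `tg` tokens, across `pl` parallel sequences, repeated `nr` times, resolving to a `BenchResult`. A minimal usage sketch (untested; the model path and the `model` option name are illustrative assumptions, not taken from this diff):

```ts
// Sketch: run the new bench() call and print the headline numbers.
// Assumption: loadModel() accepts a `model` path option (hypothetical here).
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  const ctx = await loadModel({ model: './model.gguf' }) // hypothetical path
  // pp=512 prompt tokens, tg=128 generated tokens, pl=1 sequence, nr=3 repetitions
  const r = await ctx.bench(512, 128, 1, 3)
  console.log(`pp:    ${r.speedPp.toFixed(2)} t/s (${r.tPp.toFixed(1)} ms)`)
  console.log(`tg:    ${r.speedTg.toFixed(2)} t/s (${r.tTg.toFixed(1)} ms)`)
  console.log(`total: ${r.speed.toFixed(2)} t/s over ${r.t.toFixed(1)} ms`)
}

main().catch(console.error)
```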
package/lib/index.js CHANGED
@@ -204,6 +204,19 @@ class LlamaContextWrapper {
     clearCache(clearData) {
         this.ctx.clearCache(clearData);
     }
+    /**
+     * Run a benchmark to measure model performance
+     * @param pp Number of tokens to process for prompt processing benchmark
+     * @param tg Number of tokens to generate for text generation benchmark
+     * @param pl Parallel level (number of sequences)
+     * @param nr Number of repetitions
+     * @returns Benchmark results including timing and speed metrics
+     */
+    bench(pp, tg, pl, nr) {
+        return __awaiter(this, void 0, void 0, function* () {
+            return this.ctx.bench(pp, tg, pl, nr);
+        });
+    }
 }
 const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
package/lib/index.ts CHANGED
@@ -16,6 +16,7 @@ import type {
   JinjaFormattedChatResult,
   Tool,
   GGUFModelInfo,
+  BenchResult,
 } from './binding'
 import { BUILD_NUMBER, BUILD_COMMIT } from './version'
 import { LlamaParallelAPI } from './parallel'
@@ -309,6 +310,18 @@ class LlamaContextWrapper {
   clearCache(clearData?: boolean): void {
     this.ctx.clearCache(clearData)
   }
+
+  /**
+   * Run a benchmark to measure model performance
+   * @param pp Number of tokens to process for prompt processing benchmark
+   * @param tg Number of tokens to generate for text generation benchmark
+   * @param pl Parallel level (number of sequences)
+   * @param nr Number of repetitions
+   * @returns Benchmark results including timing and speed metrics
+   */
+  async bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult> {
+    return this.ctx.bench(pp, tg, pl, nr)
+  }
 }

 export const loadModel = async (
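Because `pl` maps to the number of parallel sequences, the wrapper also makes scaling checks easy. A short sketch, reusing `loadModel` and a `ctx` as in the previous example (the `Awaited<ReturnType<...>>` typing is an assumption about `loadModel`'s return type):

```ts
// Sketch: compare throughput at several parallel levels (pl).
// `ctx` is a context created with loadModel, as in the earlier example.
const compareParallelLevels = async (ctx: Awaited<ReturnType<typeof loadModel>>) => {
  for (const pl of [1, 2, 4]) {
    const r = await ctx.bench(128, 64, pl, 2) // short runs: pp=128, tg=64, nr=2
    console.log(
      `pl=${r.pl} nKv=${r.nKv} pp=${r.speedPp.toFixed(1)} t/s tg=${r.speedTg.toFixed(1)} t/s`,
    )
  }
}
```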
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.4",
+  "version": "1.4.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.4",
-    "@fugood/node-llama-darwin-x64": "1.4.4",
-    "@fugood/node-llama-linux-arm64": "1.4.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.4",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.4",
-    "@fugood/node-llama-linux-x64": "1.4.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.4",
-    "@fugood/node-llama-win32-arm64": "1.4.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.4",
-    "@fugood/node-llama-win32-x64": "1.4.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.4"
+    "@fugood/node-llama-darwin-arm64": "1.4.6",
+    "@fugood/node-llama-darwin-x64": "1.4.6",
+    "@fugood/node-llama-linux-arm64": "1.4.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.6",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.6",
+    "@fugood/node-llama-linux-x64": "1.4.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.6",
+    "@fugood/node-llama-win32-arm64": "1.4.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.6",
+    "@fugood/node-llama-win32-x64": "1.4.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -203,6 +203,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
            static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::ClearCache>(
            "clearCache",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::Bench>(
+           "bench",
            static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
@@ -1529,3 +1532,69 @@ void LlamaContext::ClearCache(const Napi::CallbackInfo &info) {
 
   _rn_ctx->clearCache(clear_data);
 }
+
+// bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>
+Napi::Value LlamaContext::Bench(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+
+  if (info.Length() < 4) {
+    Napi::TypeError::New(env, "Expected 4 arguments: pp, tg, pl, nr")
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
+  if (!_rn_ctx) {
+    Napi::TypeError::New(env, "Context is disposed").ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
+  if (!_rn_ctx->completion) {
+    Napi::TypeError::New(env, "Completion context not initialized")
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
+  int pp = info[0].ToNumber().Int32Value();
+  int tg = info[1].ToNumber().Int32Value();
+  int pl = info[2].ToNumber().Int32Value();
+  int nr = info[3].ToNumber().Int32Value();
+
+  std::string result;
+  try {
+    result = _rn_ctx->completion->bench(pp, tg, pl, nr);
+  } catch (const std::exception &e) {
+    Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
+  // Parse the JSON result and return as object
+  try {
+    auto parsed = json::parse(result);
+    Napi::Object benchResult = Napi::Object::New(env);
+
+    benchResult.Set("nKvMax", Napi::Number::New(env, parsed["n_kv_max"].get<int>()));
+    benchResult.Set("nBatch", Napi::Number::New(env, parsed["n_batch"].get<int>()));
+    benchResult.Set("nUBatch", Napi::Number::New(env, parsed["n_ubatch"].get<int>()));
+    benchResult.Set("flashAttn", Napi::Number::New(env, parsed["flash_attn"].get<int>()));
+    benchResult.Set("isPpShared", Napi::Boolean::New(env, parsed["is_pp_shared"].get<int>() != 0));
+    benchResult.Set("nGpuLayers", Napi::Number::New(env, parsed["n_gpu_layers"].get<int>()));
+    benchResult.Set("nThreads", Napi::Number::New(env, parsed["n_threads"].get<int>()));
+    benchResult.Set("nThreadsBatch", Napi::Number::New(env, parsed["n_threads_batch"].get<int>()));
+    benchResult.Set("pp", Napi::Number::New(env, parsed["pp"].get<int>()));
+    benchResult.Set("tg", Napi::Number::New(env, parsed["tg"].get<int>()));
+    benchResult.Set("pl", Napi::Number::New(env, parsed["pl"].get<int>()));
+    benchResult.Set("nKv", Napi::Number::New(env, parsed["n_kv"].get<int>()));
+    benchResult.Set("tPp", Napi::Number::New(env, parsed["t_pp"].get<double>()));
+    benchResult.Set("speedPp", Napi::Number::New(env, parsed["speed_pp"].get<double>()));
+    benchResult.Set("tTg", Napi::Number::New(env, parsed["t_tg"].get<double>()));
+    benchResult.Set("speedTg", Napi::Number::New(env, parsed["speed_tg"].get<double>()));
+    benchResult.Set("t", Napi::Number::New(env, parsed["t"].get<double>()));
+    benchResult.Set("speed", Napi::Number::New(env, parsed["speed"].get<double>()));
+
+    return benchResult;
+  } catch (const std::exception &e) {
+    Napi::Error::New(env, std::string("Failed to parse benchmark result: ") + e.what())
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+}
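Two details of the implementation above are worth noting: the native method validates state up front, throwing "Context is disposed" or "Completion context not initialized" before benchmarking, and it maps the snake_case JSON from the underlying `bench()` (`n_kv_max`, `t_pp`, `speed_tg`, ...) onto the camelCase `BenchResult` fields. `Bench` itself builds and returns the object synchronously; the `Promise` in the typings comes from the JS wrapper. A defensive calling sketch (the only facts assumed are the error strings shown above):

```ts
// Sketch: tolerate the state errors thrown by the native Bench() above.
// The matched messages are exactly the strings thrown in the C++ code.
const tryBench = async (
  ctx: { bench(pp: number, tg: number, pl: number, nr: number): Promise<unknown> },
  pp = 512,
  tg = 128,
  pl = 1,
  nr = 3,
) => {
  try {
    return await ctx.bench(pp, tg, pl, nr)
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e)
    if (msg.includes('Context is disposed') || msg.includes('Completion context not initialized')) {
      console.warn(`bench() skipped: ${msg}`)
      return undefined
    }
    throw e // argument and parse errors are not swallowed
  }
}
```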
package/src/LlamaContext.h CHANGED
@@ -72,6 +72,9 @@ private:
   // Cache management
   void ClearCache(const Napi::CallbackInfo &info);

+  // Benchmarking
+  Napi::Value Bench(const Napi::CallbackInfo &info);
+
   std::string _info;
   std::vector<std::string> _used_devices;
   Napi::Object _meta;