@fugood/llama.node 0.0.1-alpha.1 → 0.0.1-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,16 +7,16 @@ Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
 ## Installation
 
 ```sh
-npm install llama.node
+npm install @fugood/llama.node
 ```
 
 ## Usage
 
 ```js
-import { loadModel } from 'llama.node'
+import { loadModel } from '@fugood/llama.node'
 
 // Initial a Llama context with the model (may take a while)
-const context = loadModel({
+const context = await loadModel({
   model: 'path/to/gguf/model',
   use_mlock: true,
   n_ctx: 2048,
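The README change above tracks the breaking API change in this release: `loadModel` now returns a promise. A minimal end-to-end sketch against the new API (hedged: the exact completion API is not visible in this part of the diff; `getSystemInfo` and `release` are taken from the binding.ts and addons.cpp hunks below):

```ts
import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // loadModel is now async: the native module is resolved via dynamic import.
  const context = await loadModel({
    model: 'path/to/gguf/model',
    use_mlock: true,
    n_ctx: 2048,
  })
  console.log(context.getSystemInfo())
  // release() is new in alpha.2: frees the native context off the main thread.
  await context.release()
}

main()
```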
Binary files changed (8)
package/lib/binding.js CHANGED
@@ -1,13 +1,45 @@
 "use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.loadModule = void 0;
-const loadModule = (variant) => {
+const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
     try {
         if (variant) {
-            return require(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`);
+            return yield Promise.resolve(`${`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
         }
     }
     catch (_a) { } // ignore errors and try the common path
-    return require(`../bin/${process.platform}/${process.arch}/llama-node.node`);
-};
+    return yield Promise.resolve(`${`../bin/${process.platform}/${process.arch}/llama-node.node`}`).then(s => __importStar(require(s)));
+});
 exports.loadModule = loadModule;
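The helper boilerplate above is standard tsc output: it is the CommonJS down-level of the `await import(...)` added in lib/binding.ts (next section). Under `module: commonjs`, a dynamic `import(x)` is emitted as `Promise.resolve().then(() => __importStar(require(x)))`, so the native addon is still loaded with a synchronous `require` under the hood, just wrapped in a promise. A simplified, self-contained sketch of that pattern (helper inlined; not the exact tsc emit):

```ts
// Rough equivalent of the tsc emit for `await import(path)` in CommonJS:
const importStar = (m: any) => (m && m.__esModule ? m : { ...m, default: m })

const dynamicImport = (path: string): Promise<any> =>
  Promise.resolve(path).then((s) => importStar(require(s)))

// e.g. dynamicImport(`../bin/${process.platform}/${process.arch}/llama-node.node`)
```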
package/lib/binding.ts CHANGED
@@ -41,17 +41,18 @@ export interface LlamaContext {
   stopCompletion(): void
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
+  release(): Promise<void>
 }
 
 export interface Module {
   LlamaContext: LlamaContext
 }
 
-export const loadModule = (variant?: string): Module => {
+export const loadModule = async (variant?: string): Promise<Module> => {
   try {
     if (variant) {
-      return require(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
+      return await import(`../bin/${process.platform}-${variant}/${process.arch}/llama-node.node`) as Module
     }
   } catch {} // ignore errors and try the common path
-  return require(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
+  return await import(`../bin/${process.platform}/${process.arch}/llama-node.node`) as Module
 }
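`loadModule` keeps its two-step resolution (variant-specific path first, common path as fallback) but is now async. A hedged usage sketch; the 'vulkan' variant name is hypothetical, since the actual variant directories only appear in this diff as unnamed binary files:

```ts
import { loadModule } from '@fugood/llama.node'

// Ask for a hypothetical variant build; if bin/<platform>-vulkan/<arch>/
// fails to load, loadModule falls back to bin/<platform>/<arch>/.
const mod = await loadModule('vulkan')
const context = new mod.LlamaContext({ model: 'path/to/gguf/model' })
```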
package/lib/index.js CHANGED
@@ -13,12 +13,22 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
 var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.loadModel = void 0;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
-const loadModel = (options) => {
-    const { LlamaContext } = (0, binding_1.loadModule)(options.lib_variant);
-    return new LlamaContext(options);
-};
+let module = null;
+const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
+    module !== null && module !== void 0 ? module : (module = yield (0, binding_1.loadModule)(options.lib_variant));
+    return new module.LlamaContext(options);
+});
 exports.loadModel = loadModel;
package/lib/index.ts CHANGED
@@ -1,5 +1,5 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type { LlamaContext } from './binding'
+import type { Module, LlamaContext } from './binding'
 
 export * from './binding'
 
@@ -7,7 +7,9 @@ export interface LlamaModelOptionsExtended extends LlamaModelOptions {
   lib_variant?: string
 }
 
-export const loadModel = (options: LlamaModelOptionsExtended): LlamaContext => {
-  const { LlamaContext } = loadModule(options.lib_variant)
-  return new LlamaContext(options)
+let module: Module | null = null
+
+export const loadModel = async (options: LlamaModelOptionsExtended): Promise<LlamaContext> => {
+  module ??= await loadModule(options.lib_variant)
+  return new module.LlamaContext(options)
 }
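Because the resolved `Module` is cached in a module-level variable via `??=`, only the first `loadModel` call performs the dynamic import; subsequent calls construct contexts from the same binding. One consequence (inferred from this code, not documented): `lib_variant` only takes effect on the first call in a process.

```ts
import { loadModel } from '@fugood/llama.node'

// First call resolves and caches the native module for 'variant-a'
// (hypothetical variant name):
const ctx1 = await loadModel({ model: 'a.gguf', lib_variant: 'variant-a' })
// Second call reuses the cached module; 'variant-b' is silently ignored.
const ctx2 = await loadModel({ model: 'b.gguf', lib_variant: 'variant-b' })
```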
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.0.1-alpha.1",
+  "version": "0.0.1-alpha.2",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -30,7 +30,7 @@
   },
   "homepage": "https://github.com/mybigday/llama.node#readme",
   "publishConfig": {
-    "registry": "https://registry.npmjs.org"
+    "registry": "https://registry.npmjs.org"
   },
   "binary": {
     "napi_versions": [
package/src/addons.cpp CHANGED
@@ -23,6 +23,34 @@ size_t common_part(const std::vector<llama_token> &a,
   return i;
 }
 
+template <typename T>
+constexpr T get_option(const Napi::Object &options, const std::string &name,
+                       const T default_value) {
+  if (options.Has(name) && !options.Get(name).IsUndefined() &&
+      !options.Get(name).IsNull()) {
+    if constexpr (std::is_same<T, std::string>::value) {
+      return options.Get(name).ToString().operator T();
+    } else if constexpr (std::is_same<T, int32_t>::value ||
+                         std::is_same<T, uint32_t>::value ||
+                         std::is_same<T, float>::value ||
+                         std::is_same<T, double>::value) {
+      return options.Get(name).ToNumber().operator T();
+    } else if constexpr (std::is_same<T, bool>::value) {
+      return options.Get(name).ToBoolean().operator T();
+    } else {
+      static_assert(std::is_same<T, std::string>::value ||
+                        std::is_same<T, int32_t>::value ||
+                        std::is_same<T, uint32_t>::value ||
+                        std::is_same<T, float>::value ||
+                        std::is_same<T, double>::value ||
+                        std::is_same<T, bool>::value,
+                    "Unsupported type");
+    }
+  } else {
+    return default_value;
+  }
+}
+
 class LlamaCompletionWorker;
 
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
@@ -37,37 +65,21 @@ public:
   }
   auto options = info[0].As<Napi::Object>();
 
-  if (options.Has("model")) {
-    params.model = options.Get("model").ToString();
-  }
-  if (options.Has("embedding")) {
-    params.embedding = options.Get("embedding").ToBoolean();
-  }
-  if (options.Has("n_ctx")) {
-    params.n_ctx = options.Get("n_ctx").ToNumber();
-  }
-  if (options.Has("n_batch")) {
-    params.n_batch = options.Get("n_batch").ToNumber();
-  }
-  if (options.Has("n_threads")) {
-    params.n_threads = options.Get("n_threads").ToNumber();
-  }
-  if (options.Has("n_gpu_layers")) {
-    params.n_gpu_layers = options.Get("n_gpu_layers").ToNumber();
-  }
-  if (options.Has("use_mlock")) {
-    params.use_mlock = options.Get("use_mlock").ToBoolean();
-  }
-  if (options.Has("use_mmap")) {
-    params.use_mmap = options.Get("use_mmap").ToBoolean();
-  }
-  if (options.Has("numa")) {
-    int numa = options.Get("numa").ToNumber();
-    params.numa = static_cast<ggml_numa_strategy>(numa);
-  }
-  if (options.Has("seed")) {
-    params.seed = options.Get("seed").ToNumber();
+  params.model = get_option<std::string>(options, "model", "");
+  if (params.model.empty()) {
+    Napi::TypeError::New(env, "Model is required")
+        .ThrowAsJavaScriptException();
   }
+  params.embedding = get_option<bool>(options, "embedding", false);
+  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
+  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.n_threads =
+      get_option<int32_t>(options, "n_threads", get_math_cpu_count() / 2);
+  params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
+  params.use_mlock = get_option<bool>(options, "use_mlock", false);
+  params.use_mmap = get_option<bool>(options, "use_mmap", true);
+  params.numa = static_cast<ggml_numa_strategy>(
+      get_option<uint32_t>(options, "numa", 0));
 
   llama_backend_init();
   llama_numa_init(params.numa);
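Compared with the removed `options.Has(...)` checks, `get_option` also treats an explicitly `undefined` or `null` value as absent, so it falls back to the default instead of coercing (e.g. `undefined` would previously have gone through `ToNumber()`). From the JS side, with the defaults in the hunk above, all of these now resolve `n_ctx` to 512 (a sketch; `LlamaModelOptions` field types are not shown in this diff):

```ts
import { loadModel } from '@fugood/llama.node'

// All three resolve n_ctx to the 512 default (assumed semantics):
await loadModel({ model: 'm.gguf' })                     // key absent
await loadModel({ model: 'm.gguf', n_ctx: undefined })   // explicitly undefined
await loadModel({ model: 'm.gguf', n_ctx: null as any }) // explicitly null
```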
@@ -99,6 +111,9 @@ public:
           static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::LoadSession>(
           "loadSession",
+          static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Release>(
+          "release",
           static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
@@ -127,12 +142,21 @@ public:
 
   std::mutex &getMutex() { return mutex; }
 
+  void Dispose() {
+    std::lock_guard<std::mutex> lock(mutex);
+    compl_worker = nullptr;
+    ctx.reset();
+    tokens.reset();
+    model.reset();
+  }
+
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  Napi::Value Release(const Napi::CallbackInfo &info);
 
   gpt_params params;
   LlamaCppModel model{nullptr, llama_free_model};
@@ -270,7 +294,6 @@ protected:
     tokens_predicted += 1;
     n_input = 1;
     if (_has_callback) {
-      // _cb.Call({ Napi::String::New(AsyncWorker::Env(), token) });
       const char *c_token = strdup(token.c_str());
       _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                      const char *value) {
@@ -378,6 +401,26 @@ protected:
   void OnError(const Napi::Error &err) { Reject(err.Value()); }
 };
 
+class DisposeWorker : public Napi::AsyncWorker, public Napi::Promise::Deferred {
+public:
+  DisposeWorker(Napi::Env env, LlamaContext *ctx)
+      : AsyncWorker(env), Deferred(env), ctx_(ctx) {
+    ctx_->Ref();
+  }
+
+  ~DisposeWorker() { ctx_->Unref(); }
+
+protected:
+  void Execute() override { ctx_->Dispose(); }
+
+  void OnOK() override { Resolve(AsyncWorker::Env().Undefined()); }
+
+  void OnError(const Napi::Error &err) override { Reject(err.Value()); }
+
+private:
+  LlamaContext *ctx_;
+};
+
 // getSystemInfo(): string
 Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), get_system_info(params).c_str());
@@ -396,63 +439,41 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto options = info[0].As<Napi::Object>();
 
   gpt_params params;
-  if (options.Has("prompt")) {
-    params.prompt = options.Get("prompt").ToString();
-  } else {
+  params.prompt = get_option<std::string>(options, "prompt", "");
+  if (params.prompt.empty()) {
     Napi::TypeError::New(env, "Prompt is required")
         .ThrowAsJavaScriptException();
   }
-  params.n_predict =
-      options.Has("n_predict") ? options.Get("n_predict").ToNumber() : -1;
-  params.sparams.temp = options.Has("temperature")
-                            ? options.Get("temperature").ToNumber()
-                            : 0.80f;
-  params.sparams.top_k =
-      options.Has("top_k") ? options.Get("top_k").ToNumber() : 40;
-  params.sparams.top_p =
-      options.Has("top_p") ? options.Get("top_p").ToNumber() : 0.95f;
-  params.sparams.min_p =
-      options.Has("min_p") ? options.Get("min_p").ToNumber() : 0.05f;
-  params.sparams.tfs_z =
-      options.Has("tfs_z") ? options.Get("tfs_z").ToNumber() : 1.00f;
-  params.sparams.mirostat =
-      options.Has("mirostat") ? options.Get("mirostat").ToNumber() : 0;
-  params.sparams.mirostat_tau = options.Has("mirostat_tau")
-                                    ? options.Get("mirostat_tau").ToNumber()
-                                    : 5.00f;
-  params.sparams.mirostat_eta = options.Has("mirostat_eta")
-                                    ? options.Get("mirostat_eta").ToNumber()
-                                    : 0.10f;
-  params.sparams.penalty_last_n = options.Has("penalty_last_n")
-                                      ? options.Get("penalty_last_n").ToNumber()
-                                      : 64;
-  params.sparams.penalty_repeat = options.Has("penalty_repeat")
-                                      ? options.Get("penalty_repeat").ToNumber()
-                                      : 1.00f;
-  params.sparams.penalty_freq = options.Has("penalty_freq")
-                                    ? options.Get("penalty_freq").ToNumber()
-                                    : 0.00f;
+  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
+  params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
+  params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
+  params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
+  params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
+  params.sparams.tfs_z = get_option<float>(options, "tfs_z", 1.00f);
+  params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
+  params.sparams.mirostat_tau =
+      get_option<float>(options, "mirostat_tau", 5.00f);
+  params.sparams.mirostat_eta =
+      get_option<float>(options, "mirostat_eta", 0.10f);
+  params.sparams.penalty_last_n =
+      get_option<int32_t>(options, "penalty_last_n", 64);
+  params.sparams.penalty_repeat =
+      get_option<float>(options, "penalty_repeat", 1.00f);
+  params.sparams.penalty_freq =
+      get_option<float>(options, "penalty_freq", 0.00f);
   params.sparams.penalty_present =
-      options.Has("penalty_present") ? options.Get("penalty_present").ToNumber()
-                                     : 0.00f;
-  params.sparams.penalize_nl = options.Has("penalize_nl")
-                                   ? options.Get("penalize_nl").ToBoolean()
-                                   : false;
-  params.sparams.typical_p =
-      options.Has("typical_p") ? options.Get("typical_p").ToNumber() : 1.00f;
-  params.ignore_eos =
-      options.Has("ignore_eos") ? options.Get("ignore_eos").ToBoolean() : false;
-  params.sparams.grammar = options.Has("grammar")
-                               ? options.Get("grammar").ToString().Utf8Value()
-                               : "";
-  params.n_keep = options.Has("n_keep") ? options.Get("n_keep").ToNumber() : 0;
-  params.seed =
-      options.Has("seed") ? options.Get("seed").ToNumber() : LLAMA_DEFAULT_SEED;
+      get_option<float>(options, "penalty_present", 0.00f);
+  params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
+  params.sparams.typical_p = get_option<float>(options, "typical_p", 1.00f);
+  params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+  params.sparams.grammar = get_option<std::string>(options, "grammar", "");
+  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
+  params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
-  if (options.Has("stop")) {
+  if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
     for (size_t i = 0; i < stop_words_array.Length(); i++) {
-      stop_words.push_back(stop_words_array.Get(i).ToString());
+      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
     }
   }
 
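Two robustness fixes in the hunk above: `stop` is only consumed when it is actually an array (previously any value was blind-cast with `As<Napi::Array>()`), and each element is converted explicitly through `Utf8Value()`. Two assignments also appear to rely on implicit conversions (`mirostat` takes a `0.00f` default through an `int32_t` instantiation, and `ignore_eos` is read via `get_option<float>`); they compile, but look like slips. Assuming the handler is exposed to JS as `completion(...)` (consistent with `stopCompletion` in binding.ts), the effect of the `stop` guard looks like:

```ts
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: 'path/to/gguf/model' })
// OK: array of stop sequences.
await context.completion({ prompt: 'Q: hi\nA:', stop: ['\n', 'Q:'] })
// A non-array stop value is now ignored instead of being blind-cast.
await context.completion({ prompt: 'Q: hi\nA:', stop: '\n' as any })
```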
@@ -498,6 +519,16 @@ Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// release(): Promise<void>
+Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
+  if (compl_worker != nullptr) {
+    compl_worker->Stop();
+  }
+  auto *worker = new DisposeWorker(info.Env(), this);
+  worker->Queue();
+  return worker->Promise();
+}
+
 Napi::Object Init(Napi::Env env, Napi::Object exports) {
   LlamaContext::Export(env, exports);
   return exports;
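`release()` queues the `DisposeWorker` defined earlier: it stops any in-flight completion worker, then frees the model, context, and token buffer on a libuv worker thread while holding the context mutex; the `Ref`/`Unref` pair keeps the JS wrapper alive until disposal finishes. A minimal teardown sketch:

```ts
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: 'path/to/gguf/model' })
try {
  // ... completions, saveSession/loadSession, etc. ...
} finally {
  // New in alpha.2: resolves once native resources are freed off-thread.
  await context.release()
}
```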