@fugood/llama.node 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/binding.ts CHANGED
@@ -55,6 +55,10 @@ export type LlamaModelOptions = {
    * Try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix.
    */
   kv_unified?: boolean
+  /**
+   * Use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
+   */
+  swa_full?: boolean
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
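The new swa_full model option maps to params.swa_full on the native side (see the LlamaContext constructor change further down). A minimal load sketch in TypeScript, assuming the package's loadModel entry point and a placeholder GGUF path:

  import { loadModel } from '@fugood/llama.node'

  // Sketch only: the model path is a placeholder.
  const context = await loadModel({
    model: './models/example-q4_k_m.gguf',
    n_ctx: 4096,
    kv_unified: false,
    swa_full: true, // use the full-size SWA cache (llama.cpp PR 13194)
  })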
@@ -102,6 +106,8 @@ export type LlamaCompletionOptions = {
   dry_base?: number
   dry_allowed_length?: number
   dry_penalty_last_n?: number
+  dry_sequence_breakers?: string[]
+  top_n_sigma?: number
   n_predict?: number
   max_length?: number
   max_tokens?: number
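dry_sequence_breakers and top_n_sigma extend the completion sampling options alongside the existing DRY parameters. A hedged sketch reusing the context from the previous snippet; the prompt and values are illustrative:

  const haiku = await context.completion({
    prompt: 'Write a short haiku about caching.',
    n_predict: 128,
    dry_penalty_last_n: 256,
    dry_sequence_breakers: ['\n', ':', '"', '*'], // strings that reset DRY repetition matching
    top_n_sigma: 1.5, // top-n-sigma sampling cutoff
  })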
@@ -111,6 +117,9 @@ export type LlamaCompletionOptions = {
   grammar_lazy?: boolean
   grammar_triggers?: { type: number; value: string; token?: number }[]
   preserved_tokens?: string[]
+  json_schema?: string
+  logit_bias?: number[][]
+  ignore_eos?: boolean
   /**
    * Path(s) to media file(s) to process before generating text.
    * When provided, the media will be processed and added to the context.
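json_schema, logit_bias and ignore_eos are also new on the completion options. json_schema is typed as a string, so an object schema has to be serialized first; logit_bias presumably takes [token_id, bias] pairs in the llama.cpp server style (an assumption, not stated in this diff). Sketch:

  const weather = await context.completion({
    prompt: 'Reply with a JSON object describing the weather in Tokyo.',
    json_schema: JSON.stringify({
      type: 'object',
      properties: { city: { type: 'string' }, temp_c: { type: 'number' } },
      required: ['city', 'temp_c'],
    }),
    logit_bias: [[12345, -100]], // placeholder token id; a large negative bias effectively bans it
    ignore_eos: false,
  })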
@@ -134,6 +143,7 @@ export type LlamaCompletionResult = {
   tokens_evaluated: number
   truncated: boolean
   context_full: boolean
+  interrupted: boolean
   audio_tokens?: Array<number>
   timings: {
     prompt_n: number
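The new interrupted field is populated from the worker's _interrupted flag (the _stop rename in the C++ changes below), letting callers tell an externally stopped completion apart from a natural stop. Sketch, assuming the context also exposes a stopCompletion() method, which is not part of this diff:

  const pending = context.completion({ prompt: 'Tell me a very long story.', n_predict: 2048 })
  setTimeout(() => context.stopCompletion(), 500) // interrupt after ~500 ms
  const story = await pending
  if (story.interrupted) {
    console.log('stopped early, partial text:', story.text)
  }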
@@ -265,6 +275,9 @@ export interface LlamaContext {
       parallel_tool_calls?: boolean
       tool_choice?: string
       enable_thinking?: boolean
+      add_generation_prompt?: boolean
+      now?: string | number
+      chat_template_kwargs?: Record<string, string>
     },
   ): JinjaFormattedChatResult | string
   completion(
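add_generation_prompt, now and chat_template_kwargs are passed through to the Jinja chat formatter (see getFormattedChatWithJinja further down). A sketch of the wrapper's getFormattedChat with the new params; the message content and the kwargs key are placeholders, and passing undefined for the template is assumed to fall back to the model's built-in one:

  const formatted = context.getFormattedChat(
    [{ role: 'user', content: 'Give me a one-line status update.' }],
    undefined,
    {
      add_generation_prompt: true, // append the assistant prefix for generation
      chat_template_kwargs: { persona: 'ops-bot' }, // hypothetical extra template variable
    },
  )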
package/lib/index.js CHANGED
@@ -145,6 +145,9 @@ class LlamaContextWrapper {
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
             enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
+            add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
+            now: params === null || params === void 0 ? void 0 : params.now,
+            chat_template_kwargs: params === null || params === void 0 ? void 0 : params.chat_template_kwargs,
         });
         if (!useJinja) {
             return {
package/lib/index.ts CHANGED
@@ -166,6 +166,9 @@ class LlamaContextWrapper {
       parallel_tool_calls?: boolean
       tool_choice?: string,
       enable_thinking?: boolean,
+      add_generation_prompt?: boolean,
+      now?: string | number,
+      chat_template_kwargs?: Record<string, string>,
     },
   ): FormattedChatResult {
     const {
@@ -186,6 +189,9 @@ class LlamaContextWrapper {
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,
       enable_thinking: params?.enable_thinking ?? true,
+      add_generation_prompt: params?.add_generation_prompt,
+      now: params?.now,
+      chat_template_kwargs: params?.chat_template_kwargs,
     })

     if (!useJinja) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.1.1",
+  "version": "1.1.3",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -71,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.1.1",
-    "@fugood/node-llama-linux-x64-vulkan": "1.1.1",
-    "@fugood/node-llama-linux-x64-cuda": "1.1.1",
-    "@fugood/node-llama-linux-arm64": "1.1.1",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.1.1",
-    "@fugood/node-llama-linux-arm64-cuda": "1.1.1",
-    "@fugood/node-llama-win32-x64": "1.1.1",
-    "@fugood/node-llama-win32-x64-vulkan": "1.1.1",
-    "@fugood/node-llama-win32-x64-cuda": "1.1.1",
-    "@fugood/node-llama-win32-arm64": "1.1.1",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.1.1",
-    "@fugood/node-llama-darwin-x64": "1.1.1",
-    "@fugood/node-llama-darwin-arm64": "1.1.1"
+    "@fugood/node-llama-linux-x64": "1.1.3",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.3",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.3",
+    "@fugood/node-llama-linux-arm64": "1.1.3",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.3",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.3",
+    "@fugood/node-llama-win32-x64": "1.1.3",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.3",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.3",
+    "@fugood/node-llama-win32-arm64": "1.1.3",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.3",
+    "@fugood/node-llama-darwin-x64": "1.1.3",
+    "@fugood/node-llama-darwin-arm64": "1.1.3"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -129,7 +129,7 @@ void LlamaCompletionWorker::Execute() {
   _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);

   auto embd = _sess->tokens_ptr();
-  for (int i = 0; (i < max_len || _stop) && !_params.vocab_only; i++) {
+  for (int i = 0; (i < max_len || _interrupted) && !_params.vocab_only; i++) {
     // check if we need to remove some tokens
     if (embd->size() >= _params.n_ctx) {
       if (!_params.ctx_shift) {
@@ -256,6 +256,7 @@ void LlamaCompletionWorker::OnOK() {
                                    _result.tokens_predicted));
   result.Set("truncated", Napi::Boolean::New(env, _result.truncated));
   result.Set("context_full", Napi::Boolean::New(env, _result.context_full));
+  result.Set("interrupted", Napi::Boolean::New(env, _interrupted));
   result.Set("text", Napi::String::New(env, _result.text.c_str()));
   result.Set("stopped_eos", Napi::Boolean::New(env, _result.stopped_eos));
   result.Set("stopped_words", Napi::Boolean::New(env, _result.stopped_words));
@@ -267,7 +268,7 @@ void LlamaCompletionWorker::OnOK() {
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
   std::string reasoning_content = "";
   std::string content;
-  if (!_stop) {
+  if (!_interrupted) {
     try {
       common_chat_syntax chat_syntax;
       chat_syntax.format = static_cast<common_chat_format>(_chat_format);
@@ -34,7 +34,7 @@ public:

   void OnComplete(std::function<void()> cb) { _onComplete = cb; }

-  void SetStop() { _stop = true; }
+  void SetStop() { _interrupted = true; }

 protected:
   void Execute() override;
@@ -52,7 +52,7 @@ private:
   std::vector<llama_token> _guide_tokens;
   std::function<void()> _onComplete;
   bool _has_callback = false;
-  bool _stop = false;
+  bool _interrupted = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
   bool _has_vocoder;
@@ -248,6 +248,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
       get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
   params.kv_unified = get_option<bool>(options, "kv_unified", false);
+  params.swa_full = get_option<bool>(options, "swa_full", false);

   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
@@ -504,7 +505,10 @@ common_chat_params getFormattedChatWithJinja(
     const std::string &chat_template, const std::string &json_schema,
     const std::string &tools, const bool &parallel_tool_calls,
     const std::string &tool_choice,
-    const bool &enable_thinking
+    const bool &enable_thinking,
+    const bool &add_generation_prompt,
+    const std::string &now_str,
+    const std::map<std::string, std::string> &chat_template_kwargs
 ) {
   common_chat_templates_inputs inputs;
   inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
@@ -520,6 +524,21 @@ common_chat_params getFormattedChatWithJinja(
     inputs.json_schema = json::parse(json_schema);
   }
   inputs.enable_thinking = enable_thinking;
+  inputs.add_generation_prompt = add_generation_prompt;
+
+  // Handle now parameter - parse timestamp or use current time
+  if (!now_str.empty()) {
+    try {
+      // Try to parse as timestamp (seconds since epoch)
+      auto timestamp = std::stoll(now_str);
+      inputs.now = std::chrono::system_clock::from_time_t(timestamp);
+    } catch (...) {
+      // If parsing fails, use current time
+      inputs.now = std::chrono::system_clock::now();
+    }
+  }
+
+  inputs.chat_template_kwargs = chat_template_kwargs;

   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
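Per the block above, now is parsed with std::stoll as seconds since the epoch, and a value stoll cannot read falls back to the current time. Note that stoll accepts leading digits, so a date string such as "2025-01-01" would be read as 2025 seconds rather than triggering the fallback; passing epoch seconds (as a string) is the unambiguous form, e.g.:

  const msgs = [{ role: 'user', content: 'What is the date today?' }]
  const dated = context.getFormattedChat(msgs, undefined, {
    now: String(Math.floor(Date.parse('2025-01-01T00:00:00Z') / 1000)), // "1735689600"
  })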
@@ -594,12 +613,26 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
       get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
+  auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
+  auto now_str = get_option<std::string>(params, "now", "");
+
+  std::map<std::string, std::string> chat_template_kwargs;
+  if (params.Has("chat_template_kwargs") && params.Get("chat_template_kwargs").IsObject()) {
+    auto kwargs_obj = params.Get("chat_template_kwargs").As<Napi::Object>();
+    auto props = kwargs_obj.GetPropertyNames();
+    for (uint32_t i = 0; i < props.Length(); i++) {
+      auto key = props.Get(i).ToString().Utf8Value();
+      auto val = kwargs_obj.Get(key).ToString().Utf8Value();
+      chat_template_kwargs[key] = val;
+    }
+  }

   common_chat_params chatParams;
   try {
     chatParams = getFormattedChatWithJinja(
         _sess, _templates, messages, chat_template, json_schema_str, tools_str,
-        parallel_tool_calls, tool_choice, enable_thinking);
+        parallel_tool_calls, tool_choice, enable_thinking,
+        add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const std::exception &e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
     return env.Undefined();
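chat_template_kwargs values are read with ToString().Utf8Value(), so every value reaches the template as a string (an object value would arrive as "[object Object]"). A minimal sketch with hypothetical keys, pre-serializing anything structured:

  const kw = context.getFormattedChat(
    [{ role: 'user', content: 'Hello' }],
    undefined,
    {
      chat_template_kwargs: {
        reasoning_effort: 'high',
        custom_config: JSON.stringify({ max_items: 3 }), // keep values as strings
      },
    },
  )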
@@ -808,13 +841,27 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto tool_choice =
       get_option<std::string>(options, "tool_choice", "none");
   auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
+  auto add_generation_prompt = get_option<bool>(options, "add_generation_prompt", true);
+  auto now_str = get_option<std::string>(options, "now", "");
+
+  std::map<std::string, std::string> chat_template_kwargs;
+  if (options.Has("chat_template_kwargs") && options.Get("chat_template_kwargs").IsObject()) {
+    auto kwargs_obj = options.Get("chat_template_kwargs").As<Napi::Object>();
+    auto props = kwargs_obj.GetPropertyNames();
+    for (uint32_t i = 0; i < props.Length(); i++) {
+      auto key = props.Get(i).ToString().Utf8Value();
+      auto val = kwargs_obj.Get(key).ToString().Utf8Value();
+      chat_template_kwargs[key] = val;
+    }
+  }

   common_chat_params chatParams;

   try {
     chatParams = getFormattedChatWithJinja(
         _sess, _templates, json_stringify(messages), chat_template,
-        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking);
+        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking,
+        add_generation_prompt, now_str, chat_template_kwargs);
   } catch (const std::exception &e) {
     Napi::Error::New(env, e.what()).ThrowAsJavaScriptException();
     return env.Undefined();
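The Completion path reads the same add_generation_prompt / now / chat_template_kwargs options when it formats messages, even though the TypeScript diff above only adds them to the chat-formatting signature. A hedged end-to-end sketch; field values and the kwargs key are illustrative:

  const reply = await context.completion({
    messages: [
      { role: 'system', content: 'You are a terse assistant.' },
      { role: 'user', content: 'Summarize SWA caching in one sentence.' },
    ],
    enable_thinking: true,
    add_generation_prompt: true,
    chat_template_kwargs: { style: 'concise' }, // hypothetical template variable
    n_predict: 64,
  })
  console.log(reply.text)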