node-llama-cpp 2.8.8 → 2.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/llama/addon.cpp CHANGED
@@ -157,7 +157,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
157
157
  }
158
158
 
159
159
  if (options.Has("embedding")) {
160
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
160
+ context_params.embeddings = options.Get("embedding").As<Napi::Boolean>().Value();
161
161
  }
162
162
 
163
163
  if (options.Has("threads")) {
@@ -167,10 +167,10 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
167
167
  }
168
168
 
169
169
  ctx = llama_new_context_with_model(model->model, context_params);
170
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
170
+ Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_state_get_size(ctx));
171
171
  }
172
172
  ~LLAMAContext() {
173
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
173
+ Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_state_get_size(ctx));
174
174
  llama_free(ctx);
175
175
  model->Unref();
176
176
  }
@@ -409,7 +409,7 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
409
409
  llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
410
410
  llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
411
411
  llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
412
- llama_sample_temperature(ctx->ctx, &candidates_p, temperature);
412
+ llama_sample_temp(ctx->ctx, &candidates_p, temperature);
413
413
  new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
414
414
  }
415
415
 
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b2249"
2
+ "release": "b2750"
3
3
  }
Binary file
@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
89
89
  ```
90
90
  ./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
91
91
  ```
92
+
93
+ ## Troubleshooting
94
+
95
+ Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
96
+
97
+ ### Efficient optional repetitions
98
+
99
+ A common pattern is to allow repetitions of a pattern `x` up to N times.
100
+
101
+ While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).
@@ -15,7 +15,7 @@ array ::=
15
15
 
16
16
  string ::=
17
17
  "\"" (
18
- [^"\\] |
18
+ [^"\\\x7F\x00-\x1F] |
19
19
  "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
20
20
  )* "\"" ws
21
21
 
@@ -24,7 +24,7 @@ array ::=
24
24
 
25
25
  string ::=
26
26
  "\"" (
27
- [^"\\] |
27
+ [^"\\\x7F\x00-\x1F] |
28
28
  "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
29
29
  )* "\"" ws
30
30
 
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-llama-cpp",
3
- "version": "2.8.8",
3
+ "version": "2.8.10",
4
4
  "description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",