npm - node-llama-cpp - Versions diffs - 2.8.8 → 2.8.10 - Mend

node-llama-cpp 2.8.8 → 2.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/llama/addon.cpp +4 -4
package/llama/binariesGithubRelease.json +1 -1
package/llama/gitRelease.bundle +0 -0
package/llama/grammars/README.md +10 -0
package/llama/grammars/json.gbnf +1 -1
package/llama/grammars/json_arr.gbnf +1 -1
package/llamaBins/linux-arm64/llama-addon.node +0 -0
package/llamaBins/linux-armv7l/llama-addon.node +0 -0
package/llamaBins/linux-x64/llama-addon.node +0 -0
package/llamaBins/mac-arm64/default.metallib +0 -0
package/llamaBins/mac-arm64/llama-addon.node +0 -0
package/llamaBins/mac-x64/default.metallib +0 -0
package/llamaBins/mac-x64/llama-addon.node +0 -0
package/llamaBins/win-x64/llama-addon.node +0 -0
package/package.json +1 -1
package/llamaBins/mac-arm64/ggml-metal.metal +0 -6718
package/llamaBins/mac-x64/ggml-metal.metal +0 -6718

package/llama/addon.cpp CHANGED Viewed

@@ -157,7 +157,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
       }
       if (options.Has("embedding")) {
-        context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
+        context_params.embeddings = options.Get("embedding").As<Napi::Boolean>().Value();
       }
       if (options.Has("threads")) {
@@ -167,10 +167,10 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
     }
     ctx = llama_new_context_with_model(model->model, context_params);
-    Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
+    Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_state_get_size(ctx));
   }
   ~LLAMAContext() {
-    Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
+    Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_state_get_size(ctx));
     llama_free(ctx);
     model->Unref();
   }
@@ -409,7 +409,7 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
         llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
         llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
         llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
-        llama_sample_temperature(ctx->ctx, &candidates_p, temperature);
+        llama_sample_temp(ctx->ctx, &candidates_p, temperature);
         new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
     }

package/llama/binariesGithubRelease.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-    "release": "b2249"
+    "release": "b2750"
 }

package/llama/gitRelease.bundle CHANGED Viewed

Binary file

package/llama/grammars/README.md CHANGED Viewed

@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
 ```
 ./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
 ```
+## Troubleshooting
+Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
+### Efficient optional repetitions
+A common pattern is to allow repetitions of a pattern `x` up to N times.
+While semantically correct, the syntax `x? x? x?.... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (w/ N-deep nesting)

package/llama/grammars/json.gbnf CHANGED Viewed

@@ -15,7 +15,7 @@ array  ::=
 string ::=
   "\"" (
-    [^"\\] |
+    [^"\\\x7F\x00-\x1F] |
     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
   )* "\"" ws

package/llama/grammars/json_arr.gbnf CHANGED Viewed

@@ -24,7 +24,7 @@ array  ::=
 string ::=
   "\"" (
-    [^"\\] |
+    [^"\\\x7F\x00-\x1F] |
     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
   )* "\"" ws

package/llamaBins/linux-arm64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-armv7l/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/linux-x64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/mac-arm64/default.metallib ADDED Viewed

Binary file

package/llamaBins/mac-arm64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/mac-x64/default.metallib ADDED Viewed

Binary file

package/llamaBins/mac-x64/llama-addon.node CHANGED Viewed

Binary file

package/llamaBins/win-x64/llama-addon.node CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "node-llama-cpp",
-  "version": "2.8.8",
+  "version": "2.8.10",
   "description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
   "main": "dist/index.js",
   "type": "module",