node-llama-cpp 2.8.8 → 2.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/llama/addon.cpp +4 -4
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +10 -0
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/default.metallib +0 -0
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/default.metallib +0 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/package.json +1 -1
- package/llamaBins/mac-arm64/ggml-metal.metal +0 -6718
- package/llamaBins/mac-x64/ggml-metal.metal +0 -6718
package/llama/addon.cpp
CHANGED
|
@@ -157,7 +157,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
157
157
|
}
|
|
158
158
|
|
|
159
159
|
if (options.Has("embedding")) {
|
|
160
|
-
context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
|
|
160
|
+
context_params.embeddings = options.Get("embedding").As<Napi::Boolean>().Value();
|
|
161
161
|
}
|
|
162
162
|
|
|
163
163
|
if (options.Has("threads")) {
|
|
@@ -167,10 +167,10 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
167
167
|
}
|
|
168
168
|
|
|
169
169
|
ctx = llama_new_context_with_model(model->model, context_params);
|
|
170
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
170
|
+
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_state_get_size(ctx));
|
|
171
171
|
}
|
|
172
172
|
~LLAMAContext() {
|
|
173
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
|
|
173
|
+
Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_state_get_size(ctx));
|
|
174
174
|
llama_free(ctx);
|
|
175
175
|
model->Unref();
|
|
176
176
|
}
|
|
@@ -409,7 +409,7 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
|
|
|
409
409
|
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
410
410
|
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
411
411
|
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
412
|
-
llama_sample_temperature(ctx->ctx, &candidates_p, temperature);
|
|
412
|
+
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
413
413
|
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
414
414
|
}
|
|
415
415
|
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
package/llama/grammars/README.md
CHANGED
|
@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
|
|
|
89
89
|
```
|
|
90
90
|
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
|
|
91
91
|
```
|
|
92
|
+
|
|
93
|
+
## Troubleshooting
|
|
94
|
+
|
|
95
|
+
Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
|
|
96
|
+
|
|
97
|
+
### Efficient optional repetitions
|
|
98
|
+
|
|
99
|
+
A common pattern is to allow repetitions of a pattern `x` up to N times.
|
|
100
|
+
|
|
101
|
+
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).
|
package/llama/grammars/json.gbnf
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "2.8.8",
|
|
3
|
+
"version": "2.8.10",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|