@fugood/llama.node 0.3.4 → 0.3.5
- package/CMakeLists.txt +1 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +11 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +16 -0
- package/src/LlamaContext.cpp +5 -0
package/CMakeLists.txt
CHANGED
@@ -7,7 +7,7 @@ project (llama-node)
 set(CMAKE_CXX_STANDARD 17)
 
 execute_process(COMMAND
-  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/
+  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/llama.cpp.patch
   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 )
 
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file
package/bin/darwin/x64/llama-node.node
CHANGED
Binary file
package/bin/linux/arm64/llama-node.node
CHANGED
Binary file
package/bin/linux/x64/llama-node.node
CHANGED
Binary file
package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file
package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file
package/bin/win32/arm64/llama-node.node
CHANGED
Binary file
package/bin/win32/arm64/node.lib
CHANGED
Binary file
package/bin/win32/x64/llama-node.node
CHANGED
Binary file
package/bin/win32/x64/node.lib
CHANGED
Binary file
package/bin/win32-vulkan/arm64/llama-node.node
CHANGED
Binary file
package/bin/win32-vulkan/arm64/node.lib
CHANGED
Binary file
package/bin/win32-vulkan/x64/llama-node.node
CHANGED
Binary file
package/bin/win32-vulkan/x64/node.lib
CHANGED
Binary file
package/lib/binding.ts
CHANGED
@@ -16,6 +16,7 @@ export type LlamaModelOptions = {
   n_gpu_layers?: number
   use_mlock?: boolean
   use_mmap?: boolean
+  vocab_only?: boolean
 }
 
 export type LlamaCompletionOptions = {
@@ -53,6 +54,16 @@ export type LlamaCompletionResult = {
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
+  timings: {
+    prompt_n: number
+    prompt_ms: number
+    prompt_per_token_ms: number
+    prompt_per_second: number
+    predicted_n: number
+    predicted_ms: number
+    predicted_per_token_ms: number
+    predicted_per_second: number
+  }
 }
 
 export type LlamaCompletionToken = {
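For reference, the new `timings` object mirrors llama.cpp's per-context perf counters, split into prefill ("prompt") and decode ("predicted") phases. A minimal TypeScript sketch of reading it, assuming the package's `loadModel` entry point and `completion` method (neither is shown in this diff); the model path is a placeholder:

import { loadModel } from '@fugood/llama.node'

async function main() {
  // 'model.gguf' is a placeholder path, not taken from this diff
  const context = await loadModel({ model: 'model.gguf' })
  const result = await context.completion({ prompt: 'Hello', n_predict: 16 })
  // New in 0.3.5: per-phase throughput numbers
  console.log(`prefill: ${result.timings.prompt_per_second} tok/s`)
  console.log(`decode:  ${result.timings.predicted_per_second} tok/s`)
}

main()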
package/package.json
CHANGED
-  "version": "0.3.4",
+  "version": "0.3.5",

package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -159,6 +159,22 @@ void LlamaCompletionWorker::OnOK() {
              Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
   result.Set("text",
              Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+
+  auto ctx = _sess->context();
+  const auto timings_token = llama_perf_context(ctx);
+
+  auto timingsResult = Napi::Object::New(Napi::AsyncWorker::Env());
+  timingsResult.Set("prompt_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_p_eval));
+  timingsResult.Set("prompt_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms));
+  timingsResult.Set("prompt_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms / timings_token.n_p_eval));
+  timingsResult.Set("prompt_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_p_eval_ms * timings_token.n_p_eval));
+  timingsResult.Set("predicted_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_eval));
+  timingsResult.Set("predicted_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms));
+  timingsResult.Set("predicted_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms / timings_token.n_eval));
+  timingsResult.Set("predicted_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_eval_ms * timings_token.n_eval));
+
+  result.Set("timings", timingsResult);
+
   Napi::Promise::Deferred::Resolve(result);
 }
 
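The derived fields are simple arithmetic over llama.cpp's raw counters: per_token_ms = total_ms / n, and per_second = 1e3 / total_ms * n (converting "n tokens in total_ms milliseconds" to tokens per second). A quick TypeScript restatement of that arithmetic, using the field names from the binding.ts hunk above:

// Same math the worker performs on llama_perf_context counters
const perTokenMs = (totalMs: number, n: number): number => totalMs / n
const perSecond = (totalMs: number, n: number): number => (1e3 / totalMs) * n

// e.g. 50 prompt tokens in 250 ms -> 5 ms/token, 200 tokens/s
console.log(perTokenMs(250, 50), perSecond(250, 50))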
package/src/LlamaContext.cpp
CHANGED
@@ -76,6 +76,11 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
 
+  params.vocab_only = get_option<bool>(options, "vocab_only", false);
+  if (params.vocab_only) {
+    params.warmup = false;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.embedding = get_option<bool>(options, "embedding", false);
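Setting `vocab_only` loads only the model's tokenizer data (no weights), and the hunk above also skips the warmup decode, so it suits tokenizer-only use. A hedged sketch, reusing the assumed `loadModel` entry point from above and further assuming the context exposes a `tokenize` method:

import { loadModel } from '@fugood/llama.node'

async function main() {
  // Weights are skipped with vocab_only, so this loads quickly and uses
  // little memory; 'model.gguf' is again a placeholder path
  const tok = await loadModel({ model: 'model.gguf', vocab_only: true })
  const res = await tok.tokenize('Hello world')
  console.log(res) // token ids; the exact result shape depends on the binding
}

main()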