@fugood/llama.node 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -7,7 +7,7 @@ project (llama-node)
 set(CMAKE_CXX_STANDARD 17)
 
 execute_process(COMMAND
-  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
+  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/llama.cpp.patch
   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 )
 
Binary files CHANGED (10 files; paths not shown)
package/lib/binding.ts CHANGED
@@ -16,6 +16,7 @@ export type LlamaModelOptions = {
   n_gpu_layers?: number
   use_mlock?: boolean
   use_mmap?: boolean
+  vocab_only?: boolean
 }
 
 export type LlamaCompletionOptions = {
@@ -53,6 +54,16 @@ export type LlamaCompletionResult = {
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
+  timings: {
+    prompt_n: number
+    prompt_ms: number
+    prompt_per_token_ms: number
+    prompt_per_second: number
+    predicted_n: number
+    predicted_ms: number
+    predicted_per_token_ms: number
+    predicted_per_second: number
+  }
 }
 
 export type LlamaCompletionToken = {
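
The new timings field on LlamaCompletionResult mirrors llama.cpp's per-context performance counters for each completion. A minimal consumer-side sketch of reading it, assuming the package exposes a loadModel entry point and a completion method on the returned context (those names are assumptions, not shown in this diff):

import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: './model.gguf' }) // illustrative model path
const result = await context.completion({ prompt: 'Hello' })

// New in 0.3.5: prompt-phase and generation-phase throughput
console.log(result.timings.prompt_per_second)     // prompt tokens per second
console.log(result.timings.predicted_per_second)  // generated tokens per second
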
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.4",
+  "version": "0.3.5",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -159,6 +159,22 @@ void LlamaCompletionWorker::OnOK() {
       Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
   result.Set("text",
       Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));
+
+  auto ctx = _sess->context();
+  const auto timings_token = llama_perf_context(ctx);
+
+  auto timingsResult = Napi::Object::New(Napi::AsyncWorker::Env());
+  timingsResult.Set("prompt_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_p_eval));
+  timingsResult.Set("prompt_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms));
+  timingsResult.Set("prompt_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms / timings_token.n_p_eval));
+  timingsResult.Set("prompt_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_p_eval_ms * timings_token.n_p_eval));
+  timingsResult.Set("predicted_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_eval));
+  timingsResult.Set("predicted_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms));
+  timingsResult.Set("predicted_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms / timings_token.n_eval));
+  timingsResult.Set("predicted_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_eval_ms * timings_token.n_eval));
+
+  result.Set("timings", timingsResult);
+
   Napi::Promise::Deferred::Resolve(result);
 }
 
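The per-token and per-second values are simple arithmetic over the raw counters returned by llama_perf_context(): n_p_eval and t_p_eval_ms for the prompt phase, n_eval and t_eval_ms for generation. A worked example with made-up numbers, written in TypeScript for brevity:

// Hypothetical counters: 32 prompt tokens evaluated in 80 ms
const prompt_n = 32
const prompt_ms = 80

const prompt_per_token_ms = prompt_ms / prompt_n        // 2.5 ms per token
const prompt_per_second = (1e3 / prompt_ms) * prompt_n  // 400 tokens per second

The predicted_* fields follow the same formulas, using n_eval and t_eval_ms.
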
package/src/LlamaContext.cpp CHANGED
@@ -76,6 +76,11 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
 
+  params.vocab_only = get_option<bool>(options, "vocab_only", false);
+  if (params.vocab_only) {
+    params.warmup = false;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.embedding = get_option<bool>(options, "embedding", false);
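
vocab_only maps to llama.cpp's vocab_only model parameter: only the vocabulary is loaded, no weights, and the binding disables warmup, so such a context is only useful for tokenization. A sketch of how a caller might use it, again assuming loadModel and a tokenize method on the returned context (assumptions, not confirmed by this diff):

import { loadModel } from '@fugood/llama.node'

// Loads tokenizer data only; cheap to create because model weights are skipped
// and warmup is turned off by the binding when vocab_only is set.
const vocabContext = await loadModel({
  model: './model.gguf', // illustrative path
  vocab_only: true,
})

const tokenized = await vocabContext.tokenize('Hello world')
console.log(tokenized)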