@fugood/llama.node 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -114,6 +114,9 @@ set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
  set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
  set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
+
+ add_definitions(-DGGML_MAX_NAME=80)
+
  add_subdirectory("src/llama.cpp")
  add_subdirectory("src/llama.cpp/tools/mtmd")
 
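The new add_definitions line passes -DGGML_MAX_NAME=80 to every translation unit in the build, raising ggml's compile-time cap on tensor-name length. A minimal sketch of what the flag controls, assuming the upstream ggml.h default of 64; the struct below is illustrative only and not part of the package:

// GGML_MAX_NAME sizes the fixed name buffer carried by each ggml tensor.
// Defining the macro on the compiler command line overrides the header's
// fallback default, so longer tensor names fit without truncation.
#ifndef GGML_MAX_NAME
#define GGML_MAX_NAME 64   // assumed upstream default when the flag is absent
#endif

struct demo_named_tensor {
    char name[GGML_MAX_NAME];  // 80 bytes when built with -DGGML_MAX_NAME=80
};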
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.1.3",
+ "version": "1.1.4",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -71,19 +71,19 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.1.3",
- "@fugood/node-llama-linux-x64-vulkan": "1.1.3",
- "@fugood/node-llama-linux-x64-cuda": "1.1.3",
- "@fugood/node-llama-linux-arm64": "1.1.3",
- "@fugood/node-llama-linux-arm64-vulkan": "1.1.3",
- "@fugood/node-llama-linux-arm64-cuda": "1.1.3",
- "@fugood/node-llama-win32-x64": "1.1.3",
- "@fugood/node-llama-win32-x64-vulkan": "1.1.3",
- "@fugood/node-llama-win32-x64-cuda": "1.1.3",
- "@fugood/node-llama-win32-arm64": "1.1.3",
- "@fugood/node-llama-win32-arm64-vulkan": "1.1.3",
- "@fugood/node-llama-darwin-x64": "1.1.3",
- "@fugood/node-llama-darwin-arm64": "1.1.3"
+ "@fugood/node-llama-linux-x64": "1.1.4",
+ "@fugood/node-llama-linux-x64-vulkan": "1.1.4",
+ "@fugood/node-llama-linux-x64-cuda": "1.1.4",
+ "@fugood/node-llama-linux-arm64": "1.1.4",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.1.4",
+ "@fugood/node-llama-linux-arm64-cuda": "1.1.4",
+ "@fugood/node-llama-win32-x64": "1.1.4",
+ "@fugood/node-llama-win32-x64-vulkan": "1.1.4",
+ "@fugood/node-llama-win32-x64-cuda": "1.1.4",
+ "@fugood/node-llama-win32-arm64": "1.1.4",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.1.4",
+ "@fugood/node-llama-darwin-x64": "1.1.4",
+ "@fugood/node-llama-darwin-arm64": "1.1.4"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -64,6 +64,7 @@ void LlamaCompletionWorker::Execute() {
  size_t n_input = 0;
  const auto model = _sess->model();
  auto vocab = llama_model_get_vocab(model);
+ const bool is_enc_dec = llama_model_has_encoder(model);
 
  const bool add_bos = llama_vocab_get_add_bos(vocab);
  auto ctx = _sess->context();
@@ -110,7 +111,7 @@ void LlamaCompletionWorker::Execute() {
  } else {
  // Text-only path
  std::vector<llama_token> prompt_tokens =
- ::common_tokenize(ctx, _params.prompt, add_bos, true);
+ ::common_tokenize(ctx, _params.prompt, add_bos || is_enc_dec, true);
  n_input = prompt_tokens.size();
 
  if (_sess->tokens_ptr()->size() > 0) {
@@ -126,9 +127,47 @@ void LlamaCompletionWorker::Execute() {
  }
 
  const int max_len = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
- _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
-
  auto embd = _sess->tokens_ptr();
+ embd->reserve(embd->size() + max_len);
+
+ if (is_enc_dec) {
+ if (n_input > 0) {
+ // Decode tokens in batches using n_batch as chunk size
+ int n_past_batch = n_cur;
+ int n_remaining = n_input;
+
+ while (n_remaining > 0) {
+ int n_eval = n_remaining;
+ if (n_eval > _params.n_batch) {
+ n_eval = _params.n_batch;
+ }
+
+ int ret = llama_encode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
+ if (ret < 0) {
+ SetError("Failed to encode token batch, code: " + std::to_string(ret) +
+ ", n_eval: " + std::to_string(n_eval) +
+ ", n_past_batch: " + std::to_string(n_past_batch));
+ _sess->get_mutex().unlock();
+ return;
+ }
+
+ n_past_batch += n_eval;
+ n_remaining -= n_eval;
+ n_cur += n_eval;
+ }
+ }
+ _result.tokens_evaluated += n_input;
+
+ llama_token decode_bos = llama_model_decoder_start_token(model);
+ if (decode_bos == LLAMA_TOKEN_NULL) {
+ decode_bos = llama_vocab_bos(vocab);
+ }
+
+ embd->emplace_back(decode_bos);
+ common_sampler_accept(sampling.get(), decode_bos, false);
+ n_input = 1;
+ }
+
  for (int i = 0; (i < max_len || _interrupted) && !_params.vocab_only; i++) {
  // check if we need to remove some tokens
  if (embd->size() >= _params.n_ctx) {
@@ -166,13 +205,14 @@ void LlamaCompletionWorker::Execute() {
  if (n_eval > _params.n_batch) {
  n_eval = _params.n_batch;
  }
-
+
  int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
  if (ret < 0) {
  SetError("Failed to decode token batch, code: " + std::to_string(ret) +
  ", n_eval: " + std::to_string(n_eval) +
  ", n_past_batch: " + std::to_string(n_past_batch));
- break;
+ _sess->get_mutex().unlock();
+ return;
  }
 
  n_past_batch += n_eval;
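
The worker changes above add an encoder-decoder path: when llama_model_has_encoder() reports true, the prompt is fed through llama_encode() in n_batch-sized chunks, and generation then starts from the model's decoder start token, falling back to BOS. A minimal sketch of that flow against the llama.cpp C API, with sampling and the worker's session state elided; encode_prompt is an illustrative helper, not part of the package:

#include <algorithm>
#include <cstddef>
#include <vector>
#include "llama.h"

// Feeds the prompt through the encoder and returns the token the decoder
// should start from, or LLAMA_TOKEN_NULL if the model has no encoder or
// encoding fails.
static llama_token encode_prompt(llama_context * ctx, const llama_model * model,
                                 std::vector<llama_token> & prompt, int n_batch) {
    if (!llama_model_has_encoder(model)) {
        return LLAMA_TOKEN_NULL;  // text-only models go straight to llama_decode
    }
    // Encode the prompt in n_batch-sized chunks, mirroring the worker's loop.
    for (size_t i = 0; i < prompt.size(); i += (size_t) n_batch) {
        const int n_eval = (int) std::min<size_t>((size_t) n_batch, prompt.size() - i);
        if (llama_encode(ctx, llama_batch_get_one(prompt.data() + i, n_eval)) < 0) {
            return LLAMA_TOKEN_NULL;  // caller reports the error, as the worker does
        }
    }
    // Generation resumes from the model's dedicated decoder start token,
    // falling back to BOS when the model does not define one.
    llama_token start = llama_model_decoder_start_token(model);
    if (start == LLAMA_TOKEN_NULL) {
        start = llama_vocab_bos(llama_model_get_vocab(model));
    }
    return start;
}

The early returns that replace the old break also account for the paired _sess->get_mutex().unlock() calls: on a failed encode or decode, Execute() now releases the session mutex itself before bailing out instead of falling through the rest of the loop.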