@fugood/llama.node 1.3.2 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CMakeLists.txt +8 -3
  2. package/package.json +14 -14
  3. package/scripts/llama.cpp.patch +5 -5
  4. package/src/LlamaCompletionWorker.cpp +33 -33
  5. package/src/LlamaContext.cpp +17 -16
  6. package/src/llama.cpp/CMakeLists.txt +4 -0
  7. package/src/llama.cpp/common/CMakeLists.txt +6 -37
  8. package/src/llama.cpp/common/common.cpp +1 -5
  9. package/src/llama.cpp/common/download.cpp +47 -29
  10. package/src/llama.cpp/common/log.cpp +6 -0
  11. package/src/llama.cpp/common/log.h +2 -0
  12. package/src/llama.cpp/ggml/include/ggml.h +71 -0
  13. package/src/llama.cpp/ggml/src/CMakeLists.txt +16 -0
  14. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -3
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +29 -0
  16. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +283 -0
  17. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +1 -0
  18. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +235 -34
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -277
  20. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +4 -0
  21. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +95 -42
  22. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +16 -0
  23. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +2 -0
  24. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +17 -0
  25. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +10 -0
  26. package/src/llama.cpp/src/CMakeLists.txt +6 -0
  27. package/src/llama.cpp/src/llama-arch.cpp +32 -0
  28. package/src/llama.cpp/src/llama-arch.h +2 -0
  29. package/src/llama.cpp/src/llama-graph.cpp +2 -1
  30. package/src/llama.cpp/src/llama-model.cpp +102 -0
  31. package/src/llama.cpp/src/llama-model.h +2 -0
  32. package/src/llama.cpp/src/llama-sampling.cpp +10 -5
  33. package/src/llama.cpp/src/llama-vocab.cpp +16 -1
  34. package/src/llama.cpp/src/llama-vocab.h +1 -0
  35. package/src/llama.cpp/src/models/afmoe.cpp +187 -0
  36. package/src/llama.cpp/src/models/models.h +4 -0
  37. package/src/llama.cpp/src/unicode.cpp +77 -0
package/CMakeLists.txt CHANGED
@@ -120,9 +120,14 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DEFINED GGML_OPENMP OR GGML_O
 endif()
 
 set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
-
+set(LLAMA_BUILD_TOOLS OFF CACHE BOOL "Build tools")
+set(LLAMA_BUILD_TESTS OFF CACHE BOOL "Build tests")
+set(LLAMA_BUILD_SERVER OFF CACHE BOOL "Build server")
+set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "Build examples")
 set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
+set(LLAMA_INSTALL_VERSION "0.0.0") # TODO: Set the version number (0.0.<BUILD_NUMBER>)
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 
 add_definitions(-DGGML_MAX_NAME=80)
@@ -172,7 +177,7 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
 
   add_library(win_dynamic_load ${WIN_DYNAMIC_LOAD_SRC})
   set_target_properties(win_dynamic_load PROPERTIES COMPILE_FLAGS "-Wno-implicit-function-declaration")
-
+
 unset(CMAKE_JS_SRC)
 unset(CMAKE_JS_LIB)
 unset(CMAKE_JS_NODELIB_DEF)
@@ -207,7 +212,7 @@ if(CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
 endif()
 
 if (GGML_METAL AND NOT GGML_METAL_EMBED_LIBRARY)
-  # copy ${CMAKE_BINARY_DIR}/bin/default.metallib
+  # copy ${CMAKE_BINARY_DIR}/bin/default.metallib
   add_custom_command(
     TARGET copy_assets
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/default.metallib ${METAL_LIB_TARGET_PATH}
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.2",
+  "version": "1.3.4",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.2",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.2",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.2",
-    "@fugood/node-llama-linux-arm64": "1.3.2",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.2",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.2",
-    "@fugood/node-llama-win32-x64": "1.3.2",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.2",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.2",
-    "@fugood/node-llama-win32-arm64": "1.3.2",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.2",
-    "@fugood/node-llama-darwin-x64": "1.3.2",
-    "@fugood/node-llama-darwin-arm64": "1.3.2"
+    "@fugood/node-llama-linux-x64": "1.3.4",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.4",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.4",
+    "@fugood/node-llama-linux-arm64": "1.3.4",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.4",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.4",
+    "@fugood/node-llama-win32-x64": "1.3.4",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.4",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.4",
+    "@fugood/node-llama-win32-arm64": "1.3.4",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.4",
+    "@fugood/node-llama-darwin-x64": "1.3.4",
+    "@fugood/node-llama-darwin-arm64": "1.3.4"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index 7086d08e5..9a727bcf8 100644
+index 706fa32ee..248459903 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -172,9 +172,16 @@ if (LLAMA_LLGUIDANCE)
+@@ -141,9 +141,16 @@ if (LLAMA_LLGUIDANCE)
      set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()
  
@@ -85,10 +85,10 @@ index 50efb0d4e..f471a84c7 100644
  struct common_chat_tool_call {
      std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index a8d709ab1..d8aed9c7e 100644
+index 4dc95dcba..ea0ea86c0 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -1159,6 +1159,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+@@ -1155,6 +1155,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
      mparams.n_gpu_layers = params.n_gpu_layers;
  }
  
@@ -109,7 +109,7 @@ index f42c083fa..c573cc812 100644
      int32_t n_ctx = 4096; // context size
      int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index a55191aed..53e318c62 100644
+index e52e050a8..c1000c162 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -9,10 +9,10 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
   for (size_t i = 0; i < probs.size(); i++) {
     const auto &prob = probs[i];
     Napi::Object token_obj = Napi::Object::New(env);
-
+
     std::string token_str = common_token_to_piece(ctx, prob.tok);
     token_obj.Set("content", Napi::String::New(env, token_str));
-
+
     Napi::Array token_probs = Napi::Array::New(env);
     for (size_t j = 0; j < prob.probs.size(); j++) {
       const auto &p = prob.probs[j];
@@ -83,10 +83,10 @@ void LlamaCompletionWorker::Execute() {
     }
 
     auto completion = _rn_ctx->completion;
-
+
     // Prepare completion context
     completion->rewind();
-
+
     // Set up parameters
     _rn_ctx->params.prompt = _params.prompt;
     _rn_ctx->params.sampling = _params.sampling;
@@ -95,50 +95,50 @@ void LlamaCompletionWorker::Execute() {
     _rn_ctx->params.n_ctx = _params.n_ctx;
     _rn_ctx->params.n_batch = _params.n_batch;
     _rn_ctx->params.ctx_shift = _params.ctx_shift;
-
+
     // Set prefill text
     completion->prefill_text = _prefill_text;
-
+
     // Set up TTS guide tokens if enabled
    if (_has_vocoder && _rn_ctx->tts_wrapper != nullptr) {
      _rn_ctx->tts_wrapper->guide_tokens = _guide_tokens;
      _rn_ctx->tts_wrapper->next_token_uses_guide_token = true;
    }
-
+
    // Initialize sampling
    if (!completion->initSampling()) {
      SetError("Failed to initialize sampling");
      return;
    }
-
+
    // Load prompt (handles both text-only and multimodal)
    completion->loadPrompt(_media_paths);
-
+
    // Check if context is full after loading prompt
    if (completion->context_full) {
      _result.context_full = true;
      return;
    }
-
+
    // Begin completion with chat format and reasoning settings
    completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open);
-
+
    // Main completion loop
    int token_count = 0;
    const int max_tokens = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
    while (completion->has_next_token && !_interrupted && token_count < max_tokens) {
      // Get next token using rn-llama completion
      rnllama::completion_token_output token_output = completion->doCompletion();
-
+
      if (token_output.tok == -1) {
        break;
      }
-
+
      token_count++;
-
+
      std::string token_text = common_token_to_piece(_rn_ctx->ctx, token_output.tok);
      _result.text += token_text;
-
+
      // Check for stopping strings after adding the token
      if (!_stop_words.empty()) {
        size_t stop_pos = completion->findStoppingStrings(_result.text, token_text.size(), rnllama::STOP_FULL);
@@ -148,7 +148,7 @@ void LlamaCompletionWorker::Execute() {
          break;
        }
      }
-
+
      // Handle streaming callback
      if (_has_callback && !completion->incomplete) {
        struct TokenData {
@@ -160,9 +160,9 @@ void LlamaCompletionWorker::Execute() {
          std::vector<rnllama::completion_token_output> completion_probabilities;
          llama_context* ctx;
        };
-
+
        auto partial_output = completion->parseChatOutput(true);
-
+
        // Extract completion probabilities if n_probs > 0, similar to iOS implementation
        std::vector<rnllama::completion_token_output> probs_output;
        if (_rn_ctx->params.sampling.n_probs > 0) {
@@ -171,23 +171,23 @@ void LlamaCompletionWorker::Execute() {
          size_t probs_stop_pos = std::min(_sent_token_probs_index + to_send_toks.size(), completion->generated_token_probs.size());
          if (probs_pos < probs_stop_pos) {
            probs_output = std::vector<rnllama::completion_token_output>(
-             completion->generated_token_probs.begin() + probs_pos,
+             completion->generated_token_probs.begin() + probs_pos,
              completion->generated_token_probs.begin() + probs_stop_pos
            );
          }
          _sent_token_probs_index = probs_stop_pos;
        }
-
+
        TokenData *token_data = new TokenData{
-         token_text,
-         partial_output.content,
-         partial_output.reasoning_content,
-         partial_output.tool_calls,
+         token_text,
+         partial_output.content,
+         partial_output.reasoning_content,
+         partial_output.tool_calls,
          partial_output.accumulated_text,
          probs_output,
          _rn_ctx->ctx
        };
-
+
        _tsfn.BlockingCall(token_data, [](Napi::Env env, Napi::Function jsCallback,
                                          TokenData *data) {
          auto obj = Napi::Object::New(env);
@@ -216,25 +216,25 @@ void LlamaCompletionWorker::Execute() {
            obj.Set("tool_calls", tool_calls);
          }
          obj.Set("accumulated_text", Napi::String::New(env, data->accumulated_text));
-
+
          // Add completion_probabilities if available
          if (!data->completion_probabilities.empty()) {
            obj.Set("completion_probabilities", TokenProbsToArray(env, data->ctx, data->completion_probabilities));
          }
-
+
          delete data;
          jsCallback.Call({obj});
        });
      }
    }
-
+
    // Check stopping conditions
    if (token_count >= max_tokens) {
      _result.stopped_limited = true;
    } else if (!completion->has_next_token && completion->n_remain == 0) {
      _result.stopped_limited = true;
    }
-
+
    // Set completion results from rn-llama completion context
    // tokens_evaluated should include both prompt tokens and generated tokens that were processed
    _result.tokens_evaluated = completion->num_prompt_tokens + completion->num_tokens_predicted;
@@ -245,20 +245,20 @@ void LlamaCompletionWorker::Execute() {
    _result.stopped_words = completion->stopped_word;
    _result.stopping_word = completion->stopping_word;
    _result.stopped_limited = completion->stopped_limit;
-
+
    // Get audio tokens if TTS is enabled
    if (_has_vocoder && _rn_ctx->tts_wrapper != nullptr) {
      _result.audio_tokens = _rn_ctx->tts_wrapper->audio_tokens;
    }
-
+    common_perf_print(_rn_ctx->ctx, _rn_ctx->completion->ctx_sampling);
    // End completion
    completion->endCompletion();
-
+
  } catch (const std::exception &e) {
    SetError(e.what());
    return;
  }
-
+
  if (_onComplete) {
    _onComplete();
  }
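
Aside from whitespace cleanup, the only functional change in this file is the common_perf_print call added just before endCompletion(), which logs the accumulated sampling and context (prompt/eval) timings once a completion finishes. A minimal sketch of the same call in a plain llama.cpp "common" program follows; the dump_timings wrapper and the ctx/smpl names are illustrative rather than part of this package, and the include layout assumes llama.cpp's common/ directory.

#include "llama.h"
#include "sampling.h"  // assumption: this header declares common_perf_print(ctx, sampler)

// Print the sampling and decode timings for a finished run, the same
// information the worker now emits before endCompletion().
static void dump_timings(llama_context * ctx, common_sampler * smpl) {
    common_perf_print(ctx, smpl);
}
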
package/src/LlamaContext.cpp CHANGED
@@ -376,6 +376,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
+  _rn_ctx->attachThreadpoolsIfAvailable();
 
   // Release progress callback after model is loaded
   if (has_progress_callback) {
@@ -386,7 +387,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
   }
-
+
   _info = common_params_get_system_info(params);
 }
 
@@ -636,7 +637,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
   auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(params, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (params.Has("chat_template_kwargs") && params.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = params.Get("chat_template_kwargs").As<Napi::Object>();
@@ -873,7 +874,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
   auto add_generation_prompt = get_option<bool>(options, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(options, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (options.Has("chat_template_kwargs") && options.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = options.Get("chat_template_kwargs").As<Napi::Object>();
@@ -886,7 +887,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   common_chat_params chatParams;
-
+
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         json_stringify(messages), chat_template,
@@ -1043,7 +1044,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
   }
   auto text = info[0].ToString().Utf8Value();
   std::vector<std::string> media_paths;
-
+
   if (info.Length() >= 2 && info[1].IsArray()) {
     // Direct array format: tokenize(text, [media_paths])
     auto media_paths_array = info[1].As<Napi::Array>();
@@ -1051,7 +1052,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
       media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
     }
   }
-
+
   auto *worker = new TokenizeWorker(info, _rn_ctx, text, media_paths);
   worker->Queue();
   return worker->Promise();
@@ -1072,7 +1073,7 @@ Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
   for (size_t i = 0; i < tokens.Length(); i++) {
     token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
   }
-
+
   auto *worker = new DetokenizeWorker(info, _rn_ctx, token_ids);
   worker->Queue();
   return worker->Promise();
@@ -1112,16 +1113,16 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
         .ThrowAsJavaScriptException();
   }
-
+
   auto query = info[0].ToString().Utf8Value();
   auto documents_array = info[1].As<Napi::Array>();
-
+
   // Convert documents array to vector
   std::vector<std::string> documents;
   for (size_t i = 0; i < documents_array.Length(); i++) {
     documents.push_back(documents_array.Get(i).ToString().Utf8Value());
   }
-
+
   auto options = Napi::Object::New(env);
   if (info.Length() >= 3 && info[2].IsObject()) {
     options = info[2].As<Napi::Object>();
@@ -1130,7 +1131,7 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
   common_params rerankParams;
   rerankParams.embedding = true;
   rerankParams.embd_normalize = get_option<int32_t>(options, "normalize", -1);
-
+
   auto *worker = new RerankWorker(info, _rn_ctx, query, documents, rerankParams);
   worker->Queue();
   return worker->Promise();
@@ -1379,13 +1380,13 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
   }
   auto text = info[1].ToString().Utf8Value();
   auto speaker_json = info[0].IsString() ? info[0].ToString().Utf8Value() : "";
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result_data = _rn_ctx->tts_wrapper->getFormattedAudioCompletion(_rn_ctx, speaker_json, text);
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", Napi::String::New(env, result_data.prompt));
@@ -1406,13 +1407,13 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
     return env.Undefined();
   }
   auto text = info[0].ToString().Utf8Value();
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result = _rn_ctx->tts_wrapper->getAudioCompletionGuideTokens(_rn_ctx, text);
   auto tokens = Napi::Int32Array::New(env, result.size());
   memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
@@ -1448,7 +1449,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto *worker = new DecodeAudioTokenWorker(info, _rn_ctx, tokens);
   worker->Queue();
   return worker->Promise();
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
+option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
 option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
@@ -200,6 +201,9 @@ endif()
 
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
+    if (LLAMA_HTTPLIB)
+        add_subdirectory(vendor/cpp-httplib)
+    endif()
 endif()
 
 if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -79,10 +79,11 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
 
+# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
 set(LLAMA_COMMON_EXTRA_LIBS build_info)
 
-# Use curl to download model url
 if (LLAMA_CURL)
+    # Use curl to download model url
     find_package(CURL)
     if (NOT CURL_FOUND)
         message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
@@ -90,42 +91,10 @@ if (LLAMA_CURL)
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-endif()
-
-if (LLAMA_OPENSSL)
-    find_package(OpenSSL)
-    if (OpenSSL_FOUND)
-        include(CheckCSourceCompiles)
-        set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
-        set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
-        check_c_source_compiles("
-            #include <openssl/opensslv.h>
-            #if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
-            # if OPENSSL_VERSION_NUMBER < 0x1010107f
-            # error bad version
-            # endif
-            #else
-            # if OPENSSL_VERSION_NUMBER < 0x30000000L
-            # error bad version
-            # endif
-            #endif
-            int main() { return 0; }
-        " OPENSSL_VERSION_SUPPORTED)
-        set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
-        if (OPENSSL_VERSION_SUPPORTED)
-            message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
-            target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
-            target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
-            if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-                target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
-                find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
-                find_library(SECURITY_FRAMEWORK Security REQUIRED)
-                target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
-            endif()
-        endif()
-    else()
-        message(STATUS "OpenSSL not found, SSL support disabled")
-    endif()
+elseif (LLAMA_HTTPLIB)
+    # otherwise, use cpp-httplib
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
 endif()
 
 if (LLAMA_LLGUIDANCE)
package/src/llama.cpp/common/common.cpp CHANGED
@@ -355,11 +355,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
 }
 
 void common_init() {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
-            common_log_add(common_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    llama_log_set(common_log_default_callback, NULL);
 
 #ifdef NDEBUG
     const char * build_type = "";
package/src/llama.cpp/common/download.cpp CHANGED
@@ -20,7 +20,7 @@
 #if defined(LLAMA_USE_CURL)
 #include <curl/curl.h>
 #include <curl/easy.h>
-#else
+#elif defined(LLAMA_USE_HTTPLIB)
 #include "http.h"
 #endif
 
@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
     return { res_code, std::move(res_buffer) };
 }
 
-#else
+#elif defined(LLAMA_USE_HTTPLIB)
 
 static bool is_output_a_tty() {
 #if defined(_WIN32)
@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
 
 #endif // LLAMA_USE_CURL
 
+#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
+
 static bool common_download_file_single(const std::string & url,
                                         const std::string & path,
                                         const std::string & bearer_token,
@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
     return { hf_repo, ggufFile, mmprojFile };
 }
 
-std::vector<common_cached_model_info> common_list_cached_models() {
-    std::vector<common_cached_model_info> models;
-    const std::string cache_dir = fs_get_cache_directory();
-    const std::vector<common_file_info> files = fs_list_files(cache_dir);
-    for (const auto & file : files) {
-        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
-            common_cached_model_info model_info;
-            model_info.manifest_path = file.path;
-            std::string fname = file.name;
-            string_replace_all(fname, ".json", ""); // remove extension
-            auto parts = string_split<std::string>(fname, '=');
-            if (parts.size() == 4) {
-                // expect format: manifest=<user>=<model>=<tag>=<other>
-                model_info.user = parts[1];
-                model_info.model = parts[2];
-                model_info.tag = parts[3];
-            } else {
-                // invalid format
-                continue;
-            }
-            model_info.size = 0; // TODO: get GGUF size, not manifest size
-            models.push_back(model_info);
-        }
-    }
-    return models;
-}
-
 //
 // Docker registry functions
 //
@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
         throw;
     }
 }
+
+#else
+
+common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+bool common_download_model(const common_params_model &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+std::string common_docker_resolve_model(const std::string &) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
+
+std::vector<common_cached_model_info> common_list_cached_models() {
+    std::vector<common_cached_model_info> models;
+    const std::string cache_dir = fs_get_cache_directory();
+    const std::vector<common_file_info> files = fs_list_files(cache_dir);
+    for (const auto & file : files) {
+        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
+            common_cached_model_info model_info;
+            model_info.manifest_path = file.path;
+            std::string fname = file.name;
+            string_replace_all(fname, ".json", ""); // remove extension
+            auto parts = string_split<std::string>(fname, '=');
+            if (parts.size() == 4) {
+                // expect format: manifest=<user>=<model>=<tag>=<other>
+                model_info.user = parts[1];
+                model_info.model = parts[2];
+                model_info.tag = parts[3];
+            } else {
+                // invalid format
+                continue;
+            }
+            model_info.size = 0; // TODO: get GGUF size, not manifest size
+            models.push_back(model_info);
+        }
+    }
+    return models;
+}
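
With the guards rearranged as above, common_get_hf_file, common_download_model, and common_docker_resolve_model compile to throwing stubs when neither LLAMA_USE_CURL nor LLAMA_USE_HTTPLIB is defined, while common_list_cached_models is now available in every build. A caller-side sketch under that assumption follows; the try_resolve helper, the meaning of its arguments, and the "download.h" include name are illustrative rather than confirmed by this package.

#include <cstdio>
#include <stdexcept>
#include <string>
#include "download.h"  // assumption: the common header declaring common_get_hf_file

// In a build configured with both download backends disabled, the stub above
// throws std::runtime_error, so callers can detect the missing feature at runtime.
static bool try_resolve(const std::string & hf_repo_with_tag) {
    try {
        auto res = common_get_hf_file(hf_repo_with_tag, /*bearer_token (assumed)*/ "", /*offline (assumed)*/ false);
        (void) res; // on success this carries the resolved repo / GGUF / mmproj entries
        return true;
    } catch (const std::runtime_error & e) {
        std::fprintf(stderr, "%s\n", e.what()); // "download functionality is not enabled in this build"
        return false;
    }
}
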
package/src/llama.cpp/common/log.cpp CHANGED
@@ -442,3 +442,9 @@ void common_log_set_prefix(struct common_log * log, bool prefix) {
 void common_log_set_timestamps(struct common_log * log, bool timestamps) {
     log->set_timestamps(timestamps);
 }
+
+void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
+    if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+        common_log_add(common_log_main(), level, "%s", text);
+    }
+}
package/src/llama.cpp/common/log.h CHANGED
@@ -36,6 +36,8 @@ extern int common_log_verbosity_thold;
 
 void common_log_set_verbosity_thold(int verbosity); // not thread-safe
 
+void common_log_default_callback(enum ggml_log_level level, const char * text, void * user_data);
+
 // the common_log uses an internal worker thread to print/write log messages
 // when the worker thread is paused, incoming log messages are discarded
 struct common_log;
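
The common_log_default_callback declared here is the same callback that common_init() now registers through llama_log_set (see the common.cpp hunk above). A minimal sketch of wiring it up directly from an embedder follows; the verbosity value is illustrative, and the include names assume llama.cpp's common/ layout.

#include "llama.h"
#include "log.h"  // declares common_log_default_callback and common_log_set_verbosity_thold

int main() {
    // Route llama.cpp's internal log messages through the shared common_log sink.
    // The callback forwards a message only while common_log_verbosity_thold is at
    // least LOG_DEFAULT_LLAMA (see its definition in log.cpp above).
    llama_log_set(common_log_default_callback, /*user_data*/ nullptr);
    common_log_set_verbosity_thold(1); // illustrative value

    // ... create a model and context and run as usual ...
    return 0;
}
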