cui-llama.rn 1.1.5 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
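The bulk of this release replaces direct fprintf(stderr, ...) calls in package/cpp/common.cpp with the LOG_ERR / LOG_WRN / LOG_INF macros from common's log.h (log.cpp is added to the CMake sources, and the new gpt_init() helper routes llama.cpp's own logger through the same facility, filtered by the common verbosity threshold). A rough before/after sketch of the pattern (illustrative, not a line from this diff):

    // before: unconditional write to stderr
    fprintf(stderr, "%s: failed to load model '%s'\n", __func__, params.model.c_str());
    // after: routed through the common logger, which can filter by verbosity
    LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());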
@@ -12,6 +12,7 @@ set(
  ${RNLLAMA_LIB_DIR}/llama-grammar.cpp
  ${RNLLAMA_LIB_DIR}/llama-sampling.cpp
  ${RNLLAMA_LIB_DIR}/llama-vocab.cpp
+ ${RNLLAMA_LIB_DIR}/log.cpp
 
  ${RNLLAMA_LIB_DIR}/ggml-aarch64.c
  ${RNLLAMA_LIB_DIR}/ggml-alloc.c
@@ -538,7 +538,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
  putString(env, result, "stopping_word", llama->stopping_word.c_str());
  putInt(env, result, "tokens_cached", llama->n_past);
 
- const auto timings_token = llama_get_token_timings(llama->ctx);
+ const auto timings_token = llama_perf_context(llama -> ctx);
 
  auto timingsResult = createWriteableMap(env);
  putInt(env, timingsResult, "prompt_n", timings_token.n_p_eval);
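Note the switch from llama_get_token_timings() to the upstream llama_perf_context() API here (and from the old llama_reset_timings() to llama_perf_context_reset() in the embedding hunk below). A hedged sketch of how the returned struct is consumed, based on the field this diff uses (n_p_eval); the n_eval field follows current llama.cpp and is an assumption:

    const auto timings = llama_perf_context(llama->ctx);  // per-context perf counters
    int prompt_n    = timings.n_p_eval;                   // tokens evaluated for the prompt
    int predicted_n = timings.n_eval;                     // tokens generated (assumed field name)
    llama_perf_context_reset(llama->ctx);                 // clear counters before the next run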
@@ -635,7 +635,6 @@ Java_com_rnllama_LlamaContext_embedding(
 
  llama->rewind();
 
- // llama_reset_timings(llama->ctx);
  llama_perf_context_reset(llama->ctx);
  gpt_sampler_reset(llama->ctx_sampling);
 
package/cpp/common.cpp CHANGED
@@ -3,6 +3,7 @@
  #endif
 
  #include "common.h"
+ #include "log.h"
  // Change JSON_ASSERT from assert() to LM_GGML_ASSERT:
  #define JSON_ASSERT LM_GGML_ASSERT
  #include "json.hpp"
@@ -25,6 +26,7 @@
  #include <unordered_map>
  #include <unordered_set>
  #include <vector>
+ #include <thread>
 
  #if defined(__APPLE__) && defined(__MACH__)
  #include <sys/types.h>
@@ -48,7 +50,6 @@
  #if defined(LLAMA_USE_CURL)
  #include <curl/curl.h>
  #include <curl/easy.h>
- #include <thread>
  #include <future>
  #endif
 
@@ -232,7 +233,7 @@ bool set_process_priority(enum lm_ggml_sched_priority prio) {
  }
 
  if (!SetPriorityClass(GetCurrentProcess(), p)) {
- fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
+ LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
  return false;
  }
 
@@ -257,7 +258,7 @@ bool set_process_priority(enum lm_ggml_sched_priority prio) {
  }
 
  if (!setpriority(PRIO_PROCESS, 0, p)) {
- fprintf(stderr, "warn: failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
+ LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
  return false;
  }
  return true;
@@ -290,14 +291,14 @@ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model)
 
  if (n_set && n_set < cpuparams.n_threads) {
  // Not enough set bits, may experience performance issues.
- fprintf(stderr, "warn: Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
+ LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
  }
  }
 
  bool parse_cpu_range(const std::string & range, bool (&boolmask)[LM_GGML_MAX_N_THREADS]) {
  size_t dash_loc = range.find('-');
  if (dash_loc == std::string::npos) {
- fprintf(stderr, "Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
+ LOG_ERR("Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
  return false;
  }
 
@@ -309,7 +310,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[LM_GGML_MAX_N_T
  } else {
  start_i = std::stoull(range.substr(0, dash_loc));
  if (start_i >= LM_GGML_MAX_N_THREADS) {
- fprintf(stderr, "Start index out of bounds!\n");
+ LOG_ERR("Start index out of bounds!\n");
  return false;
  }
  }
@@ -319,7 +320,7 @@ bool parse_cpu_range(const std::string & range, bool (&boolmask)[LM_GGML_MAX_N_T
  } else {
  end_i = std::stoull(range.substr(dash_loc + 1));
  if (end_i >= LM_GGML_MAX_N_THREADS) {
- fprintf(stderr, "End index out of bounds!\n");
+ LOG_ERR("End index out of bounds!\n");
  return false;
  }
  }
@@ -354,7 +355,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[LM_GGML_MAX_N_THR
  } else if (c >= 'A' && c <= 'F') {
  id -= 'A' - 10;
  } else {
- fprintf(stderr, "Invalid hex character '%c' at position %d\n", c, int32_t(i));
+ LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
  return false;
  }
 
@@ -367,6 +368,22 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[LM_GGML_MAX_N_THR
  return true;
  }
 
+ void gpt_init() {
+ llama_log_set([](lm_ggml_log_level level, const char * text, void * /*user_data*/) {
+ if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_thold) {
+ gpt_log_add(gpt_log_main(), level, "%s", text);
+ }
+ }, NULL);
+
+ #ifdef NDEBUG
+ const char * build_type = "";
+ #else
+ const char * build_type = " (debug)";
+ #endif
+
+ LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
+ }
+
  std::string gpt_params_get_system_info(const gpt_params & params) {
  std::ostringstream os;
 
@@ -447,6 +464,94 @@ void string_replace_all(std::string & s, const std::string & search, const std::
  s = std::move(builder);
  }
 
+ std::string string_from(bool value) {
+ return value ? "true" : "false";
+ }
+
+ std::string string_from(const std::vector<int> & values) {
+ std::stringstream buf;
+
+ buf << "[ ";
+ bool first = true;
+ for (auto e : values) {
+ if (first) {
+ first = false;
+ } else {
+ buf << ", ";
+ }
+ buf << std::to_string(e);
+ }
+ buf << " ]";
+
+ return buf.str();
+ }
+
+ std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
+ std::stringstream buf;
+
+ buf << "[ ";
+
+ bool first = true;
+ for (const auto & token : tokens) {
+ if (!first) {
+ buf << ", ";
+ } else {
+ first = false;
+ }
+
+ auto detokenized = llama_token_to_piece(ctx, token);
+
+ detokenized.erase(
+ std::remove_if(
+ detokenized.begin(),
+ detokenized.end(),
+ [](const unsigned char c) { return !std::isprint(c); }),
+ detokenized.end());
+
+ buf << "'" << detokenized << "'"
+ << ":" << std::to_string(token);
+ }
+
+ buf << " ]";
+
+ return buf.str();
+ }
+
+ std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
+ std::stringstream buf;
+
+ buf << "[ ";
+
+ bool first = true;
+ for (int i = 0; i < batch.n_tokens; ++i) {
+ if (!first) {
+ buf << ", ";
+ } else {
+ first = false;
+ }
+
+ auto detokenized = llama_token_to_piece(ctx, batch.token[i]);
+
+ detokenized.erase(
+ std::remove_if(
+ detokenized.begin(),
+ detokenized.end(),
+ [](const unsigned char c) { return !std::isprint(c); }),
+ detokenized.end());
+
+ buf << "\n" << std::to_string(i)
+ << ":token '" << detokenized << "'"
+ << ":pos " << std::to_string(batch.pos[i])
+ << ":n_seq_id " << std::to_string(batch.n_seq_id[i])
+ << ":seq_id " << std::to_string(batch.seq_id[i][0])
+ << ":logits " << std::to_string(batch.logits[i]);
+ }
+
+ buf << " ]";
+
+ return buf.str();
+ }
+
  void string_process_escapes(std::string & input) {
  std::size_t input_len = input.length();
  std::size_t output_idx = 0;
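The new string_from() overloads produce readable dumps of token vectors and batches for the LOG_* macros. A minimal usage sketch (the ctx and batch variables, the common llama_tokenize helper, and the LOG_DBG call are assumptions for illustration, not part of this diff):

    std::vector<llama_token> prompt_tokens = llama_tokenize(ctx, "Hello world", /*add_special=*/true);
    LOG_INF("prompt tokens: %s\n", string_from(ctx, prompt_tokens).c_str());  // "[ 'Hello':15043, ... ]"
    LOG_DBG("batch: %s\n", string_from(ctx, batch).c_str());                  // one line per batch entry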
@@ -487,7 +592,7 @@ void string_process_escapes(std::string & input) {
  bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
  const char * sep = strchr(data, '=');
  if (sep == nullptr || sep - data >= 128) {
- fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
+ LOG_ERR("%s: malformed KV override '%s'\n", __func__, data);
  return false;
  }
  llama_model_kv_override kvo;
@@ -510,20 +615,20 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
  } else if (std::strcmp(sep, "false") == 0) {
  kvo.val_bool = false;
  } else {
- fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
+ LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
  return false;
  }
  } else if (strncmp(sep, "str:", 4) == 0) {
  sep += 4;
  kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
  if (strlen(sep) > 127) {
- fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
+ LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
  return false;
  }
  strncpy(kvo.val_str, sep, 127);
  kvo.val_str[127] = '\0';
  } else {
- fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
+ LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
  return false;
  }
  overrides.emplace_back(std::move(kvo));
@@ -735,7 +840,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
  }
 
  if (model == NULL) {
- fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
+ LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
  return iparams;
  }
 
@@ -743,7 +848,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
  llama_context * lctx = llama_new_context_with_model(model, cparams);
  if (lctx == NULL) {
- fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
+ LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
  llama_free_model(model);
  return iparams;
  }
@@ -779,7 +884,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
  loaded_la.scale = la.scale;
  loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
  if (loaded_la.adapter == nullptr) {
- fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+ LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
  llama_free(lctx);
  llama_free_model(model);
  return iparams;
@@ -791,12 +896,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
  }
 
  if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
- fprintf(stderr, "%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
+ LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
  params.sparams.ignore_eos = false;
  }
 
  if (params.warmup) {
- LOG("warming up the model with an empty run\n");
+ LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
  std::vector<llama_token> tmp;
  llama_token bos = llama_token_bos(model);
@@ -962,7 +1067,7 @@ static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_
  int remaining_attempts = max_attempts;
 
  while (remaining_attempts > 0) {
- fprintf(stderr, "%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
+ LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
 
  CURLcode res = curl_easy_perform(curl);
  if (res == CURLE_OK) {
@@ -970,13 +1075,14 @@ static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_
  }
 
  int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
- fprintf(stderr, "%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
+ LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
 
  remaining_attempts--;
  std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
  }
 
- fprintf(stderr, "%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
+ LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
+
  return false;
  }
 
@@ -985,7 +1091,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  // Initialize libcurl
  std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
  if (!curl) {
- fprintf(stderr, "%s: error initializing libcurl\n", __func__);
+ LOG_ERR("%s: error initializing libcurl\n", __func__);
  return false;
  }
 
@@ -1026,11 +1132,11 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  if (metadata_in.good()) {
  try {
  metadata_in >> metadata;
- fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
+ LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
  if (metadata.contains("url") && metadata.at("url").is_string()) {
  auto previous_url = metadata.at("url").get<std::string>();
  if (previous_url != url) {
- fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
+ LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
  return false;
  }
  }
@@ -1041,12 +1147,12 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  last_modified = metadata.at("lastModified");
  }
  } catch (const nlohmann::json::exception & e) {
- fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+ LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
  return false;
  }
  }
  } else {
- fprintf(stderr, "%s: no previous model file found %s\n", __func__, path.c_str());
+ LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
  }
 
  // Send a HEAD request to retrieve the etag and last-modified headers
@@ -1094,26 +1200,26 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  // HEAD not supported, we don't know if the file has changed
  // force trigger downloading
  force_download = true;
- fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+ LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
  }
  }
 
  bool should_download = !file_exists || force_download;
  if (!should_download) {
  if (!etag.empty() && etag != headers.etag) {
- fprintf(stderr, "%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+ LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
  should_download = true;
  } else if (!last_modified.empty() && last_modified != headers.last_modified) {
- fprintf(stderr, "%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+ LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
  should_download = true;
  }
  }
  if (should_download) {
  std::string path_temporary = path + ".downloadInProgress";
  if (file_exists) {
- fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
+ LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
  if (remove(path.c_str()) != 0) {
- fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
  return false;
  }
  }
@@ -1128,7 +1234,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
 
  std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
  if (!outfile) {
- fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
+ LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
  return false;
  }
 
@@ -1159,7 +1265,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  };
 
  // start the download
- fprintf(stderr, "%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
+ LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
  llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
  bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
  if (!was_perform_successful) {
@@ -1169,7 +1275,7 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  long http_code = 0;
  curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
  if (http_code < 200 || http_code >= 400) {
- fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
+ LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
  return false;
  }
 
@@ -1183,10 +1289,10 @@ static bool llama_download_file(const std::string & url, const std::string & pat
  {"lastModified", headers.last_modified}
  });
  std::ofstream(metadata_path) << metadata.dump(4);
- fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
+ LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
 
  if (rename(path_temporary.c_str(), path.c_str()) != 0) {
- fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
+ LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
  return false;
  }
  }
@@ -1201,7 +1307,7 @@ struct llama_model * llama_load_model_from_url(
  const struct llama_model_params & params) {
  // Basic validation of the model_url
  if (!model_url || strlen(model_url) == 0) {
- fprintf(stderr, "%s: invalid model_url\n", __func__);
+ LOG_ERR("%s: invalid model_url\n", __func__);
  return NULL;
  }
 
@@ -1218,7 +1324,7 @@ struct llama_model * llama_load_model_from_url(
  };
  auto * ctx_gguf = lm_gguf_init_from_file(path_model, lm_gguf_params);
  if (!ctx_gguf) {
- fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+ LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
  return NULL;
  }
 
@@ -1238,14 +1344,12 @@ struct llama_model * llama_load_model_from_url(
  // and extract split URL and PATH prefixes
  {
  if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
- fprintf(stderr, "\n%s: unexpected model file name: %s"
- " n_split=%d\n", __func__, path_model, n_split);
+ LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
  return NULL;
  }
 
  if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
- fprintf(stderr, "\n%s: unexpected model url: %s"
- " n_split=%d\n", __func__, model_url, n_split);
+ LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
  return NULL;
  }
  }
@@ -1305,7 +1409,7 @@ struct llama_model * llama_load_model_from_url(
  const char * /*path_model*/,
  const char * /*hf_token*/,
  const struct llama_model_params & /*params*/) {
- fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
+ LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
  return nullptr;
  }
 
@@ -1315,7 +1419,7 @@ struct llama_model * llama_load_model_from_hf(
  const char * /*path_model*/,
  const char * /*hf_token*/,
  const struct llama_model_params & /*params*/) {
- fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
+ LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
  return nullptr;
  }
 
@@ -1643,13 +1747,13 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
  };
  struct lm_gguf_context * ctx_gguf = lm_gguf_init_from_file(load_info.fname.c_str(), meta_lm_gguf_params);
  if (!ctx_gguf) {
- fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
  return result;
  }
 
  int32_t n_tensors = lm_gguf_get_n_tensors(ctx_gguf);
  if (n_tensors == 0) {
- fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+ LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
  }
 
  for (int i = 0; i < n_tensors; i++) {
@@ -1667,23 +1771,23 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
  }
  }
  if (layer_idx < 0) {
- fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
  result.n_embd = -1;
  break;
  } else if (layer_idx == 0) {
- fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
  result.n_embd = -1;
  break;
  }
 
  struct lm_ggml_tensor * tensor = lm_ggml_get_tensor(ctx, name.c_str());
  if (tensor->type != LM_GGML_TYPE_F32) {
- fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
  result.n_embd = -1;
  break;
  }
  if (lm_ggml_n_dims(tensor) != 1) {
- fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
  result.n_embd = -1;
  break;
  }
@@ -1691,7 +1795,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
  if (result.n_embd == -1) {
  result.n_embd = lm_ggml_nelements(tensor);
  } else if (lm_ggml_nelements(tensor) != result.n_embd) {
- fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
+ LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
  result.n_embd = -1;
  break;
  }
@@ -1708,7 +1812,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
  }
 
  if (result.n_embd == -1) {
- fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
+ LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
  result.data.clear();
  }
 
@@ -1729,7 +1833,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
  break;
  }
  if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
- fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
+ LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
  result.n_embd = -1;
  break;
  }
@@ -1745,7 +1849,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
  }
 
  if (result.n_embd == -1) {
- fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
+ LOG_ERR("%s: no valid control vector files passed\n", __func__);
  result.data.clear();
  }
 
package/cpp/common.h CHANGED
@@ -4,11 +4,9 @@
 
  #include "llama.h"
 
- #define LOG_NO_FILE_LINE_FUNCTION
- #include "log.h"
-
  #include <string>
  #include <vector>
+ #include <sstream>
 
  #ifdef _WIN32
  #define DIRECTORY_SEPARATOR '\\'
@@ -265,6 +263,7 @@ struct gpt_params {
  bool cont_batching = true; // insert new sequences for decoding on-the-fly
  bool flash_attn = false; // flash attention
  bool no_perf = false; // disable performance metrics
+ bool ctx_shift = true; // context shift on inifinite text generation
 
  bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
  bool logits_all = false; // return logits for all tokens in the batch
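The new ctx_shift flag defaults to true. A consumer that wants to disable automatic context shifting would presumably clear it before initialization, along these lines (illustrative sketch, not from this diff):

    gpt_params params;
    params.ctx_shift = false;  // do not shift the context window when it fills up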
@@ -360,6 +359,10 @@ struct gpt_params {
  bool batched_bench_output_jsonl = false;
  };
 
+ // call once at the start of a program if it uses libcommon
+ // initializes the logging system and prints info about the build
+ void gpt_init();
+
  std::string gpt_params_get_system_info(const gpt_params & params);
 
  bool parse_cpu_range(const std::string& range, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
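Per the comment above, gpt_init() is meant to be called once at program start. A minimal sketch of an entry point that uses libcommon (illustrative only; the argument parsing is elided):

    #include "common.h"

    int main(int argc, char ** argv) {
        gpt_init();        // hook llama.cpp logging into the common logger and print build info
        gpt_params params;
        // ... fill params from argv, then load the model and run ...
        return 0;
    }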
@@ -395,6 +398,11 @@ static std::vector<T> string_split(const std::string & str, char delim) {
  bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
  void string_process_escapes(std::string & input);
 
+ std::string string_from(bool value);
+ std::string string_from(const std::vector<int> & values);
+ std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
+ std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);
+
  //
  // Filesystem utils
  //