@fugood/llama.node 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/CMakeLists.txt +2 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +1 -1
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +8 -8
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +8 -9
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +4 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +43 -9
  25. package/src/llama.cpp/.github/workflows/docker.yml +3 -0
  26. package/src/llama.cpp/CMakeLists.txt +7 -4
  27. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  28. package/src/llama.cpp/common/CMakeLists.txt +0 -2
  29. package/src/llama.cpp/common/arg.cpp +642 -607
  30. package/src/llama.cpp/common/arg.h +22 -22
  31. package/src/llama.cpp/common/common.cpp +79 -281
  32. package/src/llama.cpp/common/common.h +130 -100
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  34. package/src/llama.cpp/common/log.cpp +50 -50
  35. package/src/llama.cpp/common/log.h +18 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  37. package/src/llama.cpp/common/ngram-cache.h +19 -19
  38. package/src/llama.cpp/common/sampling.cpp +116 -108
  39. package/src/llama.cpp/common/sampling.h +20 -20
  40. package/src/llama.cpp/docs/build.md +37 -17
  41. package/src/llama.cpp/examples/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +14 -14
  43. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  47. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  49. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  50. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  51. package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
  52. package/src/llama.cpp/examples/infill/infill.cpp +40 -86
  53. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
  54. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  55. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  56. package/src/llama.cpp/examples/llava/clip.cpp +1 -0
  57. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  58. package/src/llama.cpp/examples/llava/llava.cpp +37 -3
  59. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  60. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  61. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  62. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  63. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
  64. package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
  65. package/src/llama.cpp/examples/main/main.cpp +64 -109
  66. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  67. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  68. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  69. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  70. package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
  71. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  72. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
  73. package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
  74. package/src/llama.cpp/examples/server/server.cpp +553 -691
  75. package/src/llama.cpp/examples/server/utils.hpp +312 -25
  76. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  77. package/src/llama.cpp/examples/simple/simple.cpp +128 -96
  78. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  79. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  80. package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
  81. package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
  82. package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
  83. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  84. package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
  85. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  86. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  87. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  88. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  89. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  90. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  91. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  92. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  93. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  94. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  95. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  96. package/src/llama.cpp/ggml/include/ggml.h +53 -393
  97. package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
  98. package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
  99. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  100. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
  101. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  102. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  103. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  104. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  105. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  106. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
  107. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  108. package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
  109. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  110. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
  111. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  112. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
  113. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  114. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  115. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  116. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
  117. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  118. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  120. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  121. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
  122. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  123. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  124. package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
  125. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  126. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
  127. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  128. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  129. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  130. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  131. package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
  132. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  133. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  134. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
  135. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  136. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  137. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
  138. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
  141. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  142. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  143. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
  144. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
  145. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
  146. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  148. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  149. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  150. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  151. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  152. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  153. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  154. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  155. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
  156. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
  157. package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
  158. package/src/llama.cpp/include/llama.h +67 -33
  159. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  160. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  161. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  162. package/src/llama.cpp/src/llama-sampling.cpp +745 -105
  163. package/src/llama.cpp/src/llama-sampling.h +21 -2
  164. package/src/llama.cpp/src/llama-vocab.cpp +49 -9
  165. package/src/llama.cpp/src/llama-vocab.h +35 -11
  166. package/src/llama.cpp/src/llama.cpp +2636 -2406
  167. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  168. package/src/llama.cpp/tests/CMakeLists.txt +1 -2
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
  171. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  172. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  173. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  174. package/src/llama.cpp/tests/test-log.cpp +2 -2
  175. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  176. package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
  177. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  178. package/src/llama.cpp/tests/test-rope.cpp +1 -0
  179. package/src/llama.cpp/tests/test-sampling.cpp +162 -137
  180. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  181. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  182. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  183. package/src/llama.cpp/common/train.cpp +0 -1515
  184. package/src/llama.cpp/common/train.h +0 -233
  185. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  186. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  187. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  188. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  189. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
  190. /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
package/src/llama.cpp/common/log.cpp

@@ -8,10 +8,10 @@
  #include <thread>
  #include <vector>

- int gpt_log_verbosity_thold = LOG_DEFAULT_LLAMA;
+ int common_log_verbosity_thold = LOG_DEFAULT_LLAMA;

- void gpt_log_set_verbosity_thold(int verbosity) {
-     gpt_log_verbosity_thold = verbosity;
+ void common_log_set_verbosity_thold(int verbosity) {
+     common_log_verbosity_thold = verbosity;
  }

  #define LOG_COL_DEFAULT "\033[0m"
@@ -29,16 +29,16 @@ static int64_t t_us() {
  }

  // colors
- enum gpt_log_col : int {
-     GPT_LOG_COL_DEFAULT = 0,
-     GPT_LOG_COL_BOLD,
-     GPT_LOG_COL_RED,
-     GPT_LOG_COL_GREEN,
-     GPT_LOG_COL_YELLOW,
-     GPT_LOG_COL_BLUE,
-     GPT_LOG_COL_MAGENTA,
-     GPT_LOG_COL_CYAN,
-     GPT_LOG_COL_WHITE,
+ enum common_log_col : int {
+     COMMON_LOG_COL_DEFAULT = 0,
+     COMMON_LOG_COL_BOLD,
+     COMMON_LOG_COL_RED,
+     COMMON_LOG_COL_GREEN,
+     COMMON_LOG_COL_YELLOW,
+     COMMON_LOG_COL_BLUE,
+     COMMON_LOG_COL_MAGENTA,
+     COMMON_LOG_COL_CYAN,
+     COMMON_LOG_COL_WHITE,
  };

  // disable colors by default
@@ -54,7 +54,7 @@ static std::vector<const char *> g_col = {
      "",
  };

- struct gpt_log_entry {
+ struct common_log_entry {
      enum ggml_log_level level;

      bool prefix;
@@ -71,7 +71,7 @@ struct gpt_log_entry {
          if (!fcur) {
              // stderr displays DBG messages only when their verbosity level is not higher than the threshold
              // these messages will still be logged to a file
-             if (level == GGML_LOG_LEVEL_DEBUG && gpt_log_verbosity_thold < LOG_DEFAULT_DEBUG) {
+             if (level == GGML_LOG_LEVEL_DEBUG && common_log_verbosity_thold < LOG_DEFAULT_DEBUG) {
                  return;
              }

@@ -86,19 +86,19 @@ struct gpt_log_entry {
          if (timestamp) {
              // [M.s.ms.us]
              fprintf(fcur, "%s%d.%02d.%03d.%03d%s ",
-                 g_col[GPT_LOG_COL_BLUE],
+                 g_col[COMMON_LOG_COL_BLUE],
                  (int) (timestamp / 1000000 / 60),
                  (int) (timestamp / 1000000 % 60),
                  (int) (timestamp / 1000 % 1000),
                  (int) (timestamp % 1000),
-                 g_col[GPT_LOG_COL_DEFAULT]);
+                 g_col[COMMON_LOG_COL_DEFAULT]);
          }

          switch (level) {
-             case GGML_LOG_LEVEL_INFO: fprintf(fcur, "%sI %s", g_col[GPT_LOG_COL_GREEN], g_col[GPT_LOG_COL_DEFAULT]); break;
-             case GGML_LOG_LEVEL_WARN: fprintf(fcur, "%sW %s", g_col[GPT_LOG_COL_MAGENTA], "" ); break;
-             case GGML_LOG_LEVEL_ERROR: fprintf(fcur, "%sE %s", g_col[GPT_LOG_COL_RED], "" ); break;
-             case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, "%sD %s", g_col[GPT_LOG_COL_YELLOW], "" ); break;
+             case GGML_LOG_LEVEL_INFO: fprintf(fcur, "%sI %s", g_col[COMMON_LOG_COL_GREEN], g_col[COMMON_LOG_COL_DEFAULT]); break;
+             case GGML_LOG_LEVEL_WARN: fprintf(fcur, "%sW %s", g_col[COMMON_LOG_COL_MAGENTA], "" ); break;
+             case GGML_LOG_LEVEL_ERROR: fprintf(fcur, "%sE %s", g_col[COMMON_LOG_COL_RED], "" ); break;
+             case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, "%sD %s", g_col[COMMON_LOG_COL_YELLOW], "" ); break;
              default:
                  break;
          }
@@ -107,18 +107,18 @@ struct gpt_log_entry {
          fprintf(fcur, "%s", msg.data());

          if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
-             fprintf(fcur, "%s", g_col[GPT_LOG_COL_DEFAULT]);
+             fprintf(fcur, "%s", g_col[COMMON_LOG_COL_DEFAULT]);
          }

          fflush(fcur);
      }
  };

- struct gpt_log {
+ struct common_log {
      // default capacity - will be expanded if needed
-     gpt_log() : gpt_log(256) {}
+     common_log() : common_log(256) {}

-     gpt_log(size_t capacity) {
+     common_log(size_t capacity) {
          file = nullptr;
          prefix = false;
          timestamps = false;
@@ -137,7 +137,7 @@ struct gpt_log {
          resume();
      }

-     ~gpt_log() {
+     ~common_log() {
          pause();
          if (file) {
              fclose(file);
@@ -158,12 +158,12 @@ private:
      int64_t t_start;

      // ring buffer of entries
-     std::vector<gpt_log_entry> entries;
+     std::vector<common_log_entry> entries;
      size_t head;
      size_t tail;

      // worker thread copies into this
-     gpt_log_entry cur;
+     common_log_entry cur;

  public:
      void add(enum ggml_log_level level, const char * fmt, va_list args) {
@@ -219,7 +219,7 @@ public:
          tail = (tail + 1) % entries.size();
          if (tail == head) {
              // expand the buffer
-             std::vector<gpt_log_entry> new_entries(2*entries.size());
+             std::vector<common_log_entry> new_entries(2*entries.size());

              size_t new_tail = 0;

@@ -320,15 +320,15 @@ public:
          pause();

          if (colors) {
-             g_col[GPT_LOG_COL_DEFAULT] = LOG_COL_DEFAULT;
-             g_col[GPT_LOG_COL_BOLD] = LOG_COL_BOLD;
-             g_col[GPT_LOG_COL_RED] = LOG_COL_RED;
-             g_col[GPT_LOG_COL_GREEN] = LOG_COL_GREEN;
-             g_col[GPT_LOG_COL_YELLOW] = LOG_COL_YELLOW;
-             g_col[GPT_LOG_COL_BLUE] = LOG_COL_BLUE;
-             g_col[GPT_LOG_COL_MAGENTA] = LOG_COL_MAGENTA;
-             g_col[GPT_LOG_COL_CYAN] = LOG_COL_CYAN;
-             g_col[GPT_LOG_COL_WHITE] = LOG_COL_WHITE;
+             g_col[COMMON_LOG_COL_DEFAULT] = LOG_COL_DEFAULT;
+             g_col[COMMON_LOG_COL_BOLD] = LOG_COL_BOLD;
+             g_col[COMMON_LOG_COL_RED] = LOG_COL_RED;
+             g_col[COMMON_LOG_COL_GREEN] = LOG_COL_GREEN;
+             g_col[COMMON_LOG_COL_YELLOW] = LOG_COL_YELLOW;
+             g_col[COMMON_LOG_COL_BLUE] = LOG_COL_BLUE;
+             g_col[COMMON_LOG_COL_MAGENTA] = LOG_COL_MAGENTA;
+             g_col[COMMON_LOG_COL_CYAN] = LOG_COL_CYAN;
+             g_col[COMMON_LOG_COL_WHITE] = LOG_COL_WHITE;
          } else {
              for (size_t i = 0; i < g_col.size(); i++) {
                  g_col[i] = "";
@@ -355,47 +355,47 @@ public:
  // public API
  //

- struct gpt_log * gpt_log_init() {
-     return new gpt_log;
+ struct common_log * common_log_init() {
+     return new common_log;
  }

- struct gpt_log * gpt_log_main() {
-     static struct gpt_log log;
+ struct common_log * common_log_main() {
+     static struct common_log log;

      return &log;
  }

- void gpt_log_pause(struct gpt_log * log) {
+ void common_log_pause(struct common_log * log) {
      log->pause();
  }

- void gpt_log_resume(struct gpt_log * log) {
+ void common_log_resume(struct common_log * log) {
      log->resume();
  }

- void gpt_log_free(struct gpt_log * log) {
+ void common_log_free(struct common_log * log) {
      delete log;
  }

- void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...) {
+ void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...) {
      va_list args;
      va_start(args, fmt);
      log->add(level, fmt, args);
      va_end(args);
  }

- void gpt_log_set_file(struct gpt_log * log, const char * file) {
+ void common_log_set_file(struct common_log * log, const char * file) {
      log->set_file(file);
  }

- void gpt_log_set_colors(struct gpt_log * log, bool colors) {
+ void common_log_set_colors(struct common_log * log, bool colors) {
      log->set_colors(colors);
  }

- void gpt_log_set_prefix(struct gpt_log * log, bool prefix) {
+ void common_log_set_prefix(struct common_log * log, bool prefix) {
      log->set_prefix(prefix);
  }

- void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps) {
+ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
      log->set_timestamps(timestamps);
  }
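
For code that manages its own logger rather than the singleton, the renamed free functions above map one-to-one onto the old gpt_log_* names. A minimal sketch of the new spelling (the log-file path and message below are placeholders, not taken from the package):

    #include "log.h"   // common/log.h, as patched above

    void log_to_file_example() {
        struct common_log * log = common_log_init();              // was gpt_log_init()
        common_log_set_file(log, "/tmp/llama-node.log");          // placeholder path
        common_log_add(log, GGML_LOG_LEVEL_INFO, "model loaded: %s\n", "ok");
        common_log_free(log);                                     // was gpt_log_free()
    }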
package/src/llama.cpp/common/log.h

@@ -14,23 +14,23 @@
  #define LOG_DEFAULT_LLAMA 0

  // needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
- // set via gpt_log_set_verbosity()
- extern int gpt_log_verbosity_thold;
+ // set via common_log_set_verbosity()
+ extern int common_log_verbosity_thold;

- void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
+ void common_log_set_verbosity_thold(int verbosity); // not thread-safe

- // the gpt_log uses an internal worker thread to print/write log messages
+ // the common_log uses an internal worker thread to print/write log messages
  // when the worker thread is paused, incoming log messages are discarded
- struct gpt_log;
+ struct common_log;

- struct gpt_log * gpt_log_init();
- struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit
- void gpt_log_pause (struct gpt_log * log); // pause the worker thread, not thread-safe
- void gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe
- void gpt_log_free (struct gpt_log * log);
+ struct common_log * common_log_init();
+ struct common_log * common_log_main(); // singleton, automatically destroys itself on exit
+ void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe
+ void common_log_resume(struct common_log * log); // resume the worker thread, not thread-safe
+ void common_log_free (struct common_log * log);

  LOG_ATTRIBUTE_FORMAT(3, 4)
- void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
+ void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...);

  // defaults: file = NULL, colors = false, prefix = false, timestamps = false
  //
@@ -54,10 +54,10 @@ void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * f
  // D - debug (stderr, V = LOG_DEFAULT_DEBUG)
  //

- void gpt_log_set_file (struct gpt_log * log, const char * file); // not thread-safe
- void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe
- void gpt_log_set_prefix (struct gpt_log * log, bool prefix); // whether to output prefix to each log
- void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); // whether to output timestamps in the prefix
+ void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
+ void common_log_set_colors (struct common_log * log, bool colors); // not thread-safe
+ void common_log_set_prefix (struct common_log * log, bool prefix); // whether to output prefix to each log
+ void common_log_set_timestamps(struct common_log * log, bool timestamps); // whether to output timestamps in the prefix

  // helper macros for logging
  // use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
@@ -66,13 +66,13 @@ void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); // w
  //
  // LOG_DBG("this is a debug message: %d\n", expensive_function());
  //
- // this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > gpt_log_verbosity_thold
+ // this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > common_log_verbosity_thold
  //

  #define LOG_TMPL(level, verbosity, ...) \
      do { \
-         if ((verbosity) <= gpt_log_verbosity_thold) { \
-             gpt_log_add(gpt_log_main(), (level), __VA_ARGS__); \
+         if ((verbosity) <= common_log_verbosity_thold) { \
+             common_log_add(common_log_main(), (level), __VA_ARGS__); \
          } \
      } while (0)

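
For downstream callers, the renamed macro path can be exercised as sketched below. Only names that appear in the hunks above are used (LOG_DBG comes from the header comment; LOG_DEFAULT_DEBUG and the singleton accessor are shown in the diff); this illustrates the rename and is not documentation of the package's API:

    #include "log.h"   // common/log.h, as patched above

    int main() {
        struct common_log * log = common_log_main();           // singleton, was gpt_log_main()
        common_log_set_colors(log, true);                      // was gpt_log_set_colors()
        common_log_set_timestamps(log, true);                  // was gpt_log_set_timestamps()
        common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);     // was gpt_log_set_verbosity_thold()

        // goes through LOG_TMPL, so the arguments are not evaluated when the
        // message is filtered out by common_log_verbosity_thold
        LOG_DBG("this is a debug message: %d\n", 42);
        return 0;
    }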
package/src/llama.cpp/common/ngram-cache.cpp

@@ -8,7 +8,7 @@
  #include <fstream>
  #include <thread>

- void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
+ void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
          std::vector<llama_token> & inp, int nnew, bool print_progress) {
      const int64_t t_start_ms = ggml_time_ms();
      const int64_t inp_size = inp.size();
@@ -20,16 +20,16 @@ void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, in
          const int64_t i_start = std::max(inp_size - nnew, ngram_size);
          for (int64_t i = i_start; i < inp_size; ++i) {
              const int64_t ngram_start = i - ngram_size;
-             llama_ngram ngram(&inp[ngram_start], ngram_size);
+             common_ngram ngram(&inp[ngram_start], ngram_size);
              const llama_token token = inp[i];

-             llama_ngram_cache::iterator part_it = ngram_cache.find(ngram);
+             common_ngram_cache::iterator part_it = ngram_cache.find(ngram);
              if (part_it == ngram_cache.end()) {
-                 llama_ngram_cache_part part;
+                 common_ngram_cache_part part;
                  part.emplace(token, 1);
                  ngram_cache.emplace(ngram, part);
              } else {
-                 llama_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
+                 common_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
                  if (token_count_it == part_it->second.end()) {
                      part_it->second.emplace(token, 1);
                  } else {
@@ -62,12 +62,12 @@ constexpr int draft_min_sample_size_strict[LLAMA_NGRAM_MAX] = { 4, 3, 2, 2};
  constexpr int draft_min_percent_strict[LLAMA_NGRAM_MAX] = {75, 66, 66, 66};

  // Helper function that tries to draft a token from only the static ngram cache:
- static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ngram_static) {
-     llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
+ static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram ngram_static) {
+     common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
      if (part_static_it == nc_static.end()) {
          return -1;
      }
-     const llama_ngram_cache_part part_static = part_static_it->second;
+     const common_ngram_cache_part part_static = part_static_it->second;

      int max_count_static = 0;
      int sum_count_static = 0;
@@ -95,19 +95,19 @@ static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ng

  // Try to draft a token from primary cache (context/dynamic), validate with static cache:
  static llama_token try_draft(
-     llama_ngram_cache & nc_primary, const std::vector<llama_ngram> & ngrams_primary, llama_ngram_cache_part & part_static,
+     common_ngram_cache & nc_primary, const std::vector<common_ngram> & ngrams_primary, common_ngram_cache_part & part_static,
      const int * min_sample_size, const int * min_percent) {

      llama_token drafted_token = -1;

      for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == -1; --i) {
-         const llama_ngram ngram_primary = ngrams_primary[i];
+         const common_ngram ngram_primary = ngrams_primary[i];

-         llama_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
+         common_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
          if (part_primary_it == nc_primary.end()) {
              continue;
          }
-         const llama_ngram_cache_part part_primary = part_primary_it->second;
+         const common_ngram_cache_part part_primary = part_primary_it->second;

          int max_count_primary = 0;
          int max_count_static = 0;
@@ -117,7 +117,7 @@ static llama_token try_draft(
          for (std::pair<llama_token, int> token_count_primary : part_primary) {
              const llama_token token = token_count_primary.first;

-             llama_ngram_cache_part::iterator token_count_static_it = part_static.find(token);
+             common_ngram_cache_part::iterator token_count_static_it = part_static.find(token);

              const int32_t count_primary = token_count_primary.second;
              const int32_t count_static = token_count_static_it != part_static.end() ? 100*token_count_static_it->second : 1;
@@ -142,9 +142,9 @@ static llama_token try_draft(
      return drafted_token;
  }

- void llama_ngram_cache_draft(
+ void common_ngram_cache_draft(
      std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
-     llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static
+     common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static
  ) {
      GGML_ASSERT(draft.size() == 1);
      const int inp_size = inp.size();
@@ -157,21 +157,21 @@ void llama_ngram_cache_draft(
          llama_token drafted_token = -1;

          const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size()-1;
-         llama_ngram ngram_static;
+         common_ngram ngram_static;
          for (int j = ngram_start_static; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) {
              ngram_static.tokens[j-ngram_start_static] = get_token(inp, draft, j);
          }
-         llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
-         llama_ngram_cache_part part_static;
+         common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
+         common_ngram_cache_part part_static;
          if (part_static_it != nc_static.end()) {
              part_static = part_static_it->second;
          }

          // cd = context + dynamic
-         std::vector<llama_ngram> ngrams_cd;
+         std::vector<common_ngram> ngrams_cd;
          for (int ngram_size_cd = ngram_min; ngram_size_cd <= ngram_max; ++ngram_size_cd) {
              const int ngram_start_cd = inp_size-ngram_size_cd + draft.size()-1;
-             llama_ngram ngram_cd;
+             common_ngram ngram_cd;
              for (int j = ngram_start_cd; j < ngram_start_cd + ngram_size_cd; ++j) {
                  ngram_cd.tokens[j-ngram_start_cd] = get_token(inp, draft, j);
              }
@@ -196,16 +196,16 @@ void llama_ngram_cache_draft(
      }
  }

- void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename) {
+ void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename) {
      std::ofstream file_out(filename, std::ios::binary);
-     for (std::pair<llama_ngram, llama_ngram_cache_part> item : ngram_cache) {
-         const llama_ngram ngram = item.first;
-         llama_ngram_cache_part token_counts = item.second;
+     for (std::pair<common_ngram, common_ngram_cache_part> item : ngram_cache) {
+         const common_ngram ngram = item.first;
+         common_ngram_cache_part token_counts = item.second;
          GGML_ASSERT(!token_counts.empty());
          const int32_t ntokens = token_counts.size();
          GGML_ASSERT(ntokens > 0);

-         file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(llama_ngram));
+         file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(common_ngram));
          file_out.write(reinterpret_cast<const char *>(&ntokens), sizeof(int32_t));
          for (std::pair<llama_token, int32_t> item2 : token_counts) {
              const llama_token token = item2.first;
@@ -219,14 +219,14 @@ void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filen

  }

- llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
+ common_ngram_cache common_ngram_cache_load(std::string & filename) {
      std::ifstream hashmap_file(filename, std::ios::binary);
      if (!hashmap_file) {
          throw std::ifstream::failure("Unable to open file " + filename);
      }
-     llama_ngram_cache ngram_cache;
+     common_ngram_cache ngram_cache;

-     llama_ngram ngram;
+     common_ngram ngram;
      int32_t ntokens;
      llama_token token;
      int32_t count;
@@ -235,11 +235,11 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
      char * ntokensc = reinterpret_cast<char*>(&ntokens);
      char * tokenc = reinterpret_cast<char*>(&token);
      char * countc = reinterpret_cast<char*>(&count);
-     while(hashmap_file.read(ngramc, sizeof(llama_ngram))) {
+     while(hashmap_file.read(ngramc, sizeof(common_ngram))) {
          GGML_ASSERT(!hashmap_file.eof());
          GGML_ASSERT(hashmap_file.read(ntokensc, sizeof(int32_t)));
          GGML_ASSERT(ntokens > 0);
-         llama_ngram_cache_part token_counts;
+         common_ngram_cache_part token_counts;

          for (int i = 0; i < ntokens; ++i) {
              GGML_ASSERT(!hashmap_file.eof());
@@ -257,12 +257,12 @@ llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
      return ngram_cache;
  }

- void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add) {
-     for (std::pair<llama_ngram, llama_ngram_cache_part> ngram_part : ngram_cache_add) {
-         const llama_ngram ngram = ngram_part.first;
-         llama_ngram_cache_part part = ngram_part.second;
+ void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add) {
+     for (std::pair<common_ngram, common_ngram_cache_part> ngram_part : ngram_cache_add) {
+         const common_ngram ngram = ngram_part.first;
+         common_ngram_cache_part part = ngram_part.second;

-         llama_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
+         common_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
          if (part_merged_it == ngram_cache_target.end()) {
              ngram_cache_target.emplace(ngram, part);
              continue;
@@ -273,7 +273,7 @@ void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram
              const int32_t count = token_count.second;
              GGML_ASSERT(count > 0);

-             llama_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
+             common_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
              if (token_count_merged_it == part_merged_it->second.end()) {
                  part_merged_it->second.emplace(token, count);
                  continue;
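
The save/load hunks above imply a simple binary layout for the cache file: one fixed-size n-gram record, a 32-bit token count, then that many (token, count) pairs, repeated until end of file. The standalone dump tool below is a sketch assembled only from the read/write calls visible in this diff; it assumes llama_token is a 32-bit integer and that LLAMA_NGRAM_MAX is 4 (consistent with the four-element arrays above), and it is not part of the package:

    #include <cstdint>
    #include <fstream>
    #include <iostream>

    // assumed to mirror common_ngram in ngram-cache.h: a fixed token array, unused slots = -1
    constexpr int NGRAM_MAX = 4;
    struct ngram_record { int32_t tokens[NGRAM_MAX]; };

    int main(int argc, char ** argv) {
        if (argc < 2) { std::cerr << "usage: dump-ngram-cache <file>\n"; return 1; }
        std::ifstream in(argv[1], std::ios::binary);

        ngram_record ngram;
        // per record: ngram bytes, int32 ntokens, then ntokens x (int32 token, int32 count)
        while (in.read(reinterpret_cast<char *>(&ngram), sizeof(ngram))) {
            int32_t ntokens = 0;
            in.read(reinterpret_cast<char *>(&ntokens), sizeof(ntokens));
            for (int32_t i = 0; i < ntokens; ++i) {
                int32_t token = 0, count = 0;
                in.read(reinterpret_cast<char *>(&token), sizeof(token));
                in.read(reinterpret_cast<char *>(&count), sizeof(count));
                std::cout << "token " << token << " seen " << count << " times\n";
            }
        }
        return 0;
    }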
package/src/llama.cpp/common/ngram-cache.h

@@ -12,22 +12,22 @@

  // Data structures to map n-grams to empirical token probabilities:

- struct llama_ngram {
+ struct common_ngram {
      llama_token tokens[LLAMA_NGRAM_MAX];

-     llama_ngram() {
+     common_ngram() {
          for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
              tokens[i] = -1;
          }
      }

-     llama_ngram(const llama_token * input, const int ngram_size) {
+     common_ngram(const llama_token * input, const int ngram_size) {
          for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
              tokens[i] = i < ngram_size ? input[i] : -1;
          }
      }

-     bool operator==(const llama_ngram & other) const {
+     bool operator==(const common_ngram & other) const {
          for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
              if (tokens[i] != other.tokens[i]) {
                  return false;
@@ -37,28 +37,28 @@ struct llama_ngram {
      }
  };

- struct llama_token_hash_function {
+ struct common_token_hash_function {
      size_t operator()(const llama_token token) const {
          // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
          return token * 11400714819323198485llu;
      }
  };

- struct llama_ngram_hash_function {
-     size_t operator()(const llama_ngram & ngram) const {
-         size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
+ struct common_ngram_hash_function {
+     size_t operator()(const common_ngram & ngram) const {
+         size_t hash = common_token_hash_function{}(ngram.tokens[0]);
          for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
-             hash ^= llama_token_hash_function{}(ngram.tokens[i]);
+             hash ^= common_token_hash_function{}(ngram.tokens[i]);
          }
          return hash;
      }
  };

  // token -> number of times token has been seen
- typedef std::unordered_map<llama_token, int32_t> llama_ngram_cache_part;
+ typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;

  // n-gram -> empirical distribution of following tokens
- typedef std::unordered_map<llama_ngram, llama_ngram_cache_part, llama_ngram_hash_function> llama_ngram_cache;
+ typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;


  // Update an ngram cache with tokens.
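
To make the relationship between the renamed types concrete, the snippet below exercises them exactly as declared above: an n-gram key hashed with the Fibonacci-style hash, mapping to a per-token counter. It assumes the patched ngram-cache.h is on the include path and is purely illustrative:

    #include <vector>
    #include "ngram-cache.h"   // common/ngram-cache.h, as patched above

    void toy_count_example() {
        std::vector<llama_token> tokens = { 10, 20, 30, 40 };

        // key over the first three tokens; unused slots are filled with -1 by the constructor
        common_ngram key(tokens.data(), /*ngram_size =*/ 3);

        // unordered_map keyed by common_ngram via common_ngram_hash_function
        common_ngram_cache cache;

        // record: "after (10, 20, 30), token 40 was observed once"
        cache[key][tokens[3]] += 1;
    }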
@@ -70,8 +70,8 @@ typedef std::unordered_map<llama_ngram, llama_ngram_cache_part, llama_ngram_hash
  //
  // In order to get correct results inp_data can ONLY BE APPENDED TO.
  // Changes in the middle need a complete rebuild.
- void llama_ngram_cache_update(
-     llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
+ void common_ngram_cache_update(
+     common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);

  // Try to draft tokens from ngram caches.
  // inp: the tokens generated so far.
@@ -81,21 +81,21 @@ void llama_ngram_cache_update(
  // nc_context: ngram cache based on current context.
  // nc_dynamic: ngram cache based on previous user generations.
  // nc_static: ngram cache generated from a large text corpus, used for validation.
- void llama_ngram_cache_draft(
+ void common_ngram_cache_draft(
      std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
-     llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static);
+     common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);

  // Save an ngram cache to a file.
  // ngram_cache: the ngram cache to save.
  // filename: the path under which to save the ngram cache.
- void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename);
+ void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);

- // Load an ngram cache saved with llama_ngram_cache_save.
+ // Load an ngram cache saved with common_ngram_cache_save.
  // filename: the path from which to load the ngram cache.
  // returns: an ngram cache containing the information saved to filename.
- llama_ngram_cache llama_ngram_cache_load(std::string & filename);
+ common_ngram_cache common_ngram_cache_load(std::string & filename);

  // Merge two ngram caches.
  // ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
  // ngram_cache_add: the ngram cache to add to ngram_cache_target.
- void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add);
+ void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);
+ void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);