cui-llama.rn 1.4.4 → 1.5.0

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (216)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-impl.cpp CHANGED
@@ -1,167 +1,167 @@
All 167 lines are marked changed, but the removed and added text are identical line for line, so this appears to be a whitespace- or line-ending-only change. The file, shown once:

#include "llama-impl.h"

#include "gguf.h"
#include "llama.h"

#include <cinttypes>
#include <climits>
#include <cstdarg>
#include <cstring>
#include <vector>
#include <sstream>

struct llama_logger_state {
    lm_ggml_log_callback log_callback = llama_log_callback_default;
    void * log_callback_user_data = nullptr;
};

static llama_logger_state g_logger_state;

time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : lm_ggml_time_us()), t_acc(t_acc) {}

time_meas::~time_meas() {
    if (t_start_us >= 0) {
        t_acc += lm_ggml_time_us() - t_start_us;
    }
}

void llama_log_set(lm_ggml_log_callback log_callback, void * user_data) {
    lm_ggml_log_set(log_callback, user_data);
    g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}

static void llama_log_internal_v(lm_ggml_log_level level, const char * format, va_list args) {
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        char * buffer2 = new char[len + 1];
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        delete[] buffer2;
    }
    va_end(args_copy);
}

void llama_log_internal(lm_ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    llama_log_internal_v(level, format, args);
    va_end(args);
}

void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string builder;
    builder.reserve(s.length());
    size_t pos = 0;
    size_t last_pos = 0;
    while ((pos = s.find(search, last_pos)) != std::string::npos) {
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
    }
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}

std::string format(const char * fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    int size = vsnprintf(NULL, 0, fmt, ap);
    LM_GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
    std::vector<char> buf(size + 1);
    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
    LM_GGML_ASSERT(size2 == size);
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}

std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
    char buf[256];
    snprintf(buf, sizeof(buf), "%5" PRId64, ne.at(0));
    for (size_t i = 1; i < ne.size(); i++) {
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, ne.at(i));
    }
    return buf;
}

std::string llama_format_tensor_shape(const struct lm_ggml_tensor * t) {
    char buf[256];
    snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
    for (int i = 1; i < LM_GGML_MAX_DIMS; i++) {
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
    }
    return buf;
}

static std::string lm_gguf_data_to_str(enum lm_gguf_type type, const void * data, int i) {
    switch (type) {
        case LM_GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
        case LM_GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
        case LM_GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
        case LM_GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
        case LM_GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
        case LM_GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
        case LM_GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
        case LM_GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
        case LM_GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
        case LM_GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
        case LM_GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
        default:                   return format("unknown type %d", type);
    }
}

std::string lm_gguf_kv_to_str(const struct lm_gguf_context * ctx_gguf, int i) {
    const enum lm_gguf_type type = lm_gguf_get_kv_type(ctx_gguf, i);

    switch (type) {
        case LM_GGUF_TYPE_STRING:
            return lm_gguf_get_val_str(ctx_gguf, i);
        case LM_GGUF_TYPE_ARRAY:
            {
                const enum lm_gguf_type arr_type = lm_gguf_get_arr_type(ctx_gguf, i);
                int arr_n = lm_gguf_get_arr_n(ctx_gguf, i);
                const void * data = arr_type == LM_GGUF_TYPE_STRING ? nullptr : lm_gguf_get_arr_data(ctx_gguf, i);
                std::stringstream ss;
                ss << "[";
                for (int j = 0; j < arr_n; j++) {
                    if (arr_type == LM_GGUF_TYPE_STRING) {
                        std::string val = lm_gguf_get_arr_str(ctx_gguf, i, j);
                        // escape quotes
                        replace_all(val, "\\", "\\\\");
                        replace_all(val, "\"", "\\\"");
                        ss << '"' << val << '"';
                    } else if (arr_type == LM_GGUF_TYPE_ARRAY) {
                        ss << "???";
                    } else {
                        ss << lm_gguf_data_to_str(arr_type, data, j);
                    }
                    if (j < arr_n - 1) {
                        ss << ", ";
                    }
                }
                ss << "]";
                return ss.str();
            }
        default:
            return lm_gguf_data_to_str(type, lm_gguf_get_val_data(ctx_gguf, i), 0);
    }
}
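llama_log_set above is the hook host code uses to redirect the library's logging; passing nullptr restores llama_log_callback_default. A minimal installation sketch (my_log_callback and install_logging are illustrative names, not part of the package):

#include "llama.h"

#include <cstdio>

// Hypothetical host-side callback: forwards each log line to stderr with a
// severity tag. The signature matches the lm_ggml_log_callback type that
// llama_log_set accepts.
static void my_log_callback(lm_ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    const char * tag = level == LM_GGML_LOG_LEVEL_ERROR ? "E"
                     : level == LM_GGML_LOG_LEVEL_WARN  ? "W"
                     :                                    "I";
    fprintf(stderr, "[%s] %s", tag, text);
    fflush(stderr);
}

void install_logging() {
    // Install once at startup; all LLAMA_LOG_* output then flows through it.
    llama_log_set(my_log_callback, nullptr);
}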
package/cpp/llama-impl.h CHANGED
@@ -1,61 +1,61 @@
As with llama-impl.cpp, the removed and added text are identical line for line. The header, shown once:

#pragma once

#include "ggml.h" // for lm_ggml_log_level

#include <string>
#include <vector>

#ifdef __GNUC__
#    if defined(__MINGW32__) && !defined(__clang__)
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#    else
#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#    endif
#else
#    define LLAMA_ATTRIBUTE_FORMAT(...)
#endif

//
// logging
//

LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal        (lm_ggml_log_level level, const char * format, ...);
void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data);

#define LLAMA_LOG(...)       llama_log_internal(LM_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
#define LLAMA_LOG_INFO(...)  llama_log_internal(LM_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...)  llama_log_internal(LM_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(LM_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LLAMA_LOG_DEBUG(...) llama_log_internal(LM_GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LLAMA_LOG_CONT(...)  llama_log_internal(LM_GGML_LOG_LEVEL_CONT , __VA_ARGS__)

//
// helpers
//

template <typename T>
struct no_init {
    T value;
    no_init() { /* do nothing */ }
};

struct time_meas {
    time_meas(int64_t & t_acc, bool disable = false);
    ~time_meas();

    const int64_t t_start_us;

    int64_t & t_acc;
};

void replace_all(std::string & s, const std::string & search, const std::string & replace);

// TODO: rename to llama_format ?
LLAMA_ATTRIBUTE_FORMAT(1, 2)
std::string format(const char * fmt, ...);

std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
std::string llama_format_tensor_shape(const struct lm_ggml_tensor * t);

std::string lm_gguf_kv_to_str(const struct lm_gguf_context * ctx_gguf, int i);
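time_meas is a small RAII timer: the constructor samples lm_ggml_time_us(), and the destructor adds the elapsed microseconds to the referenced accumulator unless the timer was disabled. A minimal usage sketch (t_decode_us and decode_batch are illustrative names):

#include "llama-impl.h" // internal header shown above

extern void decode_batch(); // hypothetical unit of work being measured

int64_t t_decode_us = 0; // running total of decode time, in microseconds

void timed_decode() {
    time_meas tm(t_decode_us); // constructor samples lm_ggml_time_us()
    decode_batch();
} // ~time_meas() runs here and adds the elapsed microseconds to t_decode_us

// Passing disable = true makes the timer a no-op (t_start_us is set to -1,
// so the destructor adds nothing):
//     time_meas tm(t_decode_us, /*disable=*/true);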
package/cpp/llama-io.cpp ADDED
@@ -0,0 +1,15 @@
+#include "llama-io.h"
+
+void llama_io_write_i::write_string(const std::string & str) {
+    uint32_t str_size = str.size();
+
+    write(&str_size, sizeof(str_size));
+    write(str.data(), str_size);
+}
+
+void llama_io_read_i::read_string(std::string & str) {
+    uint32_t str_size;
+    read_to(&str_size, sizeof(str_size));
+
+    str.assign((const char *) read(str_size), str_size);
+}
package/cpp/llama-io.h ADDED
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+struct lm_ggml_tensor;
+
+class llama_io_write_i {
+public:
+    llama_io_write_i() = default;
+    virtual ~llama_io_write_i() = default;
+
+    virtual void write(const void * src, size_t size) = 0;
+    virtual void write_tensor(const lm_ggml_tensor * tensor, size_t offset, size_t size) = 0;
+
+    // bytes written so far
+    virtual size_t n_bytes() = 0;
+
+    void write_string(const std::string & str);
+};
+
+class llama_io_read_i {
+public:
+    llama_io_read_i() = default;
+    virtual ~llama_io_read_i() = default;
+
+    virtual const uint8_t * read(size_t size) = 0;
+    virtual void read_to(void * dst, size_t size) = 0;
+
+    // bytes read so far
+    virtual size_t n_bytes() = 0;
+
+    void read_string(std::string & str);
+};
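These two interfaces, new in 1.5.0, abstract the byte sink and source used by state save/load; write_string and read_string layer a uint32-length-prefixed string encoding over the pure-virtual primitives. A minimal in-memory writer, sketched for illustration (this subclass is not part of the package, which supplies its own implementations):

#include "llama-io.h" // internal header shown above

#include <cstdint>
#include <vector>

// Hypothetical subclass: accumulates everything written into a byte vector.
class llama_io_write_buffer_sketch : public llama_io_write_i {
public:
    void write(const void * src, size_t size) override {
        const uint8_t * p = static_cast<const uint8_t *>(src);
        buf.insert(buf.end(), p, p + size);
    }

    void write_tensor(const lm_ggml_tensor * /*tensor*/, size_t /*offset*/, size_t /*size*/) override {
        // omitted in this sketch; a real implementation copies the tensor's
        // bytes into the stream here
    }

    size_t n_bytes() override { return buf.size(); }

    std::vector<uint8_t> buf;
};

// Usage: calling write_string("hi") on this writer appends the 4-byte length
// value 2 followed by the bytes 'h' 'i' — exactly the layout that
// llama_io_read_i::read_string in llama-io.cpp expects back.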