cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
@@ -1,590 +1,600 @@
1
- #include "llama-mmap.h"
2
-
3
- #include "llama-impl.h"
4
-
5
- #include "ggml.h"
6
-
7
- #include <cstring>
8
- #include <climits>
9
- #include <stdexcept>
10
- #include <cerrno>
11
-
12
- #ifdef __has_include
13
- #if __has_include(<unistd.h>)
14
- #include <unistd.h>
15
- #if defined(_POSIX_MAPPED_FILES)
16
- #include <sys/mman.h>
17
- #include <fcntl.h>
18
- #endif
19
- #if defined(_POSIX_MEMLOCK_RANGE)
20
- #include <sys/resource.h>
21
- #endif
22
- #endif
23
- #endif
24
-
25
- #if defined(_WIN32)
26
- #define WIN32_LEAN_AND_MEAN
27
- #ifndef NOMINMAX
28
- #define NOMINMAX
29
- #endif
30
- #include <windows.h>
31
- #ifndef PATH_MAX
32
- #define PATH_MAX MAX_PATH
33
- #endif
34
- #include <io.h>
35
- #endif
36
-
37
- // TODO: consider moving to llama-impl.h if needed in more places
38
- #if defined(_WIN32)
39
- static std::string llama_format_win_err(DWORD err) {
40
- LPSTR buf;
41
- size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
42
- NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
43
- if (!size) {
44
- return "FormatMessageA failed";
45
- }
46
- std::string ret(buf, size);
47
- LocalFree(buf);
48
- return ret;
49
- }
50
- #endif
51
-
52
- // llama_file
53
-
54
- struct llama_file::impl {
55
- #if defined(_WIN32)
56
- HANDLE fp_win32;
57
- std::string GetErrorMessageWin32(DWORD error_code) const {
58
- std::string ret;
59
- LPSTR lpMsgBuf = NULL;
60
- DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
61
- NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
62
- if (!bufLen) {
63
- ret = format("Win32 error code: %lx", error_code);
64
- } else {
65
- ret = lpMsgBuf;
66
- LocalFree(lpMsgBuf);
67
- }
68
-
69
- return ret;
70
- }
71
-
72
- impl(const char * fname, const char * mode) {
73
- fp = lm_ggml_fopen(fname, mode);
74
- if (fp == NULL) {
75
- throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
76
- }
77
- fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
78
- seek(0, SEEK_END);
79
- size = tell();
80
- seek(0, SEEK_SET);
81
- }
82
-
83
- size_t tell() const {
84
- LARGE_INTEGER li;
85
- li.QuadPart = 0;
86
- BOOL ret = SetFilePointerEx(fp_win32, li, &li, FILE_CURRENT);
87
- if (!ret) {
88
- throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
89
- }
90
-
91
- return li.QuadPart;
92
- }
93
-
94
- void seek(size_t offset, int whence) const {
95
- static_assert(SEEK_SET == FILE_BEGIN, "SEEK_SET != FILE_BEGIN");
96
- static_assert(SEEK_CUR == FILE_CURRENT, "SEEK_CUR != FILE_CURRENT");
97
- static_assert(SEEK_END == FILE_END, "SEEK_END != FILE_END");
98
-
99
- LARGE_INTEGER li;
100
- li.QuadPart = offset;
101
- BOOL ret = SetFilePointerEx(fp_win32, li, NULL, whence);
102
- if (!ret) {
103
- throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
104
- }
105
- }
106
-
107
- void read_raw(void * ptr, size_t len) const {
108
- size_t bytes_read = 0;
109
- while (bytes_read < len) {
110
- size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
111
- DWORD chunk_read = 0;
112
- BOOL result = ReadFile(fp_win32, reinterpret_cast<char*>(ptr) + bytes_read, chunk_size, &chunk_read, NULL);
113
- if (!result) {
114
- throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
115
- }
116
- if (chunk_read < chunk_size || chunk_read == 0) {
117
- throw std::runtime_error("unexpectedly reached end of file");
118
- }
119
-
120
- bytes_read += chunk_read;
121
- }
122
- }
123
-
124
- uint32_t read_u32() const {
125
- uint32_t val;
126
- read_raw(&val, sizeof(val));
127
- return val;
128
- }
129
-
130
- void write_raw(const void * ptr, size_t len) const {
131
- size_t bytes_written = 0;
132
- while (bytes_written < len) {
133
- size_t chunk_size = std::min<size_t>(len - bytes_written, 64*1024*1024);
134
- DWORD chunk_written = 0;
135
- BOOL result = WriteFile(fp_win32, reinterpret_cast<char const*>(ptr) + bytes_written, chunk_size, &chunk_written, NULL);
136
- if (!result) {
137
- throw std::runtime_error(format("write error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
138
- }
139
- if (chunk_written < chunk_size || chunk_written == 0) {
140
- throw std::runtime_error("unexpectedly failed to write bytes");
141
- }
142
-
143
- bytes_written += chunk_written;
144
- }
145
- }
146
-
147
- void write_u32(uint32_t val) const {
148
- write_raw(&val, sizeof(val));
149
- }
150
-
151
- ~impl() {
152
- if (fp) {
153
- std::fclose(fp);
154
- }
155
- }
156
- #else
157
- impl(const char * fname, const char * mode) {
158
- fp = lm_ggml_fopen(fname, mode);
159
- if (fp == NULL) {
160
- throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
161
- }
162
- seek(0, SEEK_END);
163
- size = tell();
164
- seek(0, SEEK_SET);
165
- }
166
-
167
- size_t tell() const {
168
- // TODO: this ifdef is never true?
169
- #ifdef _WIN32
170
- __int64 ret = _ftelli64(fp);
171
- #else
172
- long ret = std::ftell(fp);
173
- #endif
174
- if (ret == -1) {
175
- throw std::runtime_error(format("ftell error: %s", strerror(errno)));
176
- }
177
-
178
- return (size_t) ret;
179
- }
180
-
181
- void seek(size_t offset, int whence) const {
182
- // TODO: this ifdef is never true?
183
- #ifdef _WIN32
184
- int ret = _fseeki64(fp, (__int64) offset, whence);
185
- #else
186
- int ret = std::fseek(fp, (long) offset, whence);
187
- #endif
188
- if (ret != 0) {
189
- throw std::runtime_error(format("seek error: %s", strerror(errno)));
190
- }
191
- }
192
-
193
- void read_raw(void * ptr, size_t len) const {
194
- if (len == 0) {
195
- return;
196
- }
197
- errno = 0;
198
- std::size_t ret = std::fread(ptr, len, 1, fp);
199
- if (ferror(fp)) {
200
- throw std::runtime_error(format("read error: %s", strerror(errno)));
201
- }
202
- if (ret != 1) {
203
- throw std::runtime_error("unexpectedly reached end of file");
204
- }
205
- }
206
-
207
- uint32_t read_u32() const {
208
- uint32_t ret;
209
- read_raw(&ret, sizeof(ret));
210
- return ret;
211
- }
212
-
213
- void write_raw(const void * ptr, size_t len) const {
214
- if (len == 0) {
215
- return;
216
- }
217
- errno = 0;
218
- size_t ret = std::fwrite(ptr, len, 1, fp);
219
- if (ret != 1) {
220
- throw std::runtime_error(format("write error: %s", strerror(errno)));
221
- }
222
- }
223
-
224
- void write_u32(uint32_t val) const {
225
- write_raw(&val, sizeof(val));
226
- }
227
-
228
- ~impl() {
229
- if (fp) {
230
- std::fclose(fp);
231
- }
232
- }
233
- #endif
234
-
235
- FILE * fp;
236
- size_t size;
237
- };
238
-
239
- llama_file::llama_file(const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {}
240
- llama_file::~llama_file() = default;
241
-
242
- size_t llama_file::tell() const { return pimpl->tell(); }
243
- size_t llama_file::size() const { return pimpl->size; }
244
-
245
- int llama_file::file_id() const {
246
- #ifdef _WIN32
247
- return _fileno(pimpl->fp);
248
- #else
249
- #if defined(fileno)
250
- return fileno(pimpl->fp);
251
- #else
252
- return ::fileno(pimpl->fp);
253
- #endif
254
- #endif
255
- }
256
-
257
- void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
258
- void llama_file::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); }
259
-
260
- uint32_t llama_file::read_u32() const { return pimpl->read_u32(); }
261
-
262
- void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
263
- void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
264
-
265
- // llama_mmap
266
-
267
- struct llama_mmap::impl {
268
- #ifdef _POSIX_MAPPED_FILES
269
- std::vector<std::pair<size_t, size_t>> mapped_fragments;
270
-
271
- impl(struct llama_file * file, size_t prefetch, bool numa) {
272
- size = file->size();
273
- int fd = file->file_id();
274
- int flags = MAP_SHARED;
275
- if (numa) { prefetch = 0; }
276
- #ifdef __linux__
277
- if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
278
- LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
279
- strerror(errno));
280
- }
281
- if (prefetch) { flags |= MAP_POPULATE; }
282
- #endif
283
- addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
284
- if (addr == MAP_FAILED) {
285
- throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
286
- }
287
-
288
- if (prefetch > 0) {
289
- if (madvise(addr, std::min(file->size(), prefetch), MADV_WILLNEED)) {
290
- fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
291
- strerror(errno));
292
- }
293
- }
294
- if (numa) {
295
- if (madvise(addr, file->size(), MADV_RANDOM)) {
296
- fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
297
- strerror(errno));
298
- }
299
- }
300
-
301
- mapped_fragments.emplace_back(0, file->size());
302
- }
303
-
304
- static void align_range(size_t * first, size_t * last, size_t page_size) {
305
- size_t offset_in_page = *first & (page_size - 1);
306
- size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
307
- *first += offset_to_page;
308
-
309
- *last = *last & ~(page_size - 1);
310
-
311
- if (*last <= *first) {
312
- *last = *first;
313
- }
314
- }
315
-
316
- void unmap_fragment(size_t first, size_t last) {
317
- int page_size = sysconf(_SC_PAGESIZE);
318
- align_range(&first, &last, page_size);
319
- size_t len = last - first;
320
-
321
- if (len == 0) {
322
- return;
323
- }
324
-
325
- LM_GGML_ASSERT(first % page_size == 0);
326
- LM_GGML_ASSERT(last % page_size == 0);
327
- LM_GGML_ASSERT(last > first);
328
-
329
- void * next_page_start = (uint8_t *) addr + first;
330
-
331
- if (munmap(next_page_start, len)) {
332
- LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
333
- }
334
-
335
- std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
336
- for (const auto & frag : mapped_fragments) {
337
- if (frag.first < first && frag.second > last) {
338
- new_mapped_fragments.emplace_back(frag.first, first);
339
- new_mapped_fragments.emplace_back(last, frag.second);
340
- } else if (frag.first < first && frag.second > first) {
341
- new_mapped_fragments.emplace_back(frag.first, first);
342
- } else if (frag.first < last && frag.second > last) {
343
- new_mapped_fragments.emplace_back(last, frag.second);
344
- } else if (frag.first >= first && frag.second <= last) {
345
- } else {
346
- new_mapped_fragments.push_back(frag);
347
- }
348
- }
349
- mapped_fragments = std::move(new_mapped_fragments);
350
- }
351
-
352
- ~impl() {
353
- for (const auto & frag : mapped_fragments) {
354
- if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
355
- LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
356
- }
357
- }
358
- }
359
- #elif defined(_WIN32)
360
- impl(struct llama_file * file, size_t prefetch, bool numa) {
361
- LM_GGML_UNUSED(numa);
362
-
363
- size = file->size();
364
-
365
- HANDLE hFile = (HANDLE) _get_osfhandle(file->file_id());
366
-
367
- HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
368
-
369
- if (hMapping == NULL) {
370
- DWORD error = GetLastError();
371
- throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
372
- }
373
-
374
- addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
375
- DWORD error = GetLastError();
376
- CloseHandle(hMapping);
377
-
378
- if (addr == NULL) {
379
- throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
380
- }
381
-
382
- if (prefetch > 0) {
383
- #if _WIN32_WINNT >= 0x602
384
- BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
385
- HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
386
-
387
- pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");
388
-
389
- if (pPrefetchVirtualMemory) {
390
- WIN32_MEMORY_RANGE_ENTRY range;
391
- range.VirtualAddress = addr;
392
- range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
393
- if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
394
- LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
395
- llama_format_win_err(GetLastError()).c_str());
396
- }
397
- }
398
- #else
399
- throw std::runtime_error("PrefetchVirtualMemory unavailable");
400
- #endif
401
- }
402
- }
403
-
404
- void unmap_fragment(size_t first, size_t last) {
405
- LM_GGML_UNUSED(first);
406
- LM_GGML_UNUSED(last);
407
- }
408
-
409
- ~impl() {
410
- if (!UnmapViewOfFile(addr)) {
411
- LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
412
- llama_format_win_err(GetLastError()).c_str());
413
- }
414
- }
415
- #else
416
- impl(struct llama_file * file, size_t prefetch, bool numa) {
417
- LM_GGML_UNUSED(file);
418
- LM_GGML_UNUSED(prefetch);
419
- LM_GGML_UNUSED(numa);
420
-
421
- throw std::runtime_error("mmap not supported");
422
- }
423
-
424
- void unmap_fragment(size_t first, size_t last) {
425
- LM_GGML_UNUSED(first);
426
- LM_GGML_UNUSED(last);
427
-
428
- throw std::runtime_error("mmap not supported");
429
- }
430
- #endif
431
-
432
- void * addr;
433
- size_t size;
434
- };
435
-
436
- llama_mmap::llama_mmap(struct llama_file * file, size_t prefetch, bool numa) : pimpl(std::make_unique<impl>(file, prefetch, numa)) {}
437
- llama_mmap::~llama_mmap() = default;
438
-
439
- size_t llama_mmap::size() const { return pimpl->size; }
440
- void * llama_mmap::addr() const { return pimpl->addr; }
441
-
442
- void llama_mmap::unmap_fragment(size_t first, size_t last) { pimpl->unmap_fragment(first, last); }
443
-
444
- #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
445
- const bool llama_mmap::SUPPORTED = true;
446
- #else
447
- const bool llama_mmap::SUPPORTED = false;
448
- #endif
449
-
450
- // llama_mlock
451
-
452
- struct llama_mlock::impl {
453
- #ifdef _POSIX_MEMLOCK_RANGE
454
- static size_t lock_granularity() {
455
- return (size_t) sysconf(_SC_PAGESIZE);
456
- }
457
-
458
- bool raw_lock(const void * addr, size_t size) const {
459
- if (!mlock(addr, size)) {
460
- return true;
461
- }
462
-
463
- #ifdef __APPLE__
464
- #define MLOCK_SUGGESTION \
465
- "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
466
- "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MEMLOCK (ulimit -l).\n"
467
- #else
468
- #define MLOCK_SUGGESTION \
469
- "Try increasing RLIMIT_MEMLOCK ('ulimit -l' as root).\n"
470
- #endif
471
-
472
- char* errmsg = std::strerror(errno);
473
- bool suggest = (errno == ENOMEM);
474
-
475
- struct rlimit lock_limit;
476
- if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
477
- suggest = false;
478
- }
479
- if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
480
- suggest = false;
481
- }
482
-
483
- LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
484
- size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
485
- return false;
486
- }
487
-
488
- static void raw_unlock(void * addr, size_t size) {
489
- if (munlock(addr, size)) {
490
- LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
491
- }
492
- }
493
- #elif defined(_WIN32)
494
- static size_t lock_granularity() {
495
- SYSTEM_INFO si;
496
- GetSystemInfo(&si);
497
- return (size_t) si.dwPageSize;
498
- }
499
-
500
- bool raw_lock(void * ptr, size_t len) const {
501
- for (int tries = 1; ; tries++) {
502
- if (VirtualLock(ptr, len)) {
503
- return true;
504
- }
505
- if (tries == 2) {
506
- LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
507
- len, size, llama_format_win_err(GetLastError()).c_str());
508
- return false;
509
- }
510
-
511
- SIZE_T min_ws_size, max_ws_size;
512
- if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
513
- LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
514
- llama_format_win_err(GetLastError()).c_str());
515
- return false;
516
- }
517
- size_t increment = len + 1048576;
518
- min_ws_size += increment;
519
- max_ws_size += increment;
520
- if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
521
- LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
522
- llama_format_win_err(GetLastError()).c_str());
523
- return false;
524
- }
525
- }
526
- }
527
-
528
- static void raw_unlock(void * ptr, size_t len) {
529
- if (!VirtualUnlock(ptr, len)) {
530
- LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
531
- llama_format_win_err(GetLastError()).c_str());
532
- }
533
- }
534
- #else
535
// Fixed granularity used on platforms that have neither POSIX memory locking
// nor Win32.
static size_t lock_granularity() {
    const size_t fallback_granularity = 65536;
    return fallback_granularity;
}
538
-
539
- bool raw_lock(const void * addr, size_t len) const {
540
- LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
541
- return false;
542
- }
543
-
544
// No-op counterpart of raw_lock() for configurations without locking support.
static void raw_unlock(const void * addr, size_t len) {
    (void) addr;
    (void) len;
}
545
- #endif
546
-
547
- impl() : addr(NULL), size(0), failed_already(false) {}
548
-
549
- void init(void * ptr) {
550
- LM_GGML_ASSERT(addr == NULL && size == 0);
551
- addr = ptr;
552
- }
553
-
554
- void grow_to(size_t target_size) {
555
- LM_GGML_ASSERT(addr);
556
- if (failed_already) {
557
- return;
558
- }
559
- size_t granularity = lock_granularity();
560
- target_size = (target_size + granularity - 1) & ~(granularity - 1);
561
- if (target_size > size) {
562
- if (raw_lock((uint8_t *) addr + size, target_size - size)) {
563
- size = target_size;
564
- } else {
565
- failed_already = true;
566
- }
567
- }
568
- }
569
-
570
- void * addr;
571
- size_t size;
572
-
573
- bool failed_already;
574
- };
575
-
576
- llama_mlock::llama_mlock() : pimpl(std::make_unique<impl>()) {}
577
- llama_mlock::~llama_mlock() = default;
578
-
579
- void llama_mlock::init(void * ptr) { pimpl->init(ptr); }
580
- void llama_mlock::grow_to(size_t target_size) { pimpl->grow_to(target_size); }
581
-
582
- #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
583
- const bool llama_mlock::SUPPORTED = true;
584
- #else
585
- const bool llama_mlock::SUPPORTED = false;
586
- #endif
587
-
588
// Expose the compile-time PATH_MAX constant as a size_t.
size_t llama_path_max() {
    const size_t max_path_len = PATH_MAX;
    return max_path_len;
}
1
+ #include "llama-mmap.h"
2
+
3
+ #include "llama-impl.h"
4
+
5
+ #include "ggml.h"
6
+
7
+ #include <cstring>
8
+ #include <climits>
9
+ #include <stdexcept>
10
+ #include <cerrno>
11
+ #include <algorithm>
12
+
13
+ #ifdef __has_include
14
+ #if __has_include(<unistd.h>)
15
+ #include <unistd.h>
16
+ #if defined(_POSIX_MAPPED_FILES)
17
+ #include <sys/mman.h>
18
+ #include <fcntl.h>
19
+ #endif
20
+ #if defined(_POSIX_MEMLOCK_RANGE)
21
+ #include <sys/resource.h>
22
+ #endif
23
+ #endif
24
+ #endif
25
+
26
+ #if defined(_WIN32)
27
+ #define WIN32_LEAN_AND_MEAN
28
+ #ifndef NOMINMAX
29
+ #define NOMINMAX
30
+ #endif
31
+ #include <windows.h>
32
+ #ifndef PATH_MAX
33
+ #define PATH_MAX MAX_PATH
34
+ #endif
35
+ #include <io.h>
36
+ #endif
37
+
38
+ #if defined(__APPLE__)
39
+ #include <TargetConditionals.h>
40
+ #endif
41
+
42
+ // TODO: consider moving to llama-impl.h if needed in more places
43
+ #if defined(_WIN32)
44
+ static std::string llama_format_win_err(DWORD err) {
45
+ LPSTR buf;
46
+ size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
47
+ NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
48
+ if (!size) {
49
+ return "FormatMessageA failed";
50
+ }
51
+ std::string ret(buf, size);
52
+ LocalFree(buf);
53
+ return ret;
54
+ }
55
+ #endif
56
+
57
+ // llama_file
58
+
59
+ struct llama_file::impl {
60
+ #if defined(_WIN32)
61
+ HANDLE fp_win32;
62
+ std::string GetErrorMessageWin32(DWORD error_code) const {
63
+ std::string ret;
64
+ LPSTR lpMsgBuf = NULL;
65
+ DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
66
+ NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
67
+ if (!bufLen) {
68
+ ret = format("Win32 error code: %lx", error_code);
69
+ } else {
70
+ ret = lpMsgBuf;
71
+ LocalFree(lpMsgBuf);
72
+ }
73
+
74
+ return ret;
75
+ }
76
+
77
+ impl(const char * fname, const char * mode) {
78
+ fp = lm_ggml_fopen(fname, mode);
79
+ if (fp == NULL) {
80
+ throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
81
+ }
82
+ fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
83
+ seek(0, SEEK_END);
84
+ size = tell();
85
+ seek(0, SEEK_SET);
86
+ }
87
+
88
+ size_t tell() const {
89
+ LARGE_INTEGER li;
90
+ li.QuadPart = 0;
91
+ BOOL ret = SetFilePointerEx(fp_win32, li, &li, FILE_CURRENT);
92
+ if (!ret) {
93
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
94
+ }
95
+
96
+ return li.QuadPart;
97
+ }
98
+
99
+ void seek(size_t offset, int whence) const {
100
+ static_assert(SEEK_SET == FILE_BEGIN, "SEEK_SET != FILE_BEGIN");
101
+ static_assert(SEEK_CUR == FILE_CURRENT, "SEEK_CUR != FILE_CURRENT");
102
+ static_assert(SEEK_END == FILE_END, "SEEK_END != FILE_END");
103
+
104
+ LARGE_INTEGER li;
105
+ li.QuadPart = offset;
106
+ BOOL ret = SetFilePointerEx(fp_win32, li, NULL, whence);
107
+ if (!ret) {
108
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
109
+ }
110
+ }
111
+
112
+ void read_raw(void * ptr, size_t len) const {
113
+ size_t bytes_read = 0;
114
+ while (bytes_read < len) {
115
+ size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
116
+ DWORD chunk_read = 0;
117
+ BOOL result = ReadFile(fp_win32, reinterpret_cast<char*>(ptr) + bytes_read, chunk_size, &chunk_read, NULL);
118
+ if (!result) {
119
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
120
+ }
121
+ if (chunk_read < chunk_size || chunk_read == 0) {
122
+ throw std::runtime_error("unexpectedly reached end of file");
123
+ }
124
+
125
+ bytes_read += chunk_read;
126
+ }
127
+ }
128
+
129
+ uint32_t read_u32() const {
130
+ uint32_t val;
131
+ read_raw(&val, sizeof(val));
132
+ return val;
133
+ }
134
+
135
+ void write_raw(const void * ptr, size_t len) const {
136
+ size_t bytes_written = 0;
137
+ while (bytes_written < len) {
138
+ size_t chunk_size = std::min<size_t>(len - bytes_written, 64*1024*1024);
139
+ DWORD chunk_written = 0;
140
+ BOOL result = WriteFile(fp_win32, reinterpret_cast<char const*>(ptr) + bytes_written, chunk_size, &chunk_written, NULL);
141
+ if (!result) {
142
+ throw std::runtime_error(format("write error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
143
+ }
144
+ if (chunk_written < chunk_size || chunk_written == 0) {
145
+ throw std::runtime_error("unexpectedly failed to write bytes");
146
+ }
147
+
148
+ bytes_written += chunk_written;
149
+ }
150
+ }
151
+
152
+ void write_u32(uint32_t val) const {
153
+ write_raw(&val, sizeof(val));
154
+ }
155
+
156
+ ~impl() {
157
+ if (fp) {
158
+ std::fclose(fp);
159
+ }
160
+ }
161
+ #else
162
+ impl(const char * fname, const char * mode) {
163
+ fp = lm_ggml_fopen(fname, mode);
164
+ if (fp == NULL) {
165
+ throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
166
+ }
167
+ seek(0, SEEK_END);
168
+ size = tell();
169
+ seek(0, SEEK_SET);
170
+ }
171
+
172
+ size_t tell() const {
173
+ // TODO: this ifdef is never true?
174
+ #ifdef _WIN32
175
+ __int64 ret = _ftelli64(fp);
176
+ #else
177
+ long ret = std::ftell(fp);
178
+ #endif
179
+ if (ret == -1) {
180
+ throw std::runtime_error(format("ftell error: %s", strerror(errno)));
181
+ }
182
+
183
+ return (size_t) ret;
184
+ }
185
+
186
+ void seek(size_t offset, int whence) const {
187
+ // TODO: this ifdef is never true?
188
+ #ifdef _WIN32
189
+ int ret = _fseeki64(fp, (__int64) offset, whence);
190
+ #else
191
+ int ret = std::fseek(fp, (long) offset, whence);
192
+ #endif
193
+ if (ret != 0) {
194
+ throw std::runtime_error(format("seek error: %s", strerror(errno)));
195
+ }
196
+ }
197
+
198
+ void read_raw(void * ptr, size_t len) const {
199
+ if (len == 0) {
200
+ return;
201
+ }
202
+ errno = 0;
203
+ std::size_t ret = std::fread(ptr, len, 1, fp);
204
+ if (ferror(fp)) {
205
+ throw std::runtime_error(format("read error: %s", strerror(errno)));
206
+ }
207
+ if (ret != 1) {
208
+ throw std::runtime_error("unexpectedly reached end of file");
209
+ }
210
+ }
211
+
212
+ uint32_t read_u32() const {
213
+ uint32_t ret;
214
+ read_raw(&ret, sizeof(ret));
215
+ return ret;
216
+ }
217
+
218
+ void write_raw(const void * ptr, size_t len) const {
219
+ if (len == 0) {
220
+ return;
221
+ }
222
+ errno = 0;
223
+ size_t ret = std::fwrite(ptr, len, 1, fp);
224
+ if (ret != 1) {
225
+ throw std::runtime_error(format("write error: %s", strerror(errno)));
226
+ }
227
+ }
228
+
229
+ void write_u32(uint32_t val) const {
230
+ write_raw(&val, sizeof(val));
231
+ }
232
+
233
+ ~impl() {
234
+ if (fp) {
235
+ std::fclose(fp);
236
+ }
237
+ }
238
+ #endif
239
+
240
+ FILE * fp;
241
+ size_t size;
242
+ };
243
+
244
+ llama_file::llama_file(const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {}
245
+ llama_file::~llama_file() = default;
246
+
247
+ size_t llama_file::tell() const { return pimpl->tell(); }
248
+ size_t llama_file::size() const { return pimpl->size; }
249
+
250
+ int llama_file::file_id() const {
251
+ #ifdef _WIN32
252
+ return _fileno(pimpl->fp);
253
+ #else
254
+ #if defined(fileno)
255
+ return fileno(pimpl->fp);
256
+ #else
257
+ return ::fileno(pimpl->fp);
258
+ #endif
259
+ #endif
260
+ }
261
+
262
+ void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
263
+ void llama_file::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); }
264
+
265
+ uint32_t llama_file::read_u32() const { return pimpl->read_u32(); }
266
+
267
+ void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
268
+ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
269
+
270
+ // llama_mmap
271
+
272
+ struct llama_mmap::impl {
273
+ #ifdef _POSIX_MAPPED_FILES
274
+ std::vector<std::pair<size_t, size_t>> mapped_fragments;
275
+
276
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
277
+ size = file->size();
278
+ int fd = file->file_id();
279
+ int flags = MAP_SHARED;
280
+ if (numa) { prefetch = 0; }
281
+ #ifdef __linux__
282
+ if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
283
+ LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
284
+ strerror(errno));
285
+ }
286
+ if (prefetch) { flags |= MAP_POPULATE; }
287
+ #endif
288
+ addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
289
+ if (addr == MAP_FAILED) {
290
+ throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
291
+ }
292
+
293
+ if (prefetch > 0) {
294
+ if (madvise(addr, std::min(file->size(), prefetch), MADV_WILLNEED)) {
295
+ fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
296
+ strerror(errno));
297
+ }
298
+ }
299
+ if (numa) {
300
+ if (madvise(addr, file->size(), MADV_RANDOM)) {
301
+ fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
302
+ strerror(errno));
303
+ }
304
+ }
305
+
306
+ mapped_fragments.emplace_back(0, file->size());
307
+ }
308
+
309
+ static void align_range(size_t * first, size_t * last, size_t page_size) {
310
+ size_t offset_in_page = *first & (page_size - 1);
311
+ size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
312
+ *first += offset_to_page;
313
+
314
+ *last = *last & ~(page_size - 1);
315
+
316
+ if (*last <= *first) {
317
+ *last = *first;
318
+ }
319
+ }
320
+
321
+ void unmap_fragment(size_t first, size_t last) {
322
+ int page_size = sysconf(_SC_PAGESIZE);
323
+ align_range(&first, &last, page_size);
324
+ size_t len = last - first;
325
+
326
+ if (len == 0) {
327
+ return;
328
+ }
329
+
330
+ LM_GGML_ASSERT(first % page_size == 0);
331
+ LM_GGML_ASSERT(last % page_size == 0);
332
+ LM_GGML_ASSERT(last > first);
333
+
334
+ void * next_page_start = (uint8_t *) addr + first;
335
+
336
+ if (munmap(next_page_start, len)) {
337
+ LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
338
+ }
339
+
340
+ std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
341
+ for (const auto & frag : mapped_fragments) {
342
+ if (frag.first < first && frag.second > last) {
343
+ new_mapped_fragments.emplace_back(frag.first, first);
344
+ new_mapped_fragments.emplace_back(last, frag.second);
345
+ } else if (frag.first < first && frag.second > first) {
346
+ new_mapped_fragments.emplace_back(frag.first, first);
347
+ } else if (frag.first < last && frag.second > last) {
348
+ new_mapped_fragments.emplace_back(last, frag.second);
349
+ } else if (frag.first >= first && frag.second <= last) {
350
+ } else {
351
+ new_mapped_fragments.push_back(frag);
352
+ }
353
+ }
354
+ mapped_fragments = std::move(new_mapped_fragments);
355
+ }
356
+
357
+ ~impl() {
358
+ for (const auto & frag : mapped_fragments) {
359
+ if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
360
+ LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
361
+ }
362
+ }
363
+ }
364
+ #elif defined(_WIN32)
365
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
366
+ LM_GGML_UNUSED(numa);
367
+
368
+ size = file->size();
369
+
370
+ HANDLE hFile = (HANDLE) _get_osfhandle(file->file_id());
371
+
372
+ HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
373
+
374
+ if (hMapping == NULL) {
375
+ DWORD error = GetLastError();
376
+ throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
377
+ }
378
+
379
+ addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
380
+ DWORD error = GetLastError();
381
+ CloseHandle(hMapping);
382
+
383
+ if (addr == NULL) {
384
+ throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
385
+ }
386
+
387
+ if (prefetch > 0) {
388
+ #if _WIN32_WINNT >= 0x602
389
+ BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
390
+ HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
391
+
392
+ pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");
393
+
394
+ if (pPrefetchVirtualMemory) {
395
+ WIN32_MEMORY_RANGE_ENTRY range;
396
+ range.VirtualAddress = addr;
397
+ range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
398
+ if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
399
+ LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
400
+ llama_format_win_err(GetLastError()).c_str());
401
+ }
402
+ }
403
+ #else
404
+ throw std::runtime_error("PrefetchVirtualMemory unavailable");
405
+ #endif
406
+ }
407
+ }
408
+
409
+ void unmap_fragment(size_t first, size_t last) {
410
+ LM_GGML_UNUSED(first);
411
+ LM_GGML_UNUSED(last);
412
+ }
413
+
414
+ ~impl() {
415
+ if (!UnmapViewOfFile(addr)) {
416
+ LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
417
+ llama_format_win_err(GetLastError()).c_str());
418
+ }
419
+ }
420
+ #else
421
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
422
+ LM_GGML_UNUSED(file);
423
+ LM_GGML_UNUSED(prefetch);
424
+ LM_GGML_UNUSED(numa);
425
+
426
+ throw std::runtime_error("mmap not supported");
427
+ }
428
+
429
+ void unmap_fragment(size_t first, size_t last) {
430
+ LM_GGML_UNUSED(first);
431
+ LM_GGML_UNUSED(last);
432
+
433
+ throw std::runtime_error("mmap not supported");
434
+ }
435
+ #endif
436
+
437
+ void * addr;
438
+ size_t size;
439
+ };
440
+
441
+ llama_mmap::llama_mmap(struct llama_file * file, size_t prefetch, bool numa) : pimpl(std::make_unique<impl>(file, prefetch, numa)) {}
442
+ llama_mmap::~llama_mmap() = default;
443
+
444
+ size_t llama_mmap::size() const { return pimpl->size; }
445
+ void * llama_mmap::addr() const { return pimpl->addr; }
446
+
447
+ void llama_mmap::unmap_fragment(size_t first, size_t last) { pimpl->unmap_fragment(first, last); }
448
+
449
+ #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
450
+ const bool llama_mmap::SUPPORTED = true;
451
+ #else
452
+ const bool llama_mmap::SUPPORTED = false;
453
+ #endif
454
+
455
+ // llama_mlock
456
+
457
+ struct llama_mlock::impl {
458
+ #ifdef _POSIX_MEMLOCK_RANGE
459
+ static size_t lock_granularity() {
460
+ return (size_t) sysconf(_SC_PAGESIZE);
461
+ }
462
+
463
+ bool raw_lock(const void * addr, size_t size) const {
464
+ if (!mlock(addr, size)) {
465
+ return true;
466
+ }
467
+
468
+ #ifdef __APPLE__
469
+ #define MLOCK_SUGGESTION \
470
+ "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
471
+ "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MEMLOCK (ulimit -l).\n"
472
+ #else
473
+ #define MLOCK_SUGGESTION \
474
+ "Try increasing RLIMIT_MEMLOCK ('ulimit -l' as root).\n"
475
+ #endif
476
+
477
+ char* errmsg = std::strerror(errno);
478
+ bool suggest = (errno == ENOMEM);
479
+ #if defined(TARGET_OS_VISION) || defined(TARGET_OS_TV) || defined(_AIX)
480
+ // visionOS/tvOS dont't support RLIMIT_MEMLOCK
481
+ // Skip resource limit checks on visionOS/tvOS
482
+ suggest = false;
483
+ #else
484
+ struct rlimit lock_limit;
485
+ if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
486
+ suggest = false;
487
+ }
488
+ if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
489
+ suggest = false;
490
+ }
491
+ #endif
492
+
493
+ LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
494
+ size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
495
+ return false;
496
+ }
497
+
498
+ static void raw_unlock(void * addr, size_t size) {
499
+ if (munlock(addr, size)) {
500
+ LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
501
+ }
502
+ }
503
+ #elif defined(_WIN32)
504
+ static size_t lock_granularity() {
505
+ SYSTEM_INFO si;
506
+ GetSystemInfo(&si);
507
+ return (size_t) si.dwPageSize;
508
+ }
509
+
510
+ bool raw_lock(void * ptr, size_t len) const {
511
+ for (int tries = 1; ; tries++) {
512
+ if (VirtualLock(ptr, len)) {
513
+ return true;
514
+ }
515
+ if (tries == 2) {
516
+ LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
517
+ len, size, llama_format_win_err(GetLastError()).c_str());
518
+ return false;
519
+ }
520
+
521
+ SIZE_T min_ws_size, max_ws_size;
522
+ if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
523
+ LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
524
+ llama_format_win_err(GetLastError()).c_str());
525
+ return false;
526
+ }
527
+ size_t increment = len + 1048576;
528
+ min_ws_size += increment;
529
+ max_ws_size += increment;
530
+ if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
531
+ LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
532
+ llama_format_win_err(GetLastError()).c_str());
533
+ return false;
534
+ }
535
+ }
536
+ }
537
+
538
+ static void raw_unlock(void * ptr, size_t len) {
539
+ if (!VirtualUnlock(ptr, len)) {
540
+ LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
541
+ llama_format_win_err(GetLastError()).c_str());
542
+ }
543
+ }
544
+ #else
545
+ static size_t lock_granularity() {
546
+ return (size_t) 65536;
547
+ }
548
+
549
+ bool raw_lock(const void * addr, size_t len) const {
550
+ LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
551
+ return false;
552
+ }
553
+
554
+ static void raw_unlock(const void * addr, size_t len) {}
555
+ #endif
556
+
557
+ impl() : addr(NULL), size(0), failed_already(false) {}
558
+
559
+ void init(void * ptr) {
560
+ LM_GGML_ASSERT(addr == NULL && size == 0);
561
+ addr = ptr;
562
+ }
563
+
564
+ void grow_to(size_t target_size) {
565
+ LM_GGML_ASSERT(addr);
566
+ if (failed_already) {
567
+ return;
568
+ }
569
+ size_t granularity = lock_granularity();
570
+ target_size = (target_size + granularity - 1) & ~(granularity - 1);
571
+ if (target_size > size) {
572
+ if (raw_lock((uint8_t *) addr + size, target_size - size)) {
573
+ size = target_size;
574
+ } else {
575
+ failed_already = true;
576
+ }
577
+ }
578
+ }
579
+
580
+ void * addr;
581
+ size_t size;
582
+
583
+ bool failed_already;
584
+ };
585
+
586
+ llama_mlock::llama_mlock() : pimpl(std::make_unique<impl>()) {}
587
+ llama_mlock::~llama_mlock() = default;
588
+
589
+ void llama_mlock::init(void * ptr) { pimpl->init(ptr); }
590
+ void llama_mlock::grow_to(size_t target_size) { pimpl->grow_to(target_size); }
591
+
592
+ #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
593
+ const bool llama_mlock::SUPPORTED = true;
594
+ #else
595
+ const bool llama_mlock::SUPPORTED = false;
596
+ #endif
597
+
598
// Expose the compile-time PATH_MAX constant as a size_t.
size_t llama_path_max() {
    const size_t max_path_len = PATH_MAX;
    return max_path_len;
}