llama_cpp 0.10.4 → 0.11.0

Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/ext/llama_cpp/extconf.rb +35 -110
  4. data/ext/llama_cpp/llama_cpp.cpp +52 -28
  5. data/lib/llama_cpp/version.rb +1 -1
  6. data/sig/llama_cpp.rbs +3 -1
  7. data/vendor/include/.gitkeep +0 -0
  8. data/vendor/lib/.gitkeep +0 -0
  9. data/vendor/tmp/llama.cpp/Makefile +758 -0
  10. data/vendor/tmp/llama.cpp/scripts/get-flags.mk +38 -0
  11. metadata +29 -26
  12. data/ext/llama_cpp/src/llama-util.h +0 -546
  13. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/LICENSE +0 -0
  14. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.c +0 -0
  15. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-alloc.h +0 -0
  16. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend-impl.h +0 -0
  17. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.c +0 -0
  18. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-backend.h +0 -0
  19. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.cu +0 -0
  20. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-cuda.h +0 -0
  21. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-impl.h +0 -0
  22. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.h +0 -0
  23. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.m +0 -0
  24. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-metal.metal +0 -0
  25. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.c +0 -0
  26. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-mpi.h +0 -0
  27. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-opencl.cpp +0 -0
  28. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-opencl.h +0 -0
  29. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.c +0 -0
  30. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml-quants.h +0 -0
  31. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.c +0 -0
  32. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/ggml.h +0 -0
  33. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.cpp +0 -0
  34. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/llama.h +0 -0
  35. /data/{ext/llama_cpp/src → vendor/tmp/llama.cpp}/unicode.h +0 -0
data/vendor/tmp/llama.cpp/scripts/get-flags.mk ADDED
@@ -0,0 +1,38 @@
+ ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
+ GF_CC_IS_GCC = 1
+ GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+ else
+ GF_CC_IS_CLANG = 1
+ ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
+ GF_CC_IS_LLVM_CLANG = 1
+ else
+ GF_CC_IS_APPLE_CLANG = 1
+ endif
+ GF_CC_VER := \
+ $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+ | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+ endif
+
+ ifeq ($(GF_CC_IS_CLANG), 1)
+ # clang options
+ GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return
+ GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+ ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
+ GF_CFLAGS += -Wdouble-promotion
+ endif
+ ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
+ GF_CFLAGS += -Wdouble-promotion
+ endif
+ else
+ # gcc options
+ GF_CFLAGS = -Wdouble-promotion
+ GF_CXXFLAGS = -Wno-array-bounds
+
+ ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
+ GF_CXXFLAGS += -Wno-format-truncation
+ endif
+ ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
+ GF_CXXFLAGS += -Wextra-semi
+ endif
+ endif
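
Note: GF_CC_VER packs major.minor.patch into a zero-padded six-digit number (gcc 7.1.0 becomes 070100, clang 3.8.0 becomes 030800), so the `expr $(GF_CC_VER) \>= ...` tests above are plain integer comparisons. A minimal C++ sketch of the same encoding, assuming each component stays below 100 (encode_version is illustrative, not part of the Makefile):

    #include <cstdio>

    // Two digits per component: numeric order then matches version order.
    static int encode_version(int major, int minor, int patch) {
        return major * 10000 + minor * 100 + patch;  // 7.1.0 -> 70100 ("070100")
    }

    int main() {
        std::printf("%06d\n", encode_version(7, 1, 0));          // prints 070100
        std::printf("%d\n", encode_version(12, 3, 0) >= 70100);  // 1: gcc 12.3 >= 7.1
        return 0;
    }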
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.10.4
+ version: 0.11.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-01-06 00:00:00.000000000 Z
+ date: 2024-01-07 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email:
@@ -29,33 +29,36 @@ files:
  - ext/llama_cpp/extconf.rb
  - ext/llama_cpp/llama_cpp.cpp
  - ext/llama_cpp/llama_cpp.h
- - ext/llama_cpp/src/LICENSE
- - ext/llama_cpp/src/ggml-alloc.c
- - ext/llama_cpp/src/ggml-alloc.h
- - ext/llama_cpp/src/ggml-backend-impl.h
- - ext/llama_cpp/src/ggml-backend.c
- - ext/llama_cpp/src/ggml-backend.h
- - ext/llama_cpp/src/ggml-cuda.cu
- - ext/llama_cpp/src/ggml-cuda.h
- - ext/llama_cpp/src/ggml-impl.h
- - ext/llama_cpp/src/ggml-metal.h
- - ext/llama_cpp/src/ggml-metal.m
- - ext/llama_cpp/src/ggml-metal.metal
- - ext/llama_cpp/src/ggml-mpi.c
- - ext/llama_cpp/src/ggml-mpi.h
- - ext/llama_cpp/src/ggml-opencl.cpp
- - ext/llama_cpp/src/ggml-opencl.h
- - ext/llama_cpp/src/ggml-quants.c
- - ext/llama_cpp/src/ggml-quants.h
- - ext/llama_cpp/src/ggml.c
- - ext/llama_cpp/src/ggml.h
- - ext/llama_cpp/src/llama-util.h
- - ext/llama_cpp/src/llama.cpp
- - ext/llama_cpp/src/llama.h
- - ext/llama_cpp/src/unicode.h
  - lib/llama_cpp.rb
  - lib/llama_cpp/version.rb
  - sig/llama_cpp.rbs
+ - vendor/include/.gitkeep
+ - vendor/lib/.gitkeep
+ - vendor/tmp/llama.cpp/LICENSE
+ - vendor/tmp/llama.cpp/Makefile
+ - vendor/tmp/llama.cpp/ggml-alloc.c
+ - vendor/tmp/llama.cpp/ggml-alloc.h
+ - vendor/tmp/llama.cpp/ggml-backend-impl.h
+ - vendor/tmp/llama.cpp/ggml-backend.c
+ - vendor/tmp/llama.cpp/ggml-backend.h
+ - vendor/tmp/llama.cpp/ggml-cuda.cu
+ - vendor/tmp/llama.cpp/ggml-cuda.h
+ - vendor/tmp/llama.cpp/ggml-impl.h
+ - vendor/tmp/llama.cpp/ggml-metal.h
+ - vendor/tmp/llama.cpp/ggml-metal.m
+ - vendor/tmp/llama.cpp/ggml-metal.metal
+ - vendor/tmp/llama.cpp/ggml-mpi.c
+ - vendor/tmp/llama.cpp/ggml-mpi.h
+ - vendor/tmp/llama.cpp/ggml-opencl.cpp
+ - vendor/tmp/llama.cpp/ggml-opencl.h
+ - vendor/tmp/llama.cpp/ggml-quants.c
+ - vendor/tmp/llama.cpp/ggml-quants.h
+ - vendor/tmp/llama.cpp/ggml.c
+ - vendor/tmp/llama.cpp/ggml.h
+ - vendor/tmp/llama.cpp/llama.cpp
+ - vendor/tmp/llama.cpp/llama.h
+ - vendor/tmp/llama.cpp/scripts/get-flags.mk
+ - vendor/tmp/llama.cpp/unicode.h
  homepage: https://github.com/yoshoku/llama_cpp.rb
  licenses:
  - MIT
data/ext/llama_cpp/src/llama-util.h DELETED
@@ -1,546 +0,0 @@
- // Internal header to be included only by llama.cpp.
- // Contains wrappers around OS interfaces.
-
- #ifndef LLAMA_UTIL_H
- #define LLAMA_UTIL_H
-
- #include <cstdio>
- #include <cstdint>
- #include <cerrno>
- #include <cstring>
- #include <cstdarg>
- #include <cstdlib>
- #include <climits>
-
- #include <string>
- #include <vector>
- #include <stdexcept>
-
- #ifdef __has_include
- #if __has_include(<unistd.h>)
- #include <unistd.h>
- #if defined(_POSIX_MAPPED_FILES)
- #include <sys/mman.h>
- #endif
- #if defined(_POSIX_MEMLOCK_RANGE)
- #include <sys/resource.h>
- #endif
- #endif
- #endif
-
- #if defined(_WIN32)
- #define WIN32_LEAN_AND_MEAN
- #ifndef NOMINMAX
- #define NOMINMAX
- #endif
- #include <windows.h>
- #include <io.h>
- #include <stdio.h> // for _fseeki64
- #endif
-
- #define LLAMA_ASSERT(x) \
- do { \
- if (!(x)) { \
- fprintf(stderr, "LLAMA_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
- abort(); \
- } \
- } while (0)
-
- #ifdef __GNUC__
- #ifdef __MINGW32__
- __attribute__((format(gnu_printf, 1, 2)))
- #else
- __attribute__((format(printf, 1, 2)))
- #endif
- #endif
- static std::string format(const char * fmt, ...) {
- va_list ap, ap2;
- va_start(ap, fmt);
- va_copy(ap2, ap);
- int size = vsnprintf(NULL, 0, fmt, ap);
- LLAMA_ASSERT(size >= 0 && size < INT_MAX);
- std::vector<char> buf(size + 1);
- int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
- LLAMA_ASSERT(size2 == size);
- va_end(ap2);
- va_end(ap);
- return std::string(buf.data(), size);
- }
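
Note: format() above is the classic two-pass vsnprintf idiom: the first call with a NULL buffer only measures the required length, the second writes into an exactly sized buffer; the va_list must be va_copy'd because the first pass consumes it. A standalone sketch of the same idiom (sformat is illustrative; error checks reduced to what the original asserts):

    #include <cstdarg>
    #include <cstdio>
    #include <string>
    #include <vector>

    static std::string sformat(const char * fmt, ...) {
        va_list ap, ap2;
        va_start(ap, fmt);
        va_copy(ap2, ap);                        // pass 1 consumes ap
        int n = vsnprintf(NULL, 0, fmt, ap);     // pass 1: measure only
        std::vector<char> buf(n + 1);            // assumes n >= 0, as asserted above
        vsnprintf(buf.data(), n + 1, fmt, ap2);  // pass 2: write for real
        va_end(ap2);
        va_end(ap);
        return std::string(buf.data(), n);
    }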
-
- struct llama_file {
- // use FILE * so we don't have to re-open the file to mmap
- FILE * fp;
- size_t size;
-
- llama_file(const char * fname, const char * mode) {
- fp = std::fopen(fname, mode);
- if (fp == NULL) {
- throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
- }
- seek(0, SEEK_END);
- size = tell();
- seek(0, SEEK_SET);
- }
-
- size_t tell() const {
- #ifdef _WIN32
- __int64 ret = _ftelli64(fp);
- #else
- long ret = std::ftell(fp);
- #endif
- LLAMA_ASSERT(ret != -1); // this really shouldn't fail
- return (size_t) ret;
- }
-
- void seek(size_t offset, int whence) {
- #ifdef _WIN32
- int ret = _fseeki64(fp, (__int64) offset, whence);
- #else
- int ret = std::fseek(fp, (long) offset, whence);
- #endif
- LLAMA_ASSERT(ret == 0); // same
- }
-
- void read_raw(void * ptr, size_t len) const {
- if (len == 0) {
- return;
- }
- errno = 0;
- std::size_t ret = std::fread(ptr, len, 1, fp);
- if (ferror(fp)) {
- throw std::runtime_error(format("read error: %s", strerror(errno)));
- }
- if (ret != 1) {
- throw std::runtime_error(std::string("unexpectedly reached end of file"));
- }
- }
-
- std::uint32_t read_u32() {
- std::uint32_t ret;
- read_raw(&ret, sizeof(ret));
- return ret;
- }
-
- std::string read_string(std::uint32_t len) {
- std::vector<char> chars(len);
- read_raw(chars.data(), len);
- return std::string(chars.data(), len);
- }
-
- void write_raw(const void * ptr, size_t len) const {
- if (len == 0) {
- return;
- }
- errno = 0;
- size_t ret = std::fwrite(ptr, len, 1, fp);
- if (ret != 1) {
- throw std::runtime_error(format("write error: %s", strerror(errno)));
- }
- }
-
- void write_u32(std::uint32_t val) {
- write_raw(&val, sizeof(val));
- }
-
- ~llama_file() {
- if (fp) {
- std::fclose(fp);
- }
- }
- };
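
Note: llama_file derives the file size with a seek-to-end/tell/rewind sequence and keeps the FILE * open so llama_mmap below can reuse the same descriptor via fileno(). A hedged usage sketch assuming the types above (the path and field layout are made up):

    static void read_header_example() {
        llama_file f("model.bin", "rb");                 // throws std::runtime_error on failure
        std::uint32_t magic = f.read_u32();              // 4-byte read; throws on short read
        std::string name = f.read_string(f.read_u32());  // length-prefixed string
        (void) magic; (void) name;
        // f.size holds the total size; the FILE * is closed by ~llama_file().
    }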
-
- // llama_context_data
- struct llama_data_context {
- virtual void write(const void * src, size_t size) = 0;
- virtual size_t get_size_written() = 0;
- virtual ~llama_data_context() = default;
- };
-
- struct llama_data_buffer_context : llama_data_context {
- uint8_t* ptr;
- size_t size_written = 0;
-
- llama_data_buffer_context(uint8_t * p) : ptr(p) {}
-
- void write(const void * src, size_t size) override {
- memcpy(ptr, src, size);
- ptr += size;
- size_written += size;
- }
-
- size_t get_size_written() override {
- return size_written;
- }
- };
-
- struct llama_data_file_context : llama_data_context {
- llama_file* file;
- size_t size_written = 0;
-
- llama_data_file_context(llama_file * f) : file(f) {}
-
- void write(const void * src, size_t size) override {
- file->write_raw(src, size);
- size_written += size;
- }
-
- size_t get_size_written() override {
- return size_written;
- }
- };
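
Note: llama_data_context lets one serialization routine target either an in-memory buffer or a llama_file, with both sinks counting bytes written. A sketch of how such a sink might be driven (serialize_state and the payload are illustrative, not from this header):

    static void serialize_state(llama_data_context & ctx) {
        const uint32_t n_tokens = 3;
        ctx.write(&n_tokens, sizeof(n_tokens));      // header
        const int32_t tokens[3] = {1, 42, 7};
        ctx.write(tokens, sizeof(tokens));           // payload
    }

    static void buffer_sink_example() {
        uint8_t buf[64];
        llama_data_buffer_context bctx(buf);         // in-memory sink
        serialize_state(bctx);
        // bctx.get_size_written() == sizeof(uint32_t) + 3 * sizeof(int32_t) == 16
    }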
-
- #if defined(_WIN32)
- static std::string llama_format_win_err(DWORD err) {
- LPSTR buf;
- size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
- if (!size) {
- return "FormatMessageA failed";
- }
- std::string ret(buf, size);
- LocalFree(buf);
- return ret;
- }
- #endif
-
- struct llama_mmap {
- void * addr;
- size_t size;
-
- llama_mmap(const llama_mmap &) = delete;
-
- #ifdef _POSIX_MAPPED_FILES
- static constexpr bool SUPPORTED = true;
-
- llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
- size = file->size;
- int fd = fileno(file->fp);
- int flags = MAP_SHARED;
- // prefetch/readahead impairs performance on NUMA systems
- if (numa) { prefetch = 0; }
- #ifdef __linux__
- if (prefetch >= file->size) { flags |= MAP_POPULATE; }
- #endif
- addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
- if (addr == MAP_FAILED) {
- throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
- }
-
- if (prefetch > 0) {
- // Advise the kernel to preload the mapped memory
- if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
- fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
- strerror(errno));
- }
- }
- if (numa) {
- // advise the kernel not to use readahead
- // (because the next page might not belong on the same node)
- if (madvise(addr, file->size, MADV_RANDOM)) {
- fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
- strerror(errno));
- }
- }
- }
-
- ~llama_mmap() {
- munmap(addr, size);
- }
- #elif defined(_WIN32)
- static constexpr bool SUPPORTED = true;
-
- llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
- (void) numa;
-
- size = file->size;
-
- HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
-
- HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
- DWORD error = GetLastError();
-
- if (hMapping == NULL) {
- throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
- }
-
- addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
- error = GetLastError();
- CloseHandle(hMapping);
-
- if (addr == NULL) {
- throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
- }
-
- #if _WIN32_WINNT >= _WIN32_WINNT_WIN8
- if (prefetch) {
- // Advise the kernel to preload the mapped memory
-
- WIN32_MEMORY_RANGE_ENTRY range;
- range.VirtualAddress = addr;
-
- range.NumberOfBytes = (SIZE_T)size;
- if (!PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
- fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
- llama_format_win_err(GetLastError()).c_str());
- }
- }
- #else
- #pragma message("warning: You are building for pre-Windows 8; prefetch not supported")
- #endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8
- }
-
- ~llama_mmap() {
- if (!UnmapViewOfFile(addr)) {
- fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
- llama_format_win_err(GetLastError()).c_str());
- }
- }
- #else
- static constexpr bool SUPPORTED = false;
-
- llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
- (void) prefetch;
- (void) numa;
-
- throw std::runtime_error(std::string("mmap not supported"));
- }
- #endif
- };
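
Note: on the POSIX branch, a full prefetch request becomes MAP_POPULATE on Linux (pre-fault every page at map time), a partial one becomes madvise(MADV_WILLNEED), and NUMA mode disables readahead with MADV_RANDOM. A minimal sketch of the same read-only whole-file mapping pattern (map_whole_file is illustrative; error handling trimmed):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static void * map_whole_file(const char * path, size_t * out_size) {
        int fd = open(path, O_RDONLY);
        if (fd < 0) return NULL;
        struct stat st;
        if (fstat(fd, &st) != 0) { close(fd); return NULL; }
        void * p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
        close(fd);                              // the mapping keeps the file alive
        if (p == MAP_FAILED) return NULL;
        madvise(p, st.st_size, MADV_WILLNEED);  // hint: preload, as above
        *out_size = (size_t) st.st_size;
        return p;                               // release with munmap(p, *out_size)
    }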
-
- // Represents some region of memory being locked using mlock or VirtualLock;
- // will automatically unlock on destruction.
- struct llama_mlock {
- void * addr = NULL;
- size_t size = 0;
- bool failed_already = false;
-
- llama_mlock() {}
- llama_mlock(const llama_mlock &) = delete;
-
- ~llama_mlock() {
- if (size) {
- raw_unlock(addr, size);
- }
- }
-
- void init(void * ptr) {
- LLAMA_ASSERT(addr == NULL && size == 0);
- addr = ptr;
- }
-
- void grow_to(size_t target_size) {
- LLAMA_ASSERT(addr);
- if (failed_already) {
- return;
- }
- size_t granularity = lock_granularity();
- target_size = (target_size + granularity - 1) & ~(granularity - 1);
- if (target_size > size) {
- if (raw_lock((uint8_t *) addr + size, target_size - size)) {
- size = target_size;
- } else {
- failed_already = true;
- }
- }
- }
-
- #ifdef _POSIX_MEMLOCK_RANGE
- static constexpr bool SUPPORTED = true;
-
- size_t lock_granularity() {
- return (size_t) sysconf(_SC_PAGESIZE);
- }
-
- #ifdef __APPLE__
- #define MLOCK_SUGGESTION \
- "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
- "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
- #else
- #define MLOCK_SUGGESTION \
- "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
- #endif
-
- bool raw_lock(const void * addr, size_t size) {
- if (!mlock(addr, size)) {
- return true;
- } else {
- char* errmsg = std::strerror(errno);
- bool suggest = (errno == ENOMEM);
-
- // Check if the resource limit is fine after all
- struct rlimit lock_limit;
- if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit))
- suggest = false;
- if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size))
- suggest = false;
-
- fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
- size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
- return false;
- }
- }
-
- #undef MLOCK_SUGGESTION
-
- void raw_unlock(void * addr, size_t size) {
- if (munlock(addr, size)) {
- fprintf(stderr, "warning: failed to munlock buffer: %s\n", std::strerror(errno));
- }
- }
- #elif defined(_WIN32)
- static constexpr bool SUPPORTED = true;
-
- size_t lock_granularity() {
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- return (size_t) si.dwPageSize;
- }
-
- bool raw_lock(void * ptr, size_t len) {
- for (int tries = 1; ; tries++) {
- if (VirtualLock(ptr, len)) {
- return true;
- }
- if (tries == 2) {
- fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
- len, size, llama_format_win_err(GetLastError()).c_str());
- return false;
- }
-
- // It failed but this was only the first try; increase the working
- // set size and try again.
- SIZE_T min_ws_size, max_ws_size;
- if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
- fprintf(stderr, "warning: GetProcessWorkingSetSize failed: %s\n",
- llama_format_win_err(GetLastError()).c_str());
- return false;
- }
- // Per MSDN: "The maximum number of pages that a process can lock
- // is equal to the number of pages in its minimum working set minus
- // a small overhead."
- // Hopefully a megabyte is enough overhead:
- size_t increment = len + 1048576;
- // The minimum must be <= the maximum, so we need to increase both:
- min_ws_size += increment;
- max_ws_size += increment;
- if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
- fprintf(stderr, "warning: SetProcessWorkingSetSize failed: %s\n",
- llama_format_win_err(GetLastError()).c_str());
- return false;
- }
- }
- }
-
- void raw_unlock(void * ptr, size_t len) {
- if (!VirtualUnlock(ptr, len)) {
- fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
- llama_format_win_err(GetLastError()).c_str());
- }
- }
- #else
- static constexpr bool SUPPORTED = false;
-
- size_t lock_granularity() {
- return (size_t) 65536;
- }
-
- bool raw_lock(const void * addr, size_t len) {
- fprintf(stderr, "warning: mlock not supported on this system\n");
- return false;
- }
-
- void raw_unlock(const void * addr, size_t len) {}
- #endif
- };
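
Note: grow_to() rounds the requested size up to the lock granularity with (n + g - 1) & ~(g - 1), which is correct whenever g is a power of two (true for page sizes), then locks only the newly added tail. A worked example of the rounding (round_up is illustrative, not from this header):

    #include <cassert>
    #include <cstddef>

    static size_t round_up(size_t n, size_t g) {  // g must be a power of two
        return (n + g - 1) & ~(g - 1);
    }

    int main() {
        assert(round_up(5000, 4096) == 8192);     // 5000 -> two 4 KiB pages
        assert(round_up(4096, 4096) == 4096);     // already aligned: unchanged
        return 0;
    }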
-
- // Replacement for std::vector<uint8_t> that doesn't require zero-initialization.
- struct llama_buffer {
- uint8_t * addr = NULL;
- size_t size = 0;
-
- llama_buffer() = default;
-
- void resize(size_t len) {
- #ifdef GGML_USE_METAL
- free(addr);
- int result = posix_memalign((void **) &addr, getpagesize(), len);
- if (result == 0) {
- memset(addr, 0, len);
- }
- else {
- addr = NULL;
- }
- #else
- delete[] addr;
- addr = new uint8_t[len];
- #endif
- size = len;
- }
-
- ~llama_buffer() {
- #ifdef GGML_USE_METAL
- free(addr);
- #else
- delete[] addr;
- #endif
- addr = NULL;
- }
-
- // disable copy and move
- llama_buffer(const llama_buffer&) = delete;
- llama_buffer(llama_buffer&&) = delete;
- llama_buffer& operator=(const llama_buffer&) = delete;
- llama_buffer& operator=(llama_buffer&&) = delete;
- };
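
Note: llama_buffer exists to skip the zero-fill that resizing a std::vector<uint8_t> would impose on very large weight buffers; the Metal build instead page-aligns via posix_memalign (and does zero the memory), presumably because that backend wraps host allocations that must be page-aligned. A hedged usage sketch assuming the type above:

    static void scratch_example() {
        llama_buffer buf;
        buf.resize(1 << 20);      // 1 MiB; uninitialized on the non-Metal path
        buf.addr[0] = 0x7f;       // caller must write before reading
    }                             // freed by ~llama_buffer(); copy/move deleted above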
-
- #ifdef GGML_USE_CUBLAS
- #include "ggml-cuda.h"
- struct llama_ctx_buffer {
- uint8_t * addr = NULL;
- bool is_cuda;
- size_t size = 0;
-
- llama_ctx_buffer() = default;
-
- void resize(size_t size) {
- free();
-
- addr = (uint8_t *) ggml_cuda_host_malloc(size);
- if (addr) {
- is_cuda = true;
- }
- else {
- // fall back to pageable memory
- addr = new uint8_t[size];
- is_cuda = false;
- }
- this->size = size;
- }
-
- void free() {
- if (addr) {
- if (is_cuda) {
- ggml_cuda_host_free(addr);
- }
- else {
- delete[] addr;
- }
- }
- addr = NULL;
- }
-
- ~llama_ctx_buffer() {
- free();
- }
-
- // disable copy and move
- llama_ctx_buffer(const llama_ctx_buffer&) = delete;
- llama_ctx_buffer(llama_ctx_buffer&&) = delete;
- llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
- llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
- };
- #else
- typedef llama_buffer llama_ctx_buffer;
- #endif
-
- #endif