llama_cpp 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@
 
 #include <string>
 #include <vector>
+#include <stdexcept>
 
 #ifdef __has_include
 #if __has_include(<unistd.h>)
@@ -21,6 +22,9 @@
 #if defined(_POSIX_MAPPED_FILES)
 #include <sys/mman.h>
 #endif
+#if defined(_POSIX_MEMLOCK_RANGE)
+#include <sys/resource.h>
+#endif
 #endif
 #endif
 
@@ -71,7 +75,7 @@ struct llama_file {
     llama_file(const char * fname, const char * mode) {
         fp = std::fopen(fname, mode);
         if (fp == NULL) {
-            throw format("failed to open %s: %s", fname, std::strerror(errno));
+            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
         }
         seek(0, SEEK_END);
         size = tell();
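
Across this and the later hunks, error paths that previously threw bare std::string or format(...) values now throw std::runtime_error (hence the new <stdexcept> include above), so failures surface through the standard std::exception hierarchy; the struct names (llama_file, llama_mmap, llama_mlock) suggest the diffed header is llama.cpp's llama_util.h as vendored by this package. A minimal, self-contained sketch of the pattern, using a hypothetical open_or_throw() helper rather than the package's own llama_file:

    // Sketch only: wrap the formatted message in std::runtime_error instead of
    // throwing a raw string, so callers can catch it via the standard hierarchy.
    #include <cerrno>
    #include <cstdio>
    #include <cstring>
    #include <stdexcept>
    #include <string>

    static void open_or_throw(const char * fname) {
        std::FILE * fp = std::fopen(fname, "rb");
        if (fp == NULL) {
            throw std::runtime_error(std::string("failed to open ") + fname + ": " + std::strerror(errno));
        }
        std::fclose(fp);
    }

    int main() {
        try {
            open_or_throw("does-not-exist.bin");
        } catch (const std::runtime_error & err) {  // a thrown std::string could not be caught this way
            std::fprintf(stderr, "error: %s\n", err.what());
        }
        return 0;
    }

Before this change, a thrown std::string (or format()'s returned string) could only be caught as that exact type or via catch (...), so a generic catch (const std::exception &) handler at the binding boundary would miss it.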
@@ -104,10 +108,10 @@ struct llama_file {
         errno = 0;
         std::size_t ret = std::fread(ptr, size, 1, fp);
         if (ferror(fp)) {
-            throw format("read error: %s", strerror(errno));
+            throw std::runtime_error(format("read error: %s", strerror(errno)));
         }
         if (ret != 1) {
-            throw std::string("unexpectedly reached end of file");
+            throw std::runtime_error(std::string("unexpectedly reached end of file"));
         }
     }
 
@@ -130,7 +134,7 @@ struct llama_file {
         errno = 0;
         size_t ret = std::fwrite(ptr, size, 1, fp);
         if (ret != 1) {
-            throw format("write error: %s", strerror(errno));
+            throw std::runtime_error(format("write error: %s", strerror(errno)));
         }
     }
 
@@ -177,7 +181,7 @@ struct llama_mmap {
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
-            throw format("mmap failed: %s", strerror(errno));
+            throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
         }
 
         if (prefetch) {
@@ -204,7 +208,7 @@ struct llama_mmap {
         DWORD error = GetLastError();
 
         if (hMapping == NULL) {
-            throw format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str());
+            throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
         }
 
         addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
@@ -212,7 +216,7 @@ struct llama_mmap {
         CloseHandle(hMapping);
 
         if (addr == NULL) {
-            throw format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str());
+            throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
         }
 
         #if _WIN32_WINNT >= _WIN32_WINNT_WIN8
@@ -240,8 +244,9 @@ struct llama_mmap {
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file *) {
-        throw std::string("mmap not supported");
+    llama_mmap(struct llama_file *, bool prefetch = true) {
+        (void)prefetch;
+        throw std::runtime_error(std::string("mmap not supported"));
     }
 #endif
 };
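
In the fallback branch for platforms without mmap, the stub constructor now mirrors the real constructor's bool prefetch = true default argument and discards it with (void)prefetch, so call sites compile unchanged whichever branch is active. A small illustrative sketch of that idiom (the mapping_stub name is invented for the example):

    // Illustrative only: keep a stub's signature identical to the real
    // implementation and silence the unused-parameter warning with a void cast.
    #include <cstdio>
    #include <stdexcept>

    struct mapping_stub {
        static constexpr bool SUPPORTED = false;

        // Same signature as the supported version, so callers need no #ifdef.
        mapping_stub(const char * /*fname*/, bool prefetch = true) {
            (void)prefetch;  // parameter intentionally unused in the stub
            throw std::runtime_error("mmap not supported");
        }
    };

    int main() {
        try {
            mapping_stub m("model.bin");  // default prefetch, as with the real constructor
        } catch (const std::runtime_error & err) {
            std::fprintf(stderr, "%s\n", err.what());
        }
        return 0;
    }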
@@ -303,8 +308,18 @@ struct llama_mlock {
         if (!mlock(addr, size)) {
             return true;
         } else {
-            fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n" MLOCK_SUGGESTION,
-                    size, this->size, std::strerror(errno));
+            char* errmsg = std::strerror(errno);
+            bool suggest = (errno == ENOMEM);
+
+            // Check if the resource limit is fine after all
+            struct rlimit lock_limit;
+            if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit))
+                suggest = false;
+            if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size))
+                suggest = false;
+
+            fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
+                    size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
             return false;
         }
     }
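
The mlock failure path now prints the MLOCK_SUGGESTION text (defined earlier in the header) only when errno is ENOMEM and even the hard RLIMIT_MEMLOCK limit would leave no headroom for size more bytes above the current soft limit; the include hunk near the top pulls in <sys/resource.h> under _POSIX_MEMLOCK_RANGE for exactly this getrlimit() call. A standalone sketch of that check, independent of the package's structs:

    // Sketch: mirror the diff's decision about when a "raise your memlock
    // limit" hint is worth printing. POSIX-only (getrlimit, RLIMIT_MEMLOCK).
    #include <cstddef>
    #include <cstdio>
    #include <sys/resource.h>

    static bool should_suggest_raising_memlock(size_t want) {
        struct rlimit lock_limit;
        if (getrlimit(RLIMIT_MEMLOCK, &lock_limit) != 0) {
            return false;  // cannot inspect the limit, stay quiet
        }
        // Suggest only if even the hard limit has no room for 'want' more bytes
        // beyond the current soft limit.
        return lock_limit.rlim_max <= lock_limit.rlim_cur + want;
    }

    int main() {
        struct rlimit lock_limit;
        if (getrlimit(RLIMIT_MEMLOCK, &lock_limit) == 0) {
            std::printf("RLIMIT_MEMLOCK soft=%llu hard=%llu\n",
                        (unsigned long long) lock_limit.rlim_cur,
                        (unsigned long long) lock_limit.rlim_max);
        }
        std::printf("suggest raising the limit for 1 GiB: %s\n",
                    should_suggest_raising_memlock(1ull << 30) ? "yes" : "no");
        return 0;
    }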
@@ -369,8 +384,13 @@ struct llama_mlock {
 #else
     static constexpr bool SUPPORTED = false;
 
-    void raw_lock(const void * addr, size_t size) {
+    size_t lock_granularity() {
+        return (size_t) 65536;
+    }
+
+    bool raw_lock(const void * addr, size_t size) {
         fprintf(stderr, "warning: mlock not supported on this system\n");
+        return false;
     }
 
     void raw_unlock(const void * addr, size_t size) {}
@@ -382,6 +402,8 @@ struct llama_buffer {
     uint8_t * addr = NULL;
     size_t size = 0;
 
+    llama_buffer() = default;
+
     void resize(size_t size) {
         delete[] addr;
         addr = new uint8_t[size];
@@ -391,5 +413,62 @@ struct llama_buffer {
     ~llama_buffer() {
         delete[] addr;
     }
+
+    // disable copy and move
+    llama_buffer(const llama_buffer&) = delete;
+    llama_buffer(llama_buffer&&) = delete;
+    llama_buffer& operator=(const llama_buffer&) = delete;
+    llama_buffer& operator=(llama_buffer&&) = delete;
+};
+
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+struct llama_ctx_buffer {
+    uint8_t * addr = NULL;
+    bool is_cuda;
+    size_t size = 0;
+
+    llama_ctx_buffer() = default;
+
+    void resize(size_t size) {
+        free();
+
+        addr = (uint8_t *) ggml_cuda_host_malloc(size);
+        if (addr) {
+            is_cuda = true;
+        }
+        else {
+            // fall back to pageable memory
+            addr = new uint8_t[size];
+            is_cuda = false;
+        }
+        this->size = size;
+    }
+
+    void free() {
+        if (addr) {
+            if (is_cuda) {
+                ggml_cuda_host_free(addr);
+            }
+            else {
+                delete[] addr;
+            }
+        }
+        addr = NULL;
+    }
+
+    ~llama_ctx_buffer() {
+        free();
+    }
+
+    // disable copy and move
+    llama_ctx_buffer(const llama_ctx_buffer&) = delete;
+    llama_ctx_buffer(llama_ctx_buffer&&) = delete;
+    llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
+    llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
 };
+#else
+typedef llama_buffer llama_ctx_buffer;
+#endif
+
 #endif
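
Under GGML_USE_CUBLAS, the new llama_ctx_buffer first asks ggml_cuda_host_malloc() for pinned (page-locked) host memory and falls back to an ordinary new[] allocation when that fails, recording which path was taken so free() releases the memory with the matching deallocator; non-CUDA builds simply typedef llama_ctx_buffer to llama_buffer. The deleted copy and move operations on both structs follow from the same ownership model: each holds a raw owning pointer, so an implicit copy would end in a double free. A self-contained sketch of the try-pinned-then-fall-back pattern, with stand-in allocators instead of the real ggml-cuda API:

    // Sketch of the pinned-or-pageable host buffer pattern from this diff.
    // try_pinned_alloc()/pinned_free() are stand-ins for ggml_cuda_host_malloc()
    // and ggml_cuda_host_free(); here the pinned path always "fails" so the
    // fallback branch is exercised.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static uint8_t * try_pinned_alloc(size_t /*size*/) { return NULL; }
    static void      pinned_free(uint8_t * /*ptr*/)    {}

    struct ctx_buffer {
        uint8_t * addr = NULL;
        bool      is_pinned = false;
        size_t    size = 0;

        ctx_buffer() = default;

        void resize(size_t new_size) {
            free();
            addr = try_pinned_alloc(new_size);
            if (addr) {
                is_pinned = true;             // page-locked: faster host<->device copies
            } else {
                addr = new uint8_t[new_size]; // fall back to pageable memory
                is_pinned = false;
            }
            size = new_size;
        }

        void free() {
            if (addr) {
                if (is_pinned) { pinned_free(addr); }
                else           { delete[] addr;     }
            }
            addr = NULL;
        }

        ~ctx_buffer() { free(); }

        // owning raw pointer: copying or moving would double-free, so forbid both
        ctx_buffer(const ctx_buffer &) = delete;
        ctx_buffer(ctx_buffer &&) = delete;
        ctx_buffer & operator=(const ctx_buffer &) = delete;
        ctx_buffer & operator=(ctx_buffer &&) = delete;
    };

    int main() {
        ctx_buffer buf;
        buf.resize(1024);
        std::printf("allocated %zu bytes (%s)\n", buf.size, buf.is_pinned ? "pinned" : "pageable");
        return 0;
    }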