llama_cpp 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,6 +14,7 @@
14
14
 
15
15
  #include <string>
16
16
  #include <vector>
17
+ #include <stdexcept>
17
18
 
18
19
  #ifdef __has_include
19
20
  #if __has_include(<unistd.h>)
@@ -21,6 +22,9 @@
21
22
  #if defined(_POSIX_MAPPED_FILES)
22
23
  #include <sys/mman.h>
23
24
  #endif
25
+ #if defined(_POSIX_MEMLOCK_RANGE)
26
+ #include <sys/resource.h>
27
+ #endif
24
28
  #endif
25
29
  #endif
26
30
 
@@ -71,7 +75,7 @@ struct llama_file {
71
75
  llama_file(const char * fname, const char * mode) {
72
76
  fp = std::fopen(fname, mode);
73
77
  if (fp == NULL) {
74
- throw format("failed to open %s: %s", fname, std::strerror(errno));
78
+ throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
75
79
  }
76
80
  seek(0, SEEK_END);
77
81
  size = tell();
@@ -104,10 +108,10 @@ struct llama_file {
104
108
  errno = 0;
105
109
  std::size_t ret = std::fread(ptr, size, 1, fp);
106
110
  if (ferror(fp)) {
107
- throw format("read error: %s", strerror(errno));
111
+ throw std::runtime_error(format("read error: %s", strerror(errno)));
108
112
  }
109
113
  if (ret != 1) {
110
- throw std::string("unexpectedly reached end of file");
114
+ throw std::runtime_error(std::string("unexpectedly reached end of file"));
111
115
  }
112
116
  }
113
117
 
@@ -130,7 +134,7 @@ struct llama_file {
130
134
  errno = 0;
131
135
  size_t ret = std::fwrite(ptr, size, 1, fp);
132
136
  if (ret != 1) {
133
- throw format("write error: %s", strerror(errno));
137
+ throw std::runtime_error(format("write error: %s", strerror(errno)));
134
138
  }
135
139
  }
136
140
 
@@ -177,7 +181,7 @@ struct llama_mmap {
177
181
  #endif
178
182
  addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
179
183
  if (addr == MAP_FAILED) {
180
- throw format("mmap failed: %s", strerror(errno));
184
+ throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
181
185
  }
182
186
 
183
187
  if (prefetch) {
@@ -204,7 +208,7 @@ struct llama_mmap {
204
208
  DWORD error = GetLastError();
205
209
 
206
210
  if (hMapping == NULL) {
207
- throw format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str());
211
+ throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
208
212
  }
209
213
 
210
214
  addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
@@ -212,7 +216,7 @@ struct llama_mmap {
212
216
  CloseHandle(hMapping);
213
217
 
214
218
  if (addr == NULL) {
215
- throw format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str());
219
+ throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
216
220
  }
217
221
 
218
222
  #if _WIN32_WINNT >= _WIN32_WINNT_WIN8
@@ -240,8 +244,9 @@ struct llama_mmap {
240
244
  #else
241
245
  static constexpr bool SUPPORTED = false;
242
246
 
243
- llama_mmap(struct llama_file *) {
244
- throw std::string("mmap not supported");
247
+ llama_mmap(struct llama_file *, bool prefetch = true) {
248
+ (void)prefetch;
249
+ throw std::runtime_error(std::string("mmap not supported"));
245
250
  }
246
251
  #endif
247
252
  };
@@ -303,8 +308,18 @@ struct llama_mlock {
303
308
  if (!mlock(addr, size)) {
304
309
  return true;
305
310
  } else {
306
- fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n" MLOCK_SUGGESTION,
307
- size, this->size, std::strerror(errno));
311
+ char* errmsg = std::strerror(errno);
312
+ bool suggest = (errno == ENOMEM);
313
+
314
+ // Check if the resource limit is fine after all
315
+ struct rlimit lock_limit;
316
+ if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit))
317
+ suggest = false;
318
+ if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size))
319
+ suggest = false;
320
+
321
+ fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
322
+ size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
308
323
  return false;
309
324
  }
310
325
  }
@@ -369,8 +384,13 @@ struct llama_mlock {
369
384
  #else
370
385
  static constexpr bool SUPPORTED = false;
371
386
 
372
- void raw_lock(const void * addr, size_t size) {
387
+ size_t lock_granularity() {
388
+ return (size_t) 65536;
389
+ }
390
+
391
+ bool raw_lock(const void * addr, size_t size) {
373
392
  fprintf(stderr, "warning: mlock not supported on this system\n");
393
+ return false;
374
394
  }
375
395
 
376
396
  void raw_unlock(const void * addr, size_t size) {}
@@ -382,6 +402,8 @@ struct llama_buffer {
382
402
  uint8_t * addr = NULL;
383
403
  size_t size = 0;
384
404
 
405
+ llama_buffer() = default;
406
+
385
407
  void resize(size_t size) {
386
408
  delete[] addr;
387
409
  addr = new uint8_t[size];
@@ -391,5 +413,62 @@ struct llama_buffer {
391
413
  ~llama_buffer() {
392
414
  delete[] addr;
393
415
  }
416
+
417
+ // disable copy and move
418
+ llama_buffer(const llama_buffer&) = delete;
419
+ llama_buffer(llama_buffer&&) = delete;
420
+ llama_buffer& operator=(const llama_buffer&) = delete;
421
+ llama_buffer& operator=(llama_buffer&&) = delete;
422
+ };
423
+
424
+ #ifdef GGML_USE_CUBLAS
425
+ #include "ggml-cuda.h"
426
+ struct llama_ctx_buffer {
427
+ uint8_t * addr = NULL;
428
+ bool is_cuda;
429
+ size_t size = 0;
430
+
431
+ llama_ctx_buffer() = default;
432
+
433
+ void resize(size_t size) {
434
+ free();
435
+
436
+ addr = (uint8_t *) ggml_cuda_host_malloc(size);
437
+ if (addr) {
438
+ is_cuda = true;
439
+ }
440
+ else {
441
+ // fall back to pageable memory
442
+ addr = new uint8_t[size];
443
+ is_cuda = false;
444
+ }
445
+ this->size = size;
446
+ }
447
+
448
+ void free() {
449
+ if (addr) {
450
+ if (is_cuda) {
451
+ ggml_cuda_host_free(addr);
452
+ }
453
+ else {
454
+ delete[] addr;
455
+ }
456
+ }
457
+ addr = NULL;
458
+ }
459
+
460
+ ~llama_ctx_buffer() {
461
+ free();
462
+ }
463
+
464
+ // disable copy and move
465
+ llama_ctx_buffer(const llama_ctx_buffer&) = delete;
466
+ llama_ctx_buffer(llama_ctx_buffer&&) = delete;
467
+ llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
468
+ llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
394
469
  };
470
+ #else
471
+ typedef llama_buffer llama_ctx_buffer;
472
+ #endif
473
+
395
474
  #endif