llama_cpp 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -190,7 +190,7 @@
 #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1
 
-#define GGML_QNT_VERSION 1 // bump this on quantization format changes
+#define GGML_QNT_VERSION 2 // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
 
 #define GGML_MAX_DIMS 4
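
The quantization version bump is the headline change of this release: it signals an incompatible change to the quantized tensor layouts, so models quantized under version 1 generally need re-quantizing. The factor of 1000 just below suggests the version is packed into a wider integer field by multiplication, as the ggml example loaders do; a minimal sketch of that packing, where the ftype value is a hypothetical stand-in not taken from this diff:

    #include <cstdio>

    #define GGML_QNT_VERSION        2
    #define GGML_QNT_VERSION_FACTOR 1000

    int main() {
        int ftype  = 3; // hypothetical model file type, illustrative only
        int packed = ftype + GGML_QNT_VERSION * GGML_QNT_VERSION_FACTOR;

        // a loader recovers both values from the single packed field
        int qntvr = packed / GGML_QNT_VERSION_FACTOR; // -> 2
        ftype     = packed % GGML_QNT_VERSION_FACTOR; // -> 3
        std::printf("qntvr=%d, ftype=%d\n", qntvr, ftype);
    }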
@@ -249,6 +249,7 @@ extern "C" {
     enum ggml_backend {
         GGML_BACKEND_CPU = 0,
         GGML_BACKEND_CUDA = 1,
+        GGML_BACKEND_CL = 2,
     };
 
     // model file types
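
Adding GGML_BACKEND_CL means any exhaustive switch over the enum gains a case. A hypothetical helper (not part of the diff, assumes ggml.h is included) just to show the updated shape of the enum:

    const char * backend_name(enum ggml_backend backend) {
        switch (backend) {
            case GGML_BACKEND_CPU:  return "CPU";
            case GGML_BACKEND_CUDA: return "CUDA";
            case GGML_BACKEND_CL:   return "OpenCL"; // new in this release
        }
        return "unknown";
    }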
@@ -313,6 +314,7 @@ extern "C" {
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
+        GGML_OP_CLAMP,
         GGML_OP_CONV_1D_1S,
         GGML_OP_CONV_1D_2S,
 
@@ -849,7 +851,7 @@ extern "C" {
             int n_past);
 
     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * gml_diag_mask_zero_inplace(
+    GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int n_past);
@@ -897,7 +899,16 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int n_past,
-            int n_head);
+            int n_head,
+            float bias_max);
+
+    // clamp
+    // in-place, returns view(a)
+    struct ggml_tensor * ggml_clamp(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            float min,
+            float max);
 
     // padding = 1
     // TODO: we don't support extra parameters for now
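
Two API changes land in this hunk: ggml_alibi grows a bias_max parameter, and ggml_clamp is new (note it is declared without the GGML_API prefix its neighbors carry, which looks like an upstream oversight rather than intent). A sketch of calling both, assuming ggml.h is included; the context size and tensor shape are illustrative only:

    struct ggml_init_params params = {
        /* .mem_size   = */ 16 * 1024 * 1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // both ops are in-place and return a view of their input
    struct ggml_tensor * clamped = ggml_clamp(ctx, a, -1.0f, 1.0f);
    struct ggml_tensor * biased  = ggml_alibi(ctx, a, /*n_past=*/0,
                                              /*n_head=*/8, /*bias_max=*/8.0f);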
@@ -101,12 +101,12 @@ struct llama_file {
         LLAMA_ASSERT(ret == 0); // same
     }
 
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
+    void read_raw(void * ptr, size_t len) const {
+        if (len == 0) {
             return;
         }
         errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
+        std::size_t ret = std::fread(ptr, len, 1, fp);
         if (ferror(fp)) {
             throw std::runtime_error(format("read error: %s", strerror(errno)));
         }
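
These llama_file hunks (the diff has moved on from ggml.h to the llama utility header) rename the parameter from size to len so it no longer shadows the struct's own size member, and mark the methods const since they don't mutate any members. The fread call works at item granularity, which is why read_raw needs the separate ferror() check; roughly, assuming an open FILE * fp and a buffer buf of len bytes:

    // one item of len bytes: returns 1 only if all len bytes arrived,
    // 0 otherwise -- a short read looks the same as reading nothing
    size_t ret = std::fread(buf, len, 1, fp);

    // len items of one byte: would instead report the exact byte count
    size_t n = std::fread(buf, 1, len, fp); // any value in [0, len]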
@@ -127,12 +127,12 @@ struct llama_file {
         return std::string(chars.data(), len);
     }
 
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
+    void write_raw(const void * ptr, size_t len) const {
+        if (len == 0) {
             return;
         }
         errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
+        size_t ret = std::fwrite(ptr, len, 1, fp);
         if (ret != 1) {
             throw std::runtime_error(format("write error: %s", strerror(errno)));
         }
@@ -172,7 +172,7 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
@@ -184,9 +184,9 @@ struct llama_mmap {
             throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
         }
 
-        if (prefetch) {
+        if (prefetch > 0) {
             // Advise the kernel to preload the mapped memory
-            if (madvise(addr, file->size, MADV_WILLNEED)) {
+            if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
                 fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
                         strerror(errno));
             }
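
Together these two hunks change prefetch from a flag to a byte count: the default (size_t) -1 (i.e. SIZE_MAX) preserves the old preload-everything behavior, 0 disables the madvise call entirely, and intermediate values advise the kernel about only a prefix of the mapping. A sketch of the call sites this enables, with the partial size purely illustrative:

    llama_mmap whole(&file);                   // default: prefetch the entire mapping
    llama_mmap none(&file, 0);                 // map without MADV_WILLNEED
    llama_mmap part(&file, 64 * 1024 * 1024);  // advise only the first 64 MiB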
@@ -267,9 +267,9 @@ struct llama_mlock {
         }
     }
 
-    void init(void * addr) {
-        LLAMA_ASSERT(this->addr == NULL && this->size == 0);
-        this->addr = addr;
+    void init(void * ptr) {
+        LLAMA_ASSERT(addr == NULL && size == 0);
+        addr = ptr;
     }
 
     void grow_to(size_t target_size) {
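
The same shadowing cleanup as in llama_file: once the parameter no longer reuses the member name, the this-> qualification becomes unnecessary. The pattern being avoided, in miniature:

    struct example {
        void * addr = NULL;
        void before(void * addr) { this->addr = addr; } // parameter shadows the member
        void after(void * ptr)   { addr = ptr; }        // distinct name, no shadowing
    };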
@@ -340,14 +340,14 @@ struct llama_mlock {
         return (size_t) si.dwPageSize;
     }
 
-    bool raw_lock(void * addr, size_t size) {
+    bool raw_lock(void * ptr, size_t len) {
         for (int tries = 1; ; tries++) {
-            if (VirtualLock(addr, size)) {
+            if (VirtualLock(ptr, len)) {
                 return true;
             }
             if (tries == 2) {
                 fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
-                        size, this->size, llama_format_win_err(GetLastError()).c_str());
+                        len, size, llama_format_win_err(GetLastError()).c_str());
                 return false;
             }
 
@@ -363,7 +363,7 @@ struct llama_mlock {
             // is equal to the number of pages in its minimum working set minus
            // a small overhead."
             // Hopefully a megabyte is enough overhead:
-            size_t increment = size + 1048576;
+            size_t increment = len + 1048576;
             // The minimum must be <= the maximum, so we need to increase both:
             min_ws_size += increment;
             max_ws_size += increment;
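
Here len is the size of the buffer currently being locked, while the size member tracks what this llama_mlock has already locked, so the old shadowed names were actively confusing. For orientation, the enclosing retry logic (outside this hunk) grows the process working set before retrying VirtualLock; a hedged sketch of that adjustment using the Win32 calls such code typically relies on:

    SIZE_T min_ws_size, max_ws_size;
    HANDLE process = GetCurrentProcess();
    if (GetProcessWorkingSetSize(process, &min_ws_size, &max_ws_size)) {
        SIZE_T increment = len + 1048576; // the buffer plus ~1 MiB of slack
        min_ws_size += increment;         // the minimum must stay <= the maximum,
        max_ws_size += increment;         // so both limits grow together
        SetProcessWorkingSetSize(process, min_ws_size, max_ws_size);
    }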
@@ -375,8 +375,8 @@ struct llama_mlock {
         }
     }
 
-    void raw_unlock(void * addr, size_t size) {
-        if (!VirtualUnlock(addr, size)) {
+    void raw_unlock(void * ptr, size_t len) {
+        if (!VirtualUnlock(ptr, len)) {
             fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
                     llama_format_win_err(GetLastError()).c_str());
         }
@@ -388,12 +388,12 @@ struct llama_mlock {
         return (size_t) 65536;
     }
 
-    bool raw_lock(const void * addr, size_t size) {
+    bool raw_lock(const void * addr, size_t len) {
         fprintf(stderr, "warning: mlock not supported on this system\n");
         return false;
     }
 
-    void raw_unlock(const void * addr, size_t size) {}
+    void raw_unlock(const void * addr, size_t len) {}
 #endif
 };
 
@@ -404,10 +404,10 @@ struct llama_buffer {
 
     llama_buffer() = default;
 
-    void resize(size_t size) {
+    void resize(size_t len) {
         delete[] addr;
-        addr = new uint8_t[size];
-        this->size = size;
+        addr = new uint8_t[len];
+        size = len;
     }
 
     ~llama_buffer() {
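
One behavioral note on llama_buffer::resize: unlike std::vector::resize, it frees the old block before allocating, so existing contents are discarded and the caller is expected to refill the buffer. A usage sketch:

    llama_buffer buf;
    buf.resize(1024);                    // allocates 1024 bytes, contents indeterminate
    std::memset(buf.addr, 0, buf.size);  // caller initializes (needs <cstring>)
    buf.resize(2048);                    // old block freed; previous bytes are gone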