llama_cpp 0.5.2 → 0.6.0

This diff shows the changes between two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
@@ -77,7 +77,7 @@ struct free_block {
     size_t size;
 };
 
-#define MAX_FREE_BLOCKS 128
+#define MAX_FREE_BLOCKS 256
 
 struct ggml_allocr {
     void * data;
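
Note on the hunk above: ggml-alloc keeps its free list in a fixed-size array of free_block entries, so MAX_FREE_BLOCKS caps how fragmented the buffer may become before allocation fails; 0.6.0 doubles that headroom from 128 to 256. A minimal sketch of the pattern, assuming the usual array layout (the addr field and the insert helper are illustrative, not taken from this diff):

    #include <stdbool.h>
    #include <stddef.h>

    struct free_block {
        void * addr;   // assumed field; only `size` is visible in the hunk
        size_t size;
    };

    #define MAX_FREE_BLOCKS 256

    struct allocr_sketch {
        struct free_block free_blocks[MAX_FREE_BLOCKS];
        int n_free_blocks;
    };

    // Freeing a tensor records a block here; once the array is full there is
    // nowhere to track further fragmentation, hence the larger cap.
    static bool insert_free_block(struct allocr_sketch * a, void * addr, size_t size) {
        if (a->n_free_blocks >= MAX_FREE_BLOCKS) {
            return false; // the real allocator would abort here (assumption)
        }
        a->free_blocks[a->n_free_blocks].addr = addr;
        a->free_blocks[a->n_free_blocks].size = size;
        a->n_free_blocks++;
        return true;
    }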
@@ -131,6 +131,10 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten
     return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
 }
 
+static bool ggml_is_view(struct ggml_tensor * t) {
+    return t->view_src != NULL;
+}
+
 void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
 #ifdef GGML_ALLOCATOR_DEBUG
     GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
@@ -183,6 +187,7 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     }
 
     tensor->data = addr;
+    AT_PRINTF("%s: allocated data at %p\n", __func__, tensor->data);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     add_allocated_tensor(alloc, tensor);
@@ -214,7 +219,8 @@ static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tens
 
     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
-    AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: alloc->data = %p alloc->data+alloc->size = %p alloc->data+alloc->max_size = %p\n", __func__, alloc->data, (char*)alloc->data + alloc->size, (char*)alloc->data + alloc->max_size);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, tensor);
@@ -338,8 +344,8 @@ static void free_vmem(void * base_addr, size_t size) {
 
 // allocate uncommitted virtual memory to measure the size of the graph
 static void alloc_measure_vmem(void ** base_addr, size_t * size) {
-    // 1TB for 64-bit, 1GB for 32-bit
-    *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40;
+    // 128GB for 64-bit, 1GB for 32-bit
+    *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<37;
     do {
        *base_addr = alloc_vmem(*size);
        if (*base_addr != NULL) {
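
Note: alloc_measure_vmem only reserves uncommitted address space for the measuring pass, so this constant is about virtual address budget, not physical RAM; shrinking the 64-bit reservation from 1ULL<<40 to 1ULL<<37 presumably avoids reservation failures on platforms with tighter address-space limits. A quick check of the shift arithmetic:

    #include <stdio.h>

    // 1ULL<<30 = 1 GiB, 1ULL<<37 = 128 GiB, 1ULL<<40 = 1024 GiB (1 TiB),
    // matching the old and new comments in the hunk above.
    int main(void) {
        printf("1<<30 = %llu GiB\n", (1ULL << 30) >> 30); // 1
        printf("1<<37 = %llu GiB\n", (1ULL << 37) >> 30); // 128
        printf("1<<40 = %llu GiB\n", (1ULL << 40) >> 30); // 1024
        return 0;
    }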
@@ -399,10 +405,6 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
 
 //////////// compute graph allocator
 
-static bool ggml_is_view(struct ggml_tensor * t) {
-    return t->view_src != NULL;
-}
-
 static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
     if (a->type != b->type) {
         return false;
@@ -631,3 +633,7 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
 size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
     return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
 }
+
+size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
+    return alloc->max_size;
+}
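
Note: the new ggml_allocr_max_size getter (declared in the header hunk below) exposes the allocator's high-water mark, which is what a caller wants after a measuring pass. A usage sketch; ggml_allocr_new_measure, ggml_allocr_free, and the alignment value are assumed from the usual ggml-alloc API and do not appear in this diff:

    #include "ggml.h"
    #include "ggml-alloc.h"

    // Dry-run a graph through a measure allocator, then read back how much
    // memory a real buffer would need. The surrounding API is assumed, not
    // taken from this diff.
    static size_t measure_graph_size(struct ggml_cgraph * graph) {
        struct ggml_allocr * measure = ggml_allocr_new_measure(/*alignment=*/32);
        ggml_allocr_alloc_graph(measure, graph);          // assigns placeholder addresses
        size_t max_size = ggml_allocr_max_size(measure);  // new in 0.6.0
        ggml_allocr_free(measure);
        return max_size;
    }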
@@ -19,6 +19,7 @@ GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
 GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
+GGML_API size_t ggml_allocr_max_size(struct ggml_allocr * alloc);
 
 
 #ifdef __cplusplus