llama_cpp 0.5.2 → 0.6.0

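Changes to ggml-alloc.c: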
@@ -77,7 +77,7 @@ struct free_block {
     size_t size;
 };
 
-#define MAX_FREE_BLOCKS 128
+#define MAX_FREE_BLOCKS 256
 
 struct ggml_allocr {
     void * data;
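
Note: MAX_FREE_BLOCKS bounds how many separate free ranges the allocator's free list can hold at once, so this change doubles that capacity from 128 to 256 entries.
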
@@ -131,6 +131,10 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten
     return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
 }
 
+static bool ggml_is_view(struct ggml_tensor * t) {
+    return t->view_src != NULL;
+}
+
 void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
 #ifdef GGML_ALLOCATOR_DEBUG
     GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
@@ -183,6 +187,7 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     }
 
     tensor->data = addr;
+    AT_PRINTF("%s: allocated data at %p\n", __func__, tensor->data);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     add_allocated_tensor(alloc, tensor);
@@ -214,7 +219,8 @@ static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tens
 
     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
-    AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: alloc->data = %p alloc->data+alloc->size = %p alloc->data+alloc->max_size = %p\n", __func__, alloc->data, (char*)alloc->data + alloc->size, (char*)alloc->data + alloc->max_size);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, tensor);
@@ -338,8 +344,8 @@ static void free_vmem(void * base_addr, size_t size) {
 
 // allocate uncommitted virtual memory to measure the size of the graph
 static void alloc_measure_vmem(void ** base_addr, size_t * size) {
-    // 1TB for 64-bit, 1GB for 32-bit
-    *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40;
+    // 128GB for 64-bit, 1GB for 32-bit
+    *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<37;
     do {
         *base_addr = alloc_vmem(*size);
         if (*base_addr != NULL) {
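
For reference on the constants above: 1ULL<<30 is 1 GiB, 1ULL<<37 is 128 GiB, and the previous 1ULL<<40 was 1 TiB, so the virtual address space reserved for the 64-bit measure buffer shrinks from 1 TiB to 128 GiB, and the updated comment matches the updated shift.
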
@@ -399,10 +405,6 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
 
 //////////// compute graph allocator
 
-static bool ggml_is_view(struct ggml_tensor * t) {
-    return t->view_src != NULL;
-}
-
 static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
     if (a->type != b->type) {
         return false;
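
This removal pairs with the addition earlier in the file: ggml_is_view is unchanged, it has only been moved above ggml_allocr_alloc so that the GGML_ALLOCATOR_DEBUG assertion there can use it.
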
@@ -631,3 +633,7 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
 size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
     return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
 }
+
+size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
+    return alloc->max_size;
+}
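
Changes to ggml-alloc.h: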
@@ -19,6 +19,7 @@ GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
 GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
+GGML_API size_t ggml_allocr_max_size(struct ggml_allocr * alloc);
 
 
 #ifdef __cplusplus
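
The newly exported ggml_allocr_max_size reports the allocator's high-water mark, which is mainly useful after a measuring pass. A minimal sketch of how a caller might use it, assuming ggml.h, ggml-alloc.h, and <stdlib.h> are included, an existing struct ggml_cgraph * graph, and an illustrative 32-byte alignment (the variable names and the malloc-backed buffer are not part of this diff):

    // Dry run: measure how much memory the graph needs.
    struct ggml_allocr * measure = ggml_allocr_new_measure(32);
    ggml_allocr_alloc_graph(measure, graph);
    size_t needed = ggml_allocr_max_size(measure);   // high-water mark of the measure pass
    ggml_allocr_free(measure);

    // Real run: back an allocator with a buffer of that size (plus a little
    // headroom for alignment) and allocate the same graph for real.
    void * buffer = malloc(needed + 32);
    struct ggml_allocr * alloc = ggml_allocr_new(buffer, needed + 32, 32);
    ggml_allocr_alloc_graph(alloc, graph);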