llama_cpp 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 545786d4c9308ffe0f7e214a12427beaea0b26bec915ff84b16eed25ef1932a4
-  data.tar.gz: aaa0d4fc1710b13a26163306c8b51e423233c2f7e4b3d6127f94c9b6c4846f9c
+  metadata.gz: 8a6623a24970936369944231171226dda1ce579bf29fc3711f8923c8d2d22cba
+  data.tar.gz: dbff8f38ea54195b05fc0acbaf8fceb7fd6bfdc329100a18665ef2cba2fd5d81
 SHA512:
-  metadata.gz: 12b3ac122fd7ea59b51e2d6ff905ed1a71cf8a8b3650a269d4a3793ae32a0149f6836a792c8f216d0fdb0c39aeb3b47914e73ffc74b574bbe686660e6be84ea1
-  data.tar.gz: 5056b95552f3434692a6c19653810d77bb28ddf9b28abd78712ccfb4ee4f7d836a5d54e283513fcfc617cc79ffa7bb9257d4ac2b6d96ec89158bf94acd4cec86
+  metadata.gz: 710ab86cfea7b5f91a386bdf87872c1d19ba49057bc02aa11a4f0198aee404a2d5b931965fdeba40aa1353269f95a451090e261305931e31a182a078827ace80
+  data.tar.gz: ec4d956b5ab5ad665a0e99489b81b364b79ed39e74146629e4140240b5e176f4ef9dbf3d1c11acdb4098398114fbf055a2ad4f8251ed98ec42471a478f6dcaa2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
+## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
+
+- Bump bundled llama.cpp from master-9ca4abe to master-097e121.
+- Add `type` method to Model.
+- Revert pull request #2592 in llama.cpp.
+  It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
+
+## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12
+
+- Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
+
 ## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
 
 - Bump bundled llama.cpp from master-1a94186 to master-468ea24.
data/ext/llama_cpp/extconf.rb CHANGED
@@ -52,8 +52,8 @@ end
 if with_config('metal')
   $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
   $CXXFLAGS << ' -DGGML_USE_METAL'
-  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders'
-  $objs = %w[ggml.o llama.o llama_cpp.o ggml-metal.o]
+  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+  $objs = %w[ggml.o ggml-alloc.o llama.o llama_cpp.o ggml-metal.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -814,6 +814,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
     rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
+    rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
   }
 
 private:
@@ -1061,6 +1062,13 @@ private:
     RB_GC_GUARD(text_);
     return ret;
   }
+
+  static VALUE _llama_model_get_model_type(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    char buf[128];
+    ::llama_model_type(ptr->model, buf, sizeof(buf));
+    return rb_str_new_cstr(buf);
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
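For orientation, a minimal C sketch of what the new binding wraps: `llama_model_type` from the bundled llama.cpp (master-097e121) writes a short model description into a caller-supplied buffer, and the Ruby method returns that buffer as a String. The model path and output here are illustrative, not part of this diff:

```c
// Sketch of the C call the binding above makes (llama.cpp at master-097e121).
// "model.bin" and the printed description are illustrative only.
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();
    struct llama_model * model = llama_load_model_from_file("model.bin", params);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    char buf[128];
    llama_model_type(model, buf, sizeof(buf)); // same call Model#type makes
    printf("%s\n", buf);                       // a short model description

    llama_free_model(model);
    return 0;
}
```

From Ruby this surfaces as the zero-argument `type` method on `LLaMACpp::Model`, as registered above.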
data/ext/llama_cpp/src/ggml-alloc.c CHANGED
@@ -67,6 +67,8 @@ struct ggml_allocr {
     struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
     size_t max_size;
     bool measure;
+    int parse_seq[GGML_MAX_NODES];
+    bool has_parse_seq;
 
 #ifdef GGML_ALLOCATOR_DEBUG
     struct ggml_tensor * allocated_tensors[1024];
@@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
 
     size_t max_avail = 0;
 
-    // find the best fitting free block
+    // find the best fitting free block besides the last block
     int best_fit_block = -1;
     size_t best_fit_size = SIZE_MAX;
-    for (int i = 0; i < alloc->n_free_blocks; i++) {
+    for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
         struct free_block * block = &alloc->free_blocks[i];
         max_avail = MAX(max_avail, block->size);
         if (block->size >= size && block->size <= best_fit_size) {
@@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     AT_PRINTF("block %d\n", best_fit_block);
 
     if (best_fit_block == -1) {
-        fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                __func__, size, max_avail);
-        GGML_ASSERT(!"not enough space in the buffer");
+        // the last block is our last resort
+        struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
+        if (block->size >= size) {
+            best_fit_block = alloc->n_free_blocks - 1;
+            max_avail = MAX(max_avail, block->size);
+        } else {
+            fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
+                    __func__, size, max_avail);
+            GGML_ASSERT(!"not enough space in the buffer");
             return;
+        }
     }
     struct free_block * block = &alloc->free_blocks[best_fit_block];
     void * addr = block->addr;
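The hunk above changes the allocation strategy: best-fit now runs over every free block except the last, and the last block (typically the large unfragmented tail of the buffer) is used only when nothing else fits, which keeps that tail intact for big tensors. A toy sketch of just that selection rule; `toy_block` and `choose_block` are hypothetical names, not ggml code:

```c
// Toy model of the "best fit besides the last block" rule above.
#include <stdint.h>
#include <stdio.h>

struct toy_block { size_t size; };

// Returns the index of the chosen free block, or -1 if nothing fits.
static int choose_block(const struct toy_block * blocks, int n, size_t need) {
    int    best_fit  = -1;
    size_t best_size = SIZE_MAX;
    for (int i = 0; i < n - 1; i++) {     // best-fit over all but the last block
        if (blocks[i].size >= need && blocks[i].size <= best_size) {
            best_fit  = i;
            best_size = blocks[i].size;
        }
    }
    if (best_fit == -1 && n > 0 && blocks[n - 1].size >= need) {
        best_fit = n - 1;                 // the last block is the last resort
    }
    return best_fit;
}

int main(void) {
    struct toy_block blocks[] = { {64}, {256}, {1024} }; // last = large tail
    printf("%d\n", choose_block(blocks, 3, 100));        // -> 1 (256 best-fits)
    printf("%d\n", choose_block(blocks, 3, 512));        // -> 2 (falls back to tail)
    return 0;
}
```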
@@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
     alloc->n_free_blocks++;
 }
 
+void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
+    int pos = 0;
+    for (int i = 0; i < n; i++) {
+        if (list[i] != -1) {
+            alloc->parse_seq[pos] = list[i];
+            pos++;
+        }
+    }
+    alloc->has_parse_seq = true;
+}
+
 void ggml_allocr_reset(struct ggml_allocr * alloc) {
     alloc->n_free_blocks = 1;
     size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
@@ -248,6 +268,8 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
         /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ false,
+        /*.parse_seq     = */ {0},
+        /*.has_parse_seq = */ false,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ = {0},
 #endif
@@ -275,6 +297,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
         /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ true,
+        /*.parse_seq     = */ {0},
+        /*.has_parse_seq = */ false,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ = {0},
 #endif
@@ -394,6 +418,14 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                     if (parent == NULL) {
                         break;
                     }
+
+                    // if the node's data is external, then we cannot re-use it
+                    if ((char *) parent->data < (char *) alloc->data ||
+                        (char *) parent->data >= ((char *) alloc->data + alloc->size)) {
+                        AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
+                        continue;
+                    }
+
                     struct hash_node * p_hn = hash_get(ht, parent);
                     if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
                         if (ggml_is_view(parent)) {
@@ -465,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 allocate_node(alloc, input);
             }
         }
-        for (int i = 0; i < gf->n_nodes; i++) {
+        for (int ind = 0; ind < gf->n_nodes; ind++) {
+            int i;
+            if (alloc->has_parse_seq) {
+                i = alloc->parse_seq[ind];
+            } else {
+                i = ind;
+            }
             struct ggml_tensor * node = gf->nodes[i];
 
             // allocate parents (leafs)
data/ext/llama_cpp/src/ggml-alloc.h CHANGED
@@ -10,6 +10,10 @@ extern "C" {
 GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
 GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
 
+// tell the allocator to parse nodes following the order described in the list
+// you should call this if your graph is optimized to execute out-of-order
+GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);
+
 GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
 GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
 GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
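To tie the new API together, a hedged usage sketch: the alignment value, the schedule in `order[]`, and the `plan_allocations` helper are illustrative, not from this diff. Per the .c hunk above, `-1` entries are filtered out when the sequence is stored, and the graph-allocation loop then visits nodes in the stored order instead of graph order:

```c
// Sketch: feeding an out-of-order execution schedule to the allocator.
#include "ggml.h"
#include "ggml-alloc.h"

void plan_allocations(struct ggml_cgraph * graph) {
    struct ggml_allocr * alloc = ggml_allocr_new_measure(32); // illustrative alignment

    // hypothetical schedule from an out-of-order optimizer; -1 entries
    // are skipped by ggml_allocr_set_parse_seq (see the .c hunk above)
    int order[] = { 0, 2, -1, 1, 3 };
    ggml_allocr_set_parse_seq(alloc, order, (int)(sizeof(order) / sizeof(order[0])));

    // nodes are now visited in order[] rather than graph order
    size_t needed = ggml_allocr_alloc_graph(alloc, graph);
    (void) needed; // in measure mode, this sizes the real buffer

    ggml_allocr_free(alloc);
}
```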