llama_cpp 0.3.6 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 545786d4c9308ffe0f7e214a12427beaea0b26bec915ff84b16eed25ef1932a4
4
- data.tar.gz: aaa0d4fc1710b13a26163306c8b51e423233c2f7e4b3d6127f94c9b6c4846f9c
3
+ metadata.gz: 8a6623a24970936369944231171226dda1ce579bf29fc3711f8923c8d2d22cba
4
+ data.tar.gz: dbff8f38ea54195b05fc0acbaf8fceb7fd6bfdc329100a18665ef2cba2fd5d81
5
5
  SHA512:
6
- metadata.gz: 12b3ac122fd7ea59b51e2d6ff905ed1a71cf8a8b3650a269d4a3793ae32a0149f6836a792c8f216d0fdb0c39aeb3b47914e73ffc74b574bbe686660e6be84ea1
7
- data.tar.gz: 5056b95552f3434692a6c19653810d77bb28ddf9b28abd78712ccfb4ee4f7d836a5d54e283513fcfc617cc79ffa7bb9257d4ac2b6d96ec89158bf94acd4cec86
6
+ metadata.gz: 710ab86cfea7b5f91a386bdf87872c1d19ba49057bc02aa11a4f0198aee404a2d5b931965fdeba40aa1353269f95a451090e261305931e31a182a078827ace80
7
+ data.tar.gz: ec4d956b5ab5ad665a0e99489b81b364b79ed39e74146629e4140240b5e176f4ef9dbf3d1c11acdb4098398114fbf055a2ad4f8251ed98ec42471a478f6dcaa2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
2
+
3
+ - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
4
+ - Add `type` method to Model.
5
+ - Revert pull request #2592 in llama.cpp.
6
+ It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
7
+
8
+ ## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12
9
+
10
+ - Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
11
+
1
12
  ## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
2
13
 
3
14
  - Bump bundled llama.cpp from master-1a94186 to master-468ea24.
@@ -52,8 +52,8 @@ end
52
52
  if with_config('metal')
53
53
  $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
54
54
  $CXXFLAGS << ' -DGGML_USE_METAL'
55
- $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders'
56
- $objs = %w[ggml.o llama.o llama_cpp.o ggml-metal.o]
55
+ $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
56
+ $objs = %w[ggml.o ggml-alloc.o llama.o llama_cpp.o ggml-metal.o]
57
57
  $objs << 'k_quants.o' unless with_config('no_k_quants')
58
58
  end
59
59
 
@@ -814,6 +814,7 @@ public:
814
814
  rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
815
815
  rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
816
816
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
817
+ rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
817
818
  }
818
819
 
819
820
  private:
@@ -1061,6 +1062,13 @@ private:
1061
1062
  RB_GC_GUARD(text_);
1062
1063
  return ret;
1063
1064
  }
1065
+
1066
+ static VALUE _llama_model_get_model_type(VALUE self) {
1067
+ LLaMAModelWrapper* ptr = get_llama_model(self);
1068
+ char buf[128];
1069
+ ::llama_model_type(ptr->model, buf, sizeof(buf));
1070
+ return rb_str_new_cstr(buf);
1071
+ }
1064
1072
  };
1065
1073
 
1066
1074
  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -67,6 +67,8 @@ struct ggml_allocr {
67
67
  struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
68
68
  size_t max_size;
69
69
  bool measure;
70
+ int parse_seq[GGML_MAX_NODES];
71
+ bool has_parse_seq;
70
72
 
71
73
  #ifdef GGML_ALLOCATOR_DEBUG
72
74
  struct ggml_tensor * allocated_tensors[1024];
@@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
111
113
 
112
114
  size_t max_avail = 0;
113
115
 
114
- // find the best fitting free block
116
+ // find the best fitting free block besides the last block
115
117
  int best_fit_block = -1;
116
118
  size_t best_fit_size = SIZE_MAX;
117
- for (int i = 0; i < alloc->n_free_blocks; i++) {
119
+ for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
118
120
  struct free_block * block = &alloc->free_blocks[i];
119
121
  max_avail = MAX(max_avail, block->size);
120
122
  if (block->size >= size && block->size <= best_fit_size) {
@@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
126
128
  AT_PRINTF("block %d\n", best_fit_block);
127
129
 
128
130
  if (best_fit_block == -1) {
129
- fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
130
- __func__, size, max_avail);
131
- GGML_ASSERT(!"not enough space in the buffer");
131
+ // the last block is our last resort
132
+ struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
133
+ if (block->size >= size) {
134
+ best_fit_block = alloc->n_free_blocks - 1;
135
+ max_avail = MAX(max_avail, block->size);
136
+ } else {
137
+ fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
138
+ __func__, size, max_avail);
139
+ GGML_ASSERT(!"not enough space in the buffer");
132
140
  return;
141
+ }
133
142
  }
134
143
  struct free_block * block = &alloc->free_blocks[best_fit_block];
135
144
  void * addr = block->addr;
@@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
229
238
  alloc->n_free_blocks++;
230
239
  }
231
240
 
241
+ void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
242
+ int pos = 0;
243
+ for (int i = 0; i < n; i++) {
244
+ if (list[i] != -1) {
245
+ alloc->parse_seq[pos] = list[i];
246
+ pos++;
247
+ }
248
+ }
249
+ alloc->has_parse_seq = true;
250
+ }
251
+
232
252
  void ggml_allocr_reset(struct ggml_allocr * alloc) {
233
253
  alloc->n_free_blocks = 1;
234
254
  size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
@@ -248,6 +268,8 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
248
268
  /*.hash_table = */ {{0}},
249
269
  /*.max_size = */ 0,
250
270
  /*.measure = */ false,
271
+ /*.parse_seq = */ {0},
272
+ /*.has_parse_seq = */ false,
251
273
  #ifdef GGML_ALLOCATOR_DEBUG
252
274
  /*.allocated_tensors = */ = {0},
253
275
  #endif
@@ -275,6 +297,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
275
297
  /*.hash_table = */ {{0}},
276
298
  /*.max_size = */ 0,
277
299
  /*.measure = */ true,
300
+ /*.parse_seq = */ {0},
301
+ /*.has_parse_seq = */ false,
278
302
  #ifdef GGML_ALLOCATOR_DEBUG
279
303
  /*.allocated_tensors = */ = {0},
280
304
  #endif
@@ -394,6 +418,14 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
394
418
  if (parent == NULL) {
395
419
  break;
396
420
  }
421
+
422
+ // if the node's data is external, then we cannot re-use it
423
+ if ((char *) parent->data < (char *) alloc->data ||
424
+ (char *) parent->data >= ((char *) alloc->data + alloc->size)) {
425
+ AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
426
+ continue;
427
+ }
428
+
397
429
  struct hash_node * p_hn = hash_get(ht, parent);
398
430
  if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
399
431
  if (ggml_is_view(parent)) {
@@ -465,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
465
497
  allocate_node(alloc, input);
466
498
  }
467
499
  }
468
- for (int i = 0; i < gf->n_nodes; i++) {
500
+ for (int ind = 0; ind < gf->n_nodes; ind++) {
501
+ int i;
502
+ if (alloc->has_parse_seq) {
503
+ i = alloc->parse_seq[ind];
504
+ } else {
505
+ i = ind;
506
+ }
469
507
  struct ggml_tensor * node = gf->nodes[i];
470
508
 
471
509
  // allocate parents (leafs)
@@ -10,6 +10,10 @@ extern "C" {
10
10
  GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
11
11
  GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
12
12
 
13
+ // tell the allocator to parse nodes following the order described in the list
14
+ // you should call this if your graph is optimized to execute out-of-order
15
+ GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);
16
+
13
17
  GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
14
18
  GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
15
19
  GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);