llama_cpp 0.3.7 → 0.3.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 349bc515c7f9f4f85ab75e092b568e042559a782e6943bc8906e66791b3ed2ce
- data.tar.gz: ed4e310e20af8b2ebc54fa3bf9b4cc0321262577d31d9a955eba36aa4a8fd71e
+ metadata.gz: 8a6623a24970936369944231171226dda1ce579bf29fc3711f8923c8d2d22cba
+ data.tar.gz: dbff8f38ea54195b05fc0acbaf8fceb7fd6bfdc329100a18665ef2cba2fd5d81
  SHA512:
- metadata.gz: ee350ecf8bcb7fb9fb40e4be4a66c321c9248c0b9bc90a5988e4d08a98b012e26a5f0c814d96e871a7db4abda07839b782aed214f23b48ed7dbbfcfe6f245d69
- data.tar.gz: 7a36940dd803468ae889c31771ed4f1ff72a450eb06f44b1118c4ae334cad6643c7335f45c974e8f269435c5265efdd347e17d1c71c78b1cf6c5f57734d4e9fb
+ metadata.gz: 710ab86cfea7b5f91a386bdf87872c1d19ba49057bc02aa11a4f0198aee404a2d5b931965fdeba40aa1353269f95a451090e261305931e31a182a078827ace80
+ data.tar.gz: ec4d956b5ab5ad665a0e99489b81b364b79ed39e74146629e4140240b5e176f4ef9dbf3d1c11acdb4098398114fbf055a2ad4f8251ed98ec42471a478f6dcaa2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
+ ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
+
+ - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
+ - Add `type` method to Model.
+ - Revert pull request #2592 in llama.cpp.
+   It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
+
  ## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12

  - Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
data/ext/llama_cpp/extconf.rb CHANGED
@@ -52,8 +52,8 @@ end
  if with_config('metal')
    $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
    $CXXFLAGS << ' -DGGML_USE_METAL'
-   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders'
-   $objs = %w[ggml.o llama.o llama_cpp.o ggml-metal.o]
+   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+   $objs = %w[ggml.o ggml-alloc.o llama.o llama_cpp.o ggml-metal.o]
    $objs << 'k_quants.o' unless with_config('no_k_quants')
  end

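For context: mkmf's `with_config('metal')` reads a `--with-metal` install flag, so a Metal-enabled build is typically requested with something like `gem install llama_cpp -- --with-metal` (the flag name follows mkmf's convention rather than anything stated in this diff). The new `ggml-alloc.o` entry in `$objs` links the allocator code changed below into the extension, and the `MetalPerformanceShaders` framework is no longer linked.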
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -814,6 +814,7 @@ public:
      rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
      rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
      rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
+     rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
    }

  private:
@@ -1061,6 +1062,13 @@ private:
      RB_GC_GUARD(text_);
      return ret;
    }
+
+   static VALUE _llama_model_get_model_type(VALUE self) {
+     LLaMAModelWrapper* ptr = get_llama_model(self);
+     char buf[128];
+     ::llama_model_type(ptr->model, buf, sizeof(buf));
+     return rb_str_new_cstr(buf);
+   }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
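On the Ruby side this binding surfaces as a zero-argument `type` method on `Model` that returns the description string filled in by llama.cpp. For reference, a minimal C sketch of the same underlying call (the model path is a placeholder, and the boilerplate assumes the llama.cpp API of this vintage):

#include <stdio.h>
#include "llama.h"

int main(void) {
    llama_backend_init(false);  /* bool numa */
    struct llama_context_params params = llama_context_default_params();
    struct llama_model * model = llama_load_model_from_file("model.bin", params);
    if (model == NULL) {
        return 1;
    }
    char buf[128];
    llama_model_type(model, buf, sizeof(buf));  /* same call as the binding above */
    printf("%s\n", buf);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}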
data/ext/llama_cpp/src/ggml-alloc.c CHANGED
@@ -67,6 +67,8 @@ struct ggml_allocr {
      struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
      size_t max_size;
      bool measure;
+     int parse_seq[GGML_MAX_NODES];
+     bool has_parse_seq;

  #ifdef GGML_ALLOCATOR_DEBUG
      struct ggml_tensor * allocated_tensors[1024];
@@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)

      size_t max_avail = 0;

-     // find the best fitting free block
+     // find the best fitting free block besides the last block
      int best_fit_block = -1;
      size_t best_fit_size = SIZE_MAX;
-     for (int i = 0; i < alloc->n_free_blocks; i++) {
+     for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
          struct free_block * block = &alloc->free_blocks[i];
          max_avail = MAX(max_avail, block->size);
          if (block->size >= size && block->size <= best_fit_size) {
@@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
      AT_PRINTF("block %d\n", best_fit_block);

      if (best_fit_block == -1) {
-         fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                 __func__, size, max_avail);
-         GGML_ASSERT(!"not enough space in the buffer");
+         // the last block is our last resort
+         struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
+         if (block->size >= size) {
+             best_fit_block = alloc->n_free_blocks - 1;
+             max_avail = MAX(max_avail, block->size);
+         } else {
+             fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
+                     __func__, size, max_avail);
+             GGML_ASSERT(!"not enough space in the buffer");
              return;
+         }
      }
      struct free_block * block = &alloc->free_blocks[best_fit_block];
      void * addr = block->addr;
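The hunk above narrows the best-fit scan: the last free block (after a reset, the single large block spanning most of the buffer) is excluded from the search and used only when no other block fits, so small allocations stop nibbling at the large tail block. A standalone sketch of the same strategy, with hypothetical names (`find_block`, `sizes`):

#include <stddef.h>

/* Best fit among all blocks except the last; fall back to the last
 * block only when nothing else is large enough. Returns -1 if no fit. */
static int find_block(const size_t * sizes, int n, size_t need) {
    int best = -1;
    size_t best_size = (size_t)-1;
    for (int i = 0; i < n - 1; i++) {  /* skip the last block */
        if (sizes[i] >= need && sizes[i] <= best_size) {
            best = i;
            best_size = sizes[i];
        }
    }
    if (best == -1 && n > 0 && sizes[n - 1] >= need) {
        best = n - 1;  /* the last block is the last resort */
    }
    return best;
}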
@@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
      alloc->n_free_blocks++;
  }

+ void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
+     int pos = 0;
+     for (int i = 0; i < n; i++) {
+         if (list[i] != -1) {
+             alloc->parse_seq[pos] = list[i];
+             pos++;
+         }
+     }
+     alloc->has_parse_seq = true;
+ }
+
  void ggml_allocr_reset(struct ggml_allocr * alloc) {
      alloc->n_free_blocks = 1;
      size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
@@ -248,6 +268,8 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
      /*.hash_table = */ {{0}},
      /*.max_size = */ 0,
      /*.measure = */ false,
+     /*.parse_seq = */ {0},
+     /*.has_parse_seq = */ false,
  #ifdef GGML_ALLOCATOR_DEBUG
      /*.allocated_tensors = */ {0},
  #endif
@@ -275,6 +297,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
      /*.hash_table = */ {{0}},
      /*.max_size = */ 0,
      /*.measure = */ true,
+     /*.parse_seq = */ {0},
+     /*.has_parse_seq = */ false,
  #ifdef GGML_ALLOCATOR_DEBUG
      /*.allocated_tensors = */ {0},
  #endif
@@ -473,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                  allocate_node(alloc, input);
              }
          }
-         for (int i = 0; i < gf->n_nodes; i++) {
+         for (int ind = 0; ind < gf->n_nodes; ind++) {
+             int i;
+             if (alloc->has_parse_seq) {
+                 i = alloc->parse_seq[ind];
+             } else {
+                 i = ind;
+             }
              struct ggml_tensor * node = gf->nodes[i];

              // allocate parents (leafs)
data/ext/llama_cpp/src/ggml-alloc.h CHANGED
@@ -10,6 +10,10 @@ extern "C" {
  GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
  GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);

+ // tell the allocator to parse nodes following the order described in the list
+ // you should call this if your graph is optimized to execute out-of-order
+ GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);
+
  GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
  GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
  GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
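A hedged usage sketch for the new entry point (everything here except `ggml_allocr_set_parse_seq` and `ggml_allocr_new_measure` is hypothetical): entries of -1 in the list are dropped by the setter, so a scheduler can pass a sparse order unchanged.

#include "ggml.h"
#include "ggml-alloc.h"

/* Make a measure allocator walk graph nodes in a custom order.
 * The -1 entry is skipped, so the effective sequence is 0, 2, 1, 3. */
static struct ggml_allocr * make_ordered_allocr(void) {
    struct ggml_allocr * alloc = ggml_allocr_new_measure(32);  /* 32-byte alignment */
    int order[] = { 0, 2, 1, -1, 3 };
    ggml_allocr_set_parse_seq(alloc, order, (int)(sizeof(order) / sizeof(order[0])));
    return alloc;
}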