llama_cpp 0.15.2 → 0.15.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 30dd4c29b86098faf7c78de5fa8e57021b631bb5eb3d14c93f63f1d186383ab8
- data.tar.gz: b011d891f1cd725f84821428a8db24004b52c9614e785f493f721f7abde71029
+ metadata.gz: 167132898a0cb63faaf4fd7583d9b988992ba7c5ec0f5602d5a158f04e0cdfa0
+ data.tar.gz: 8a65658eb93b9cf80d5ede554b15968c495f045c32e57cc96ed732c56330d25f
  SHA512:
- metadata.gz: 6c1628f93762747688f802db8593946e8581c869f63c610669b45759f644b3d19b061825b788e328b6b984977112837586ed398b6118a8f8e5f0c7f6fd0eb2dd
- data.tar.gz: 2f8c3d9f1e6c0f6db7e0682995c8d34179d5405d32784bf00f04a3408cb5bf4c95557bfa1692026f8d3dc9e672d6b15dec5d33cbd76ddc1d94e5ec964a9d0409
+ metadata.gz: 9625ac088c4d5c50cc51bbbcbc744cb7041766ccbb7a42a9cd1b80b29ebe64414d39875dea5d61a87025e239ad78be2a2ea4d3f85a187684321e409fc01a40fd
+ data.tar.gz: 6f68445f10765a4eb1124ed1cfd2afb7544d146823efad27b2b6955bb0ee822ae8b0f9cccb68777c8cb211f665a0e2531eba04a4240399af1101a5dbcd645ae9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
+ ## [[0.15.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.3...v0.15.4)] - 2024-06-01
+
+ - Bump llama.cpp from b2988 to b3056.
+ - Add LLAMA_VOCAB_PRE_TYPE_SMAUG constant.
+ - Add `token_is_control?` method to `Model`.
+
+ ## [[0.15.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.2...v0.15.3)] - 2024-05-25
+
+ - Bump llama.cpp from b2917 to b2988.
+ - Add constants for pre-tokenization types.
+ - Add `n_threads` method to `Context`.
+ - Add `n_threads_batch` method to `Context`.
+ - Add `set_n_threads` method to `Context`.
+
  ## [[0.15.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.15.1...v0.15.2)] - 2024-05-18

  - Bump llama.cpp from b2839 to b2917.
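
The 0.15.3 entries above add thread controls to `Context`, and 0.15.4 adds a control-token predicate to `Model`. A minimal usage sketch, assuming a model and context are created as in the gem's README (the GGUF path is hypothetical):

```ruby
require 'llama_cpp'

# Hypothetical model path; any GGUF model supported by the bundled llama.cpp works.
model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: model_params)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

# New in 0.15.3: both keyword arguments are required and must be Integers.
context.set_n_threads(n_threads: 4, n_threads_batch: 8)
puts context.n_threads        # => 4
puts context.n_threads_batch  # => 8

# New in 0.15.4: ask the vocabulary whether a token id is a control token
# (ids 0..2 are only illustrative; actual control ids depend on the model's vocabulary).
(0..2).each { |id| puts "#{id}: #{model.token_is_control?(id)}" }
```
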
@@ -1536,6 +1536,7 @@ public:
  rb_define_method(rb_cLLaMAModel, "token_suffix", RUBY_METHOD_FUNC(_llama_model_token_suffix), 0);
  rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
  rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
+ rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
  }

  private:
@@ -1848,6 +1849,16 @@ private:
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return llama_token_is_eog(ptr->model, token) ? Qtrue : Qfalse;
  }
+
+ static VALUE _llama_model_token_is_control(VALUE self, VALUE token_) {
+   if (!RB_INTEGER_TYPE_P(token_)) {
+     rb_raise(rb_eArgError, "token must be an integer");
+     return Qnil;
+   }
+   const llama_token token = NUM2INT(token_);
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
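
The binding above validates its argument before calling into llama.cpp: anything other than an Integer raises `ArgumentError` rather than being coerced. A quick illustration, reusing the `model` from the sketch under the changelog:

```ruby
model.token_is_control?(1)    # => true or false, depending on the vocabulary
model.token_is_control?('1')  # raises ArgumentError, "token must be an integer"
```
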
@@ -2122,10 +2133,13 @@ public:
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
  rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
+ rb_define_method(rb_cLLaMAContext, "set_n_threads", RUBY_METHOD_FUNC(_llama_context_set_n_threads), -1);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
  rb_define_method(rb_cLLaMAContext, "n_ubatch", RUBY_METHOD_FUNC(_llama_context_n_ubatch), 0);
  rb_define_method(rb_cLLaMAContext, "n_seq_max", RUBY_METHOD_FUNC(_llama_context_n_seq_max), 0);
+ rb_define_method(rb_cLLaMAContext, "n_threads", RUBY_METHOD_FUNC(_llama_context_n_threads), 0);
+ rb_define_method(rb_cLLaMAContext, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_n_threads_batch), 0);
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
@@ -2343,6 +2357,33 @@ private:
  return output;
  }

+ static VALUE _llama_context_set_n_threads(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[2] = { rb_intern("n_threads"), rb_intern("n_threads_batch") };
+   VALUE kw_values[2] = { Qundef, Qundef };
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+   VALUE n_threads = kw_values[0];
+   if (!RB_INTEGER_TYPE_P(n_threads)) {
+     rb_raise(rb_eArgError, "n_threads must be an integer");
+     return Qnil;
+   }
+   VALUE n_threads_batch = kw_values[1];
+   if (!RB_INTEGER_TYPE_P(n_threads_batch)) {
+     rb_raise(rb_eArgError, "n_threads_batch must be an integer");
+     return Qnil;
+   }
+
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eArgError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   llama_set_n_threads(ptr->ctx, NUM2UINT(n_threads), NUM2UINT(n_threads_batch));
+   return Qnil;
+ }
+
  static VALUE _llama_context_n_ctx(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
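
Because the binding above combines `rb_scan_args(argc, argv, ":", ...)` with `rb_get_kwargs(..., 2, 0, ...)`, `set_n_threads` accepts keyword arguments only, both of them required, and always returns nil. Reusing the `context` from the earlier sketch:

```ruby
context.set_n_threads(n_threads: 6, n_threads_batch: 6)  # => nil; takes effect immediately
context.set_n_threads(n_threads: 6)                      # raises ArgumentError (missing keyword)
```
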
@@ -2379,6 +2420,24 @@ private:
  return UINT2NUM(llama_n_seq_max(ptr->ctx));
  }

+ static VALUE _llama_context_n_threads(VALUE self) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   return UINT2NUM(llama_n_threads(ptr->ctx));
+ }
+
+ static VALUE _llama_context_n_threads_batch(VALUE self) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   return UINT2NUM(llama_n_threads_batch(ptr->ctx));
+ }
+
  static VALUE _llama_context_get_timings(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
@@ -3430,9 +3489,11 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_GPT2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT2));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_REFACT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_REFACT));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_COMMAND_R", INT2NUM(LLAMA_VOCAB_PRE_TYPE_COMMAND_R));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_STABLELM2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_STABLELM2));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_QWEN2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_QWEN2));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));

  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
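
The two constants registered above surface the new pre-tokenizer types as plain Integers on the module; their values mirror the `llama_vocab_pre_type` enum in the bundled llama.cpp, so the exact numbers may shift between releases:

```ruby
# New pre-tokenizer type constants in this release range.
p LLaMACpp::LLAMA_VOCAB_PRE_TYPE_STABLELM2
p LLaMACpp::LLAMA_VOCAB_PRE_TYPE_SMAUG
```
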
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
    # The version of llama_cpp.rb you install.
-   VERSION = '0.15.2'
+   VERSION = '0.15.4'

    # The version of llama.cpp bundled with llama_cpp.rb.
-   LLAMA_CPP_VERSION = 'b2917'
+   LLAMA_CPP_VERSION = 'b3056'
  end
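
After upgrading, the two constants changed above give a quick way to confirm which gem and bundled llama.cpp revisions are loaded:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.15.4"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "b3056"
```
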
data/sig/llama_cpp.rbs CHANGED
@@ -26,9 +26,11 @@ module LLaMACpp
  LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
  LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
  LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
+ LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
  LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
  LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
+ LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer

  LLAMA_FTYPE_ALL_F32: Integer
  LLAMA_FTYPE_MOSTLY_F16: Integer
@@ -158,6 +160,7 @@ module LLaMACpp
  def token_suffix: () -> Integer
  def token_eot: () -> Integer
  def token_is_eog?: (Integer) -> bool
+ def token_is_control?: (Integer) -> bool
  end

  class Timings
@@ -241,10 +244,13 @@ module LLaMACpp
  def embeddings_seq: (Integer) -> Array[Float]
  def decode: (::LLaMACpp::Batch) -> void
  def logits: () -> Array[Float]
+ def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
  def n_ctx: () -> Integer
  def n_batch: () -> Integer
  def n_ubatch: () -> Integer
  def n_seq_max: () -> Integer
+ def n_threads: () -> Integer
+ def n_threads_batch: () -> Integer
  def timings: () -> ::LLaMACpp::Timings
  def print_timings: () -> void
  def reset_timings: () -> void
@@ -381,15 +381,16 @@ ifneq ($(filter ppc64le%,$(UNAME_M)),)
  CUDA_POWER_ARCH = 1
  endif

+ ifneq ($(filter loongarch64%,$(UNAME_M)),)
+ MK_CFLAGS += -mlasx
+ MK_CXXFLAGS += -mlasx
+ endif
+
  else
  MK_CFLAGS += -march=rv64gcv -mabi=lp64d
  MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
  endif

- ifdef LLAMA_QKK_64
- MK_CPPFLAGS += -DGGML_QKK_64
- endif
-
  ifndef LLAMA_NO_ACCELERATE
  # Mac OS - include Accelerate framework.
  # `-framework Accelerate` works both with Apple Silicon and Mac Intel
@@ -401,13 +402,6 @@ ifndef LLAMA_NO_ACCELERATE
  endif
  endif # LLAMA_NO_ACCELERATE

- ifdef LLAMA_MPI
- MK_CPPFLAGS += -DGGML_USE_MPI
- MK_CFLAGS += -Wno-cast-qual
- MK_CXXFLAGS += -Wno-cast-qual
- OBJS += ggml-mpi.o
- endif # LLAMA_MPI
-
  ifdef LLAMA_OPENBLAS
  MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
  MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
@@ -449,6 +443,9 @@ endif # JETSON_EOL_MODULE_DETECT
  ifdef LLAMA_DEBUG
  MK_NVCCFLAGS += -lineinfo
  endif # LLAMA_DEBUG
+ ifdef LLAMA_CUDA_DEBUG
+ MK_NVCCFLAGS += --device-debug
+ endif # LLAMA_CUDA_DEBUG
  ifdef LLAMA_CUDA_NVCC
  NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
  else
@@ -631,11 +628,6 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
  endif
  endif # LLAMA_METAL

- ifdef LLAMA_MPI
- ggml-mpi.o: ggml-mpi.c ggml-mpi.h
- 	$(CC) $(CFLAGS) -c $< -o $@
- endif # LLAMA_MPI
-
  ifndef LLAMA_NO_LLAMAFILE
  sgemm.o: sgemm.cpp sgemm.h ggml.h
  	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -65,13 +65,8 @@ typedef sycl::half2 ggml_half2;
  // QK = number of values after dequantization
  // QK_K = super-block size

- #ifdef GGML_QKK_64
- #define QK_K 64
- #define K_SCALE_SIZE 4
- #else
  #define QK_K 256
  #define K_SCALE_SIZE 12
- #endif // GGML_QKK_64

  #if defined(GGML_COMMON_DECL_CUDA) || defined(GGML_COMMON_DECL_HIP) || defined(GGML_COMMON_DECL_SYCL)
  // QR = QK / number of values before dequantization
@@ -131,13 +126,8 @@ typedef sycl::half2 ggml_half2;
  #define QI4_NL (QK4_NL / (4*QR4_NL))
  #define QR4_NL 2

- #if QK_K == 64
- #define QI4_XS QI4_NL
- #define QR4_XS QR4_NL
- #else
  #define QI4_XS (QK_K / (4*QR4_XS))
  #define QR4_XS 8
- #endif

  #endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP

@@ -228,15 +218,6 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wro
  // weight is represented as x = a * q
  // 16 blocks of 16 elements each
  // Effectively 3.4375 bits per weight
- #ifdef GGML_QKK_64
- typedef struct {
-     uint8_t hmask[QK_K/8]; // quants - high bit
-     uint8_t qs[QK_K/4];    // quants - low 2 bits
-     uint8_t scales[2];
-     ggml_half d;           // super-block scale
- } block_q3_K;
- static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
- #else
  typedef struct {
      uint8_t hmask[QK_K/8]; // quants - high bit
      uint8_t qs[QK_K/4];    // quants - low 2 bits
@@ -244,20 +225,11 @@ typedef struct {
      ggml_half d;           // super-block scale
  } block_q3_K;
  static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
- #endif

  // 4-bit quantization
  // 8 blocks of 32 elements each
  // weight is represented as x = a * q + b
  // Effectively 4.5 bits per weight
- #ifdef GGML_QKK_64
- typedef struct {
-     ggml_half d[2];     // super-block scales/mins
-     uint8_t scales[2];  // 4-bit block scales/mins
-     uint8_t qs[QK_K/2]; // 4--bit quants
- } block_q4_K;
- static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + QK_K/2 + 2, "wrong q4_K block size/padding");
- #else
  typedef struct {
      union {
          struct {
@@ -270,21 +242,11 @@ typedef struct {
      uint8_t qs[QK_K/2]; // 4--bit quants
  } block_q4_K;
  static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
- #endif

  // 5-bit quantization
  // 8 blocks of 32 elements each
  // weight is represented as x = a * q + b
  // Effectively 5.5 bits per weight
- #ifdef GGML_QKK_64
- typedef struct {
-     ggml_half d;             // super-block scale
-     int8_t scales[QK_K/16];  // 8-bit block scales
-     uint8_t qh[QK_K/8];      // quants, high bit
-     uint8_t qs[QK_K/2];      // quants, low 4 bits
- } block_q5_K;
- static_assert(sizeof(block_q5_K) == sizeof(ggml_half) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
- #else
  typedef struct {
      union {
          struct {
@@ -298,7 +260,6 @@ typedef struct {
      uint8_t qs[QK_K/2]; // quants, low 4 bits
  } block_q5_K;
  static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
- #endif

  // 6-bit quantization
  // weight is represented as x = a * q
@@ -356,11 +317,7 @@ typedef struct {
  static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_half) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");

  // 3.4375 bpw
- #if QK_K == 64
- #define IQ3S_N_SCALE 2
- #else
  #define IQ3S_N_SCALE QK_K/64
- #endif
  typedef struct {
      ggml_half d;
      uint8_t qs[QK_K/4];
@@ -381,16 +338,9 @@ static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wron
  typedef struct {
      uint8_t qs[QK_K/8];      // grid index, low 8 bits
      uint8_t qh[QK_K/16];     // grid index, high 3 bits + grid shift bit (for two groups of 8)
- #if QK_K == 64
-     ggml_half d;
- #endif
      uint8_t scales[QK_K/32]; // 3-bit block scales (4-bit if QK_K == 64)
  } block_iq1_m;
- #if QK_K == 64
- static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32 + sizeof(ggml_half), "wrong iq1_m block size/padding");
- #else
  static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32, "wrong iq1_m block size/padding");
- #endif

  // Used by IQ1_M quants
  typedef union {
@@ -406,9 +356,6 @@ typedef struct {
  } block_iq4_nl;
  static_assert(sizeof(block_iq4_nl) == sizeof(ggml_half) + QK4_NL/2, "wrong iq4_nl block size/padding");

- #if QK_K == 64
- #define block_iq4_xs block_iq4_nl
- #else
  typedef struct {
      ggml_half d;
      uint16_t scales_h;
@@ -416,7 +363,6 @@ typedef struct {
      uint8_t qs[QK_K/2];
  } block_iq4_xs;
  static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
- #endif

  #endif // GGML_COMMON_DECL
  #endif // GGML_COMMON_DECL