llama_cpp 0.14.4 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 92ebd411f54255b05074ef79ed3e220c9ff4332164cfc831122d766226322515
- data.tar.gz: 571f0ec65776945d40a54e31bba26cc4194b19965cc4841ce40b9ad3d94605df
+ metadata.gz: 7d80abb57b135ff04718e34099accaaabf3358553b0f061d79b195a99386739d
+ data.tar.gz: 5b24a9b7846b962f4063a0e50f15c6d9a9c874d1931ed32c200f3383869a2fd9
  SHA512:
- metadata.gz: 34ca9567b7eb96add562e977f22f8b2be087c026c85bf92cd5e31f9a96ea5f02a841bdf05f745c4079740a4bb01476fb9bab313317d66dbf8870fa829a269c86
- data.tar.gz: 64b19ef010bb52800cd3710c1ec70bcb7b747e53b6ea7d8f13d84d336b1ee67868153f5bbdfee0b4131dfddaf1c656c49bd774084ee2d14f191d22d215a47737
+ metadata.gz: dfb20e108a57b65ff624db1e2ee37034ffca406d906268d89ff441099a02c00fd67743a786a0353df2368614003604a4bf5982089024f14aee2e0f95e210e297
+ data.tar.gz: 0a0bbd93dfe57e033f25e5c3e3d61fb568362aa2d317851dbb69fe620e5e30bc8b08c27272579e7841c50b87984abf70ade4a9e7e34fb2615e106a5c2474b79e
data/CHANGELOG.md CHANGED
@@ -1,10 +1,18 @@
+ ## [[0.14.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.4...v0.14.5)] - 2024-04-13
+
+ - Bump llama.cpp from b2608 to b2658.
+ - Add magic number constants.
+ - Add `token_cls` and `token_sep` methods to `Model`.
+
+ Implementation bindings for llama_state_get_size, llama_state_get_data, llama_state_set_data, llama_state_load_file, llama_state_save_file, llama_state_seq_get_size, llama_state_seq_get_data, llama_state_seq_set_data, llama_state_seq_save_file, and llama_state_seq_load_file has been skipped.
+
  ## [[0.14.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.3...v0.14.4)] - 2024-04-06

  - Bump llama.cpp from b2496 to b2573.
  - Add file type constants.
  - Bump llama.cpp from b2573 to b2608.

- Implementation of llama_split_path, llama_split_prefix binding, llama_grammar_accept, and decode_utf8 has been skipped.
+ Implementation bindings for llama_split_path, llama_split_prefix binding, llama_grammar_accept, and decode_utf8 has been skipped.

  ## [[0.14.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.2...v0.14.3)] - 2024-03-23

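The new `token_cls` and `token_sep` accessors sit alongside the existing `token_bos`/`token_eos` methods on `Model`. A minimal usage sketch, assuming the gem's documented `ModelParams`/`Model.new` constructor pattern and a placeholder model path (neither appears in this diff):

    require 'llama_cpp'

    # Load any GGUF model; the path below is a placeholder.
    model_params = LLaMACpp::ModelParams.new
    model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: model_params)

    puts model.token_bos  # beginning-of-sequence token id (existing method)
    puts model.token_cls  # classification token id (new in 0.14.5)
    puts model.token_sep  # separator token id (new in 0.14.5)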
data/examples/chat.rb CHANGED
@@ -127,8 +127,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  end

  if input_echo
- output = []
- embd.each { |token| output << context.model.token_to_piece(token) }
+ output = embd.map { |token| context.model.token_to_piece(token) }
  output_str = output.join
  output_str.chomp!(antiprompt) if first_input
  print(output_str)
@@ -136,8 +135,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  if embd_input.size <= n_consumed
  if antiprompt.size.positive?
- last_output = []
- last_n_tokens.each { |token| last_output << context.model.token_to_piece(token) }
+ last_output = last_n_tokens.map { |token| context.model.token_to_piece(token) }
  last_output_str = last_output.join

  search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
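Both hunks above replace an each/<< accumulation with Array#map. The decoding idiom in isolation, as a sketch assuming `tokens` is an array of Integer token ids and `context` is a loaded LLaMACpp::Context:

    # Convert each token id to its text piece, then concatenate into a string.
    pieces = tokens.map { |token| context.model.token_to_piece(token) }
    output_str = pieces.join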
@@ -37,6 +37,7 @@ if RUBY_PLATFORM.match?(/darwin/)
  abort('Failed to set installation path for libllama.dylib.') unless mkstatus.success?
  end
  FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal-embed.metal", VENDOR_LIB_DIR)
+ FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal.metal", VENDOR_LIB_DIR)
  end

  abort('libstdc++ is not found.') unless have_library('stdc++')
@@ -1478,6 +1478,8 @@ public:
  rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_type), 1);
  rb_define_method(rb_cLLaMAModel, "token_bos", RUBY_METHOD_FUNC(_llama_model_token_bos), 0);
  rb_define_method(rb_cLLaMAModel, "token_eos", RUBY_METHOD_FUNC(_llama_model_token_eos), 0);
+ rb_define_method(rb_cLLaMAModel, "token_cls", RUBY_METHOD_FUNC(_llama_model_token_cls), 0);
+ rb_define_method(rb_cLLaMAModel, "token_sep", RUBY_METHOD_FUNC(_llama_model_token_sep), 0);
  rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
  rb_define_method(rb_cLLaMAModel, "add_bos_token?", RUBY_METHOD_FUNC(_llama_model_add_bos_token), 0);
  rb_define_method(rb_cLLaMAModel, "add_eos_token?", RUBY_METHOD_FUNC(_llama_model_add_eos_token), 0);
@@ -1743,6 +1745,16 @@ private:
  return INT2NUM(llama_token_eos(ptr->model));
  }

+ static VALUE _llama_model_token_cls(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_token_cls(ptr->model));
+ }
+
+ static VALUE _llama_model_token_sep(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_token_sep(ptr->model));
+ }
+
  static VALUE _llama_model_token_nl(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return INT2NUM(llama_token_nl(ptr->model));
@@ -3414,15 +3426,26 @@ extern "C" void Init_llama_cpp(void) {
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));

+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSQ;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSQ", rb_str_new2(ss_magic.str().c_str()));
+
  ss_magic.str("");
  ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));

+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_STATE_SEQ_MAGIC;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_STATE_SEQ_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
  ss_magic.str("");
  ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
  rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));

  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_STATE_SEQ_VERSION", rb_str_new2(std::to_string(LLAMA_STATE_SEQ_VERSION).c_str()));
  }
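These constants reach Ruby as strings: the magic numbers are formatted as hexadecimal via std::showbase/std::hex, and the versions via std::to_string. A quick inspection sketch (exact values are not asserted here; they come from the bundled llama.cpp headers):

    require 'llama_cpp'

    puts LLaMACpp::LLAMA_FILE_MAGIC_GGSQ    # hex string, new in 0.14.5
    puts LLaMACpp::LLAMA_STATE_SEQ_MAGIC    # hex string, new in 0.14.5
    puts LLaMACpp::LLAMA_STATE_SEQ_VERSION  # decimal string, new in 0.14.5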
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.14.4'
+ VERSION = '0.14.5'

  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'b2608'
+ LLAMA_CPP_VERSION = 'b2658'
  end
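After upgrading, the bumped versions can be confirmed from the two constants shown above:

    require 'llama_cpp'

    puts LLaMACpp::VERSION           # => "0.14.5"
    puts LLaMACpp::LLAMA_CPP_VERSION # => "b2658"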
data/sig/llama_cpp.rbs CHANGED
@@ -3,6 +3,14 @@ module LLaMACpp
  LLAMA_CPP_VERSION: String
  LLAMA_DEFALUT_SEED: String

+ LLAMA_FILE_MAGIC_GGLA: String
+ LLAMA_FILE_MAGIC_GGSN: String
+ LLAMA_FILE_MAGIC_GGSQ: String
+ LLAMA_SESSION_MAGIC: String
+ LLAMA_SESSION_VERSION: String
+ LLAMA_STATE_SEQ_MAGIC: String
+ LLAMA_STATE_SEQ_VERSION: String
+
  LLAMA_VOCAB_TYPE_NONE: Integer
  LLAMA_VOCAB_TYPE_SPM: Integer
  LLAMA_VOCAB_TYPE_BPE: Integer
@@ -124,6 +132,8 @@ module LLaMACpp
  def type: (Integer) -> Integer
  def token_bos: () -> Integer
  def token_eos: () -> Integer
+ def token_cls: () -> Integer
+ def token_sep: () -> Integer
  def token_nl: () -> Integer
  def add_bos_token?: () -> bool
  def add_eos_token?: () -> bool
@@ -1,6 +1,6 @@
  MIT License

- Copyright (c) 2023 Georgi Gerganov
+ Copyright (c) 2023-2024 The ggml authors

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -1,7 +1,7 @@
  # Define the default target now so that it is always the first target
  BUILD_TARGETS = \
  main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
- simple batched batched-bench save-load-state server gguf gguf-split llama-bench libllava.a llava-cli baby-llama beam-search \
+ simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama beam-search \
  retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o

  # Binaries only useful for tests
@@ -10,7 +10,7 @@ TEST_TARGETS = \
  tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
  tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
  tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease \
- tests/test-json-schema-to-grammar
+ tests/test-json-schema-to-grammar tests/test-grammar-integration

  # Code coverage output files
  COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -648,7 +648,7 @@ CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])'
  ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
  ifndef CUDA_DOCKER_ARCH
  ifndef CUDA_POWER_ARCH
- $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
  endif # CUDA_POWER_ARCH
  endif # CUDA_DOCKER_ARCH
  endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
@@ -805,6 +805,10 @@ gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(O
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+ eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -923,6 +927,10 @@ tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+ tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
  tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -137,7 +137,7 @@ extern "C" {
  /*
  Example usage:

- // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be asigned
+ // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
  // preferrably to run on the same backend as the buffer
  ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

@@ -1225,7 +1225,7 @@ static void ggml_cuda_op_mul_mat_cublas(

  // the main device has a larger memory buffer to hold the results from all GPUs
  // ldc == nrows of the matrix that cuBLAS writes into
- int ldc = id == ctx.device ? ne0 : row_diff;
+ int64_t ldc = id == ctx.device ? ne0 : row_diff;

  const int compute_capability = ggml_cuda_info().devices[id].cc;

@@ -1377,8 +1377,8 @@ static void ggml_cuda_op_mul_mat(
  const int64_t ne0 = dst->ne[0];
  const int64_t ne1 = dst->ne[1];

- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ const int64_t nb2 = dst->nb[2];
+ const int64_t nb3 = dst->nb[3];

  GGML_ASSERT(ggml_backend_buffer_is_cuda(dst->buffer));
  GGML_ASSERT(ggml_backend_buffer_is_cuda(src1->buffer));
@@ -2617,6 +2617,7 @@ GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size
  return false;
  }

+ #if CUDART_VERSION >= 11100
  cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
  if (err != cudaSuccess) {
  // clear the error
@@ -2627,6 +2628,9 @@ GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size
  return false;
  }
  return true;
+ #else
+ return false;
+ #endif
  }

  GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer) {