llama_cpp 0.12.0 → 0.12.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 87010edca1b352ae7bdd3a693451893b13dd75e9e109f9e2b42f6164cc186b08
-  data.tar.gz: ff34254b6377698903dcf771663b91c3c804111228888d96e91363bd0f29d3a6
+  metadata.gz: 13381408318e71cc1fc55c40ee9be6e62ad9e3ad6a8ce39279bb8040614e9b3b
+  data.tar.gz: 6456734b18865a7811f08d0d9d599771f574f4b59bd5b54a964ece7428115907
 SHA512:
-  metadata.gz: a23aa59fa4936940b28942398bfe98bdb09574162943ebaff31cdbda19394c7690f6c780f49da31eecc4b77427718a8b7ee58e62b2adb087100e1eee66310abc
-  data.tar.gz: 5cc105e69fc81d4616d93cd036af70f809be0c99b9155a6d3e386c9900ca012123353c23417ce56a5a64a1d805108b35de2d9feb5a6265c110d9341e5a2e242b
+  metadata.gz: 1014349771d7aa3c318027de11603e96d5482e4bd5b1bcf0fd4874040245daf44c4cfb801077a698846459a7619ca9e01e0afc3507fc7bd519e7ba68a000a15d
+  data.tar.gz: 1315ca8954397edb0db93347a10762e35f829377ef3dba0ea9cf6c67f986972ac8e75b46c410a3ceceefc0474f2abbe6f441e56a60e789ef1d2617fc15cfb29e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+## [[0.12.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.0...v0.12.1)] - 2024-01-13
+
+- Bump bundled llama.cpp from b1768 to b1833.
+- Add model file type constants.
+- Add `kv_cache_seq_div` method to `Context`.
+
 ## [[0.12.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.1...v0.12.0)] - 2024-01-11
 
 - Add `get_one` singleton method to `Batch`.
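For reference, here is a minimal usage sketch of the new `Context#kv_cache_seq_div` method, which wraps llama.cpp's `llama_kv_cache_seq_div` (dividing the positions of a KV-cache sequence range by a constant factor). The model path is a placeholder and the `Model`/`Context` constructor keywords are assumed from the gem's existing API, so treat this as illustrative rather than verbatim:

```ruby
require 'llama_cpp'

# Placeholder GGUF model path; any model supported by the bundled llama.cpp will do.
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

# ... decode some tokens for sequence 0 here ...

# Divide the positions of sequence 0 in the cell range [0, 32) by 2,
# compressing that span of the KV cache (the building block of self-extend-style tricks).
context.kv_cache_seq_div(0, 0, 32, 2)
```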
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -2026,6 +2026,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_shift", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_shift), 4);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
@@ -2378,6 +2379,16 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_kv_cache_seq_div(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eArgError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_seq_div(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(d));
+    return Qnil;
+  }
+
   static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -3209,6 +3220,9 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_S));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
 
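The three file-type constants registered above are exposed to Ruby as plain `Integer` values (mirrored in `sig/llama_cpp.rbs` further down). A hedged sketch of how one of them might be used when quantizing a model; `ModelQuantizeParams#ftype=` and `LLaMACpp.model_quantize` are assumed from the gem's existing API, and the keyword names and paths are illustrative:

```ruby
require 'llama_cpp'

# Assumed API: ModelQuantizeParams with an ftype= setter and a module-level
# model_quantize helper; file paths are placeholders.
params = LLaMACpp::ModelQuantizeParams.new
params.ftype = LLaMACpp::LLAMA_FTYPE_MOSTLY_IQ2_XS # one of the new 2-bit file types

LLaMACpp.model_quantize(
  input_path: 'path/to/model-f16.gguf',
  output_path: 'path/to/model-iq2_xs.gguf',
  params: params
)
```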
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.0'
+  VERSION = '0.12.1'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1768'
+  LLAMA_CPP_VERSION = 'b1833'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -22,6 +22,9 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
   LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
   LLAMA_FTYPE_MOSTLY_Q6_K: Integer
+  LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
+  LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
+  LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
 
   LLAMA_KV_OVERRIDE_INT: Integer
   LLAMA_KV_OVERRIDE_FLOAT: Integer
@@ -193,6 +196,7 @@ module LLaMACpp
     def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
     def kv_cache_seq_keep: (Integer) -> void
     def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
+    def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
     def set_rng_seed: (Integer) -> void
     def load_session_file: (session_path: String) -> void
     def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
@@ -1,8 +1,8 @@
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \
-	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -620,6 +620,9 @@ quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.
 perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
@@ -671,6 +674,9 @@ lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS
 lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 ifdef LLAMA_METAL
 metal: examples/metal/metal.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@@ -90,7 +90,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }
 
     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;
 
     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }
 
 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -58,7 +58,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute    (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute    (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op      (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends