llama_cpp 0.11.1 → 0.12.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
-  data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
+  metadata.gz: 13381408318e71cc1fc55c40ee9be6e62ad9e3ad6a8ce39279bb8040614e9b3b
+  data.tar.gz: 6456734b18865a7811f08d0d9d599771f574f4b59bd5b54a964ece7428115907
 SHA512:
-  metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
-  data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
+  metadata.gz: 1014349771d7aa3c318027de11603e96d5482e4bd5b1bcf0fd4874040245daf44c4cfb801077a698846459a7619ca9e01e0afc3507fc7bd519e7ba68a000a15d
+  data.tar.gz: 1315ca8954397edb0db93347a10762e35f829377ef3dba0ea9cf6c67f986972ac8e75b46c410a3ceceefc0474f2abbe6f441e56a60e789ef1d2617fc15cfb29e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
+## [[0.12.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.0...v0.12.1)] - 2024-01-13
+
+- Bump bundled llama.cpp from b1768 to b1833.
+- Add model file type constants.
+- Add `kv_cache_seq_div` method to `Context`.
+
+## [[0.12.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.1...v0.12.0)] - 2024-01-11
+
+- Add `get_one` singleton method to `Batch`.
+
+**Breaking Changes**
+
+- Add deprecation warning to `eval`, `eval_embd`, and `sample_temperature` methods on `Context`.
+- Change the `generate` method and the example scripts to avoid using deprecated methods.
+
 ## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
 
 - Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
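Taken together, the 0.12 entries above replace the deprecated calls with `decode` plus the new `Batch.get_one`, and `sample_temperature` with `sample_temp`. A minimal migration sketch (the model path is a placeholder, and the candidate-array construction is assumed to follow the gem's bundled examples):

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)
tokens = context.model.tokenize(text: 'Hello my name is', add_bos: true)

# 0.11.x style (now emits a deprecation warning):
#   context.eval(tokens: tokens, n_past: 0)
context.decode(LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size, pos_zero: 0, seq_id: 0))

# Build token candidates from the logits of the last evaluated position.
n_vocab = context.model.n_vocab
logits = context.logits[-n_vocab..]
candidates = LLaMACpp::TokenDataArray.new(
  Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i], p: 0.0) }
)

# 0.11.x style (now emits a deprecation warning):
#   context.sample_temperature(candidates, temperature: 0.8)
context.sample_temp(candidates, temp: 0.8)
id = context.sample_token(candidates)
```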
data/README.md CHANGED
@@ -22,14 +22,14 @@ If bundler is not being used to manage dependencies, install the gem by executing:
 $ gem install llama_cpp
 ```
 
-There are several installation options for improving execution performance:
+There are several installation options:
 
 ```sh
 # use OpenBLAS
 $ gem install llama_cpp -- --with-openblas
 
-# use Metal on macOS
-$ gem install llama_cpp -- --with-metal
+# use CUDA
+$ gem install llama_cpp -- --with-cuda
 ```
 
 Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
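For context, a hypothetical sketch of how such `--with-*` flags are typically read by an mkmf-based extconf.rb via `with_config`; this is not the gem's actual extconf.rb, see the linked file for the real option handling:

```ruby
# Hypothetical sketch only: gem install llama_cpp -- --with-openblas passes
# --with-openblas through to extconf.rb, where with_config reads it.
require 'mkmf'

if with_config('openblas')
  # e.g. add OpenBLAS include/library flags to the build here
end

if with_config('cuda')
  # e.g. enable the CUDA build of the bundled llama.cpp here
end
```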
data/examples/chat.rb CHANGED
@@ -9,6 +9,7 @@
 require 'llama_cpp'
 require 'thor'
 require 'readline'
+require 'etc'
 
 class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
   default_command :main
@@ -30,12 +31,15 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
   option :typical_p, type: :numeric, desc: 'locally typical sampling, parameter p', default: 1.0
   option :temp, type: :numeric, desc: 'temperature', default: 0.8
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+  option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
   def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
     mdl_params = LLaMACpp::ModelParams.new
     mdl_params.n_gpu_layers = options[:n_gpu_layers]
     model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
     ctx_params = LLaMACpp::ContextParams.new
     ctx_params.seed = options[:seed] if options[:seed] != -1
+    ctx_params.n_threads = options[:n_threads]
+    ctx_params.n_threads_batch = options[:n_threads]
     context = LLaMACpp::Context.new(model: model, params: ctx_params)
 
     antiprompt = options[:reverse_prompt] || 'User:'
@@ -70,7 +74,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
 
       0.step(embd.size - 1, options[:batch_size]) do |i|
         n_eval = [options[:batch_size], embd.size - i].min
-        context.eval(tokens: embd[i...i + n_eval], n_past: n_past)
+        context.decode(LLaMACpp::Batch.get_one(tokens: embd[i...(i + n_eval)], n_tokens: n_eval, pos_zero: n_past, seq_id: 0))
         n_past += n_eval
       end
     end
@@ -95,7 +99,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
       context.sample_tail_free(candidates, z: options[:tfs_z])
       context.sample_typical(candidates, prob: options[:typical_p])
      context.sample_top_p(candidates, prob: options[:top_p])
-      context.sample_temperature(candidates, temperature: options[:temp])
+      context.sample_temp(candidates, temp: options[:temp])
       id = context.sample_token(candidates)
 
       last_n_tokens.shift
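The chunked-evaluation pattern in that hunk is worth pulling out on its own. A small sketch, assuming `context` and a `tokens` array are already set up as in the script and using an illustrative `batch_size`:

```ruby
# Feed a long token list to the context in batch_size-sized slices,
# advancing n_past by the number of tokens evaluated in each slice.
batch_size = 512 # illustrative value; the script takes it from --batch_size
n_past = 0
0.step(tokens.size - 1, batch_size) do |i|
  n_eval = [batch_size, tokens.size - i].min
  batch = LLaMACpp::Batch.get_one(tokens: tokens[i...(i + n_eval)], n_tokens: n_eval, pos_zero: n_past, seq_id: 0)
  context.decode(batch)
  n_past += n_eval
end
```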
data/examples/embedding.rb CHANGED
@@ -7,6 +7,7 @@
 
 require 'llama_cpp'
 require 'thor'
+require 'etc'
 
 class Embedding < Thor # rubocop:disable Style/Documentation
   default_command :main
@@ -15,6 +16,7 @@ class Embedding < Thor # rubocop:disable Style/Documentation
   option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
   option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+  option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     mdl_params = LLaMACpp::ModelParams.new
     mdl_params.n_gpu_layers = options[:n_gpu_layers]
@@ -22,13 +24,15 @@ class Embedding < Thor # rubocop:disable Style/Documentation
     ctx_params = LLaMACpp::ContextParams.new
     ctx_params.embedding = true
     ctx_params.seed = options[:seed] if options[:seed] != -1
+    ctx_params.n_threads = options[:n_threads]
+    ctx_params.n_threads_batch = options[:n_threads]
     context = LLaMACpp::Context.new(model: model, params: ctx_params)
 
     embd_input = context.model.tokenize(text: options[:prompt], add_bos: true)
 
     return unless embd_input.size.positive?
 
-    context.eval(tokens: embd_input, n_past: 0)
+    context.decode(LLaMACpp::Batch.get_one(tokens: embd_input, n_tokens: embd_input.size, pos_zero: 0, seq_id: 0))
 
     context.embeddings.each { |val| print("#{val} ") }
     print("\n")
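Pulled out of the Thor wrapper, the embedding flow above amounts to the following sketch; the model path and prompt are placeholders:

```ruby
# Minimal sketch of the embedding flow from data/examples/embedding.rb,
# using the non-deprecated decode API.
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)

ctx_params = LLaMACpp::ContextParams.new
ctx_params.embedding = true # request embeddings instead of plain generation
context = LLaMACpp::Context.new(model: model, params: ctx_params)

tokens = context.model.tokenize(text: 'Hello, World.', add_bos: true)
context.decode(LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size, pos_zero: 0, seq_id: 0))

vector = context.embeddings # Array of Float values for the prompt
puts vector.take(8).inspect
```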
data/examples/simple.rb CHANGED
@@ -7,12 +7,14 @@
 
 require 'llama_cpp'
 require 'thor'
+require 'etc'
 
 class Simple < Thor # rubocop:disable Style/Documentation
   default_command :main
   desc 'main', 'Simple completion'
   option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
   option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
+  option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     n_len = 32
     model_params = LLaMACpp::ModelParams.new
@@ -21,7 +23,8 @@ class Simple < Thor # rubocop:disable Style/Documentation
     context_params.seed = 1234
     context_params.n_ctx = 2048
     context_params.logits_all = true
-    context_params.n_threads = 4
+    context_params.n_threads = options[:n_threads]
+    context_params.n_threads_batch = options[:n_threads]
     context = LLaMACpp::Context.new(model: model, params: context_params)
 
     tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
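The thread-count change repeated across these example scripts boils down to one pattern; a small standalone sketch, with the values coming straight from the hunks above:

```ruby
# Default both the generation and batch-evaluation thread counts to the
# number of processors reported by Ruby's Etc module, as the updated
# example scripts now do instead of a hard-coded 4.
require 'etc'
require 'llama_cpp'

ctx_params = LLaMACpp::ContextParams.new
ctx_params.n_threads = Etc.nprocessors
ctx_params.n_threads_batch = Etc.nprocessors
```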
@@ -51,6 +51,7 @@ public:
   static void define_class(VALUE outer) {
     rb_cLLaMABatch = rb_define_class_under(outer, "Batch", rb_cObject);
     rb_define_alloc_func(rb_cLLaMABatch, llama_batch_alloc);
+    rb_define_singleton_method(rb_cLLaMABatch, "get_one", RUBY_METHOD_FUNC(_llama_batch_get_one), -1);
     rb_define_method(rb_cLLaMABatch, "initialize", RUBY_METHOD_FUNC(_llama_batch_initialize), -1);
     rb_define_method(rb_cLLaMABatch, "n_tokens=", RUBY_METHOD_FUNC(_llama_batch_set_n_tokens), 1);
     rb_define_method(rb_cLLaMABatch, "n_tokens", RUBY_METHOD_FUNC(_llama_batch_get_n_tokens), 0);
@@ -75,6 +76,48 @@ public:
 private:
   static const rb_data_type_t llama_batch_type;
 
+  static VALUE _llama_batch_get_one(int argc, VALUE* argv, VALUE klass) {
+    VALUE kw_args = Qnil;
+    ID kw_table[4] = { rb_intern("tokens"), rb_intern("n_tokens"), rb_intern("pos_zero"), rb_intern("seq_id") };
+    VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 4, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
+      rb_raise(rb_eArgError, "tokens must be an array");
+      return Qnil;
+    }
+    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "n_tokens must be an integer");
+      return Qnil;
+    }
+    if (!RB_INTEGER_TYPE_P(kw_values[2])) {
+      rb_raise(rb_eArgError, "pos_zero must be an integer");
+      return Qnil;
+    }
+    if (!RB_INTEGER_TYPE_P(kw_values[3])) {
+      rb_raise(rb_eArgError, "seq_id must be an integer");
+      return Qnil;
+    }
+
+    const size_t sz_array = RARRAY_LEN(kw_values[0]);
+    const int32_t n_tokens = NUM2INT(kw_values[1]);
+    const llama_pos pos_zero = NUM2INT(kw_values[2]);
+    const llama_seq_id seq_id = NUM2INT(kw_values[3]);
+
+    LLaMABatchWrapper* ptr = (LLaMABatchWrapper*)ruby_xmalloc(sizeof(LLaMABatchWrapper));
+    new (ptr) LLaMABatchWrapper();
+    ptr->batch = llama_batch_get_one(nullptr, n_tokens, pos_zero, seq_id);
+
+    ptr->batch.token = (llama_token*)malloc(sizeof(llama_token) * sz_array);
+    for (size_t i = 0; i < sz_array; i++) {
+      VALUE el = rb_ary_entry(kw_values[0], i);
+      ptr->batch.token[i] = NUM2INT(el);
+    }
+
+    return TypedData_Wrap_Struct(klass, &llama_batch_type, ptr);
+  }
+
   static VALUE _llama_batch_initialize(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[3] = { rb_intern("max_n_token"), rb_intern("n_embd"), rb_intern("max_n_seq") };
@@ -1983,6 +2026,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_shift", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_shift), 4);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
@@ -2054,6 +2098,8 @@ private:
     rb_scan_args(argc, argv, ":", &kw_args);
     rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
 
+    rb_warn("eval is deprecated. Use decode instead.");
+
     if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
       rb_raise(rb_eArgError, "tokens must be an Array");
       return Qnil;
@@ -2104,6 +2150,8 @@ private:
     rb_scan_args(argc, argv, ":", &kw_args);
     rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
 
+    rb_warn("eval_embd is deprecated. Use decode instead.");
+
     if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
       rb_raise(rb_eArgError, "tokens must be an Array");
       return Qnil;
@@ -2331,6 +2379,16 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_kv_cache_seq_div(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eArgError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_seq_div(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(d));
+    return Qnil;
+  }
+
   static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -2794,6 +2852,8 @@ private:
     rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
     rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
 
+    rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
+
     if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
      rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
       return Qnil;
@@ -3160,6 +3220,9 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_S));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
 
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.11.1'
+  VERSION = '0.12.1'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1768'
+  LLAMA_CPP_VERSION = 'b1833'
 end
data/lib/llama_cpp.rb CHANGED
@@ -54,7 +54,7 @@ module LLaMACpp
       embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
     end
 
-    context.eval(tokens: embd, n_past: n_past)
+    context.decode(LLaMACpp::Batch.get_one(tokens: embd, n_tokens: embd.size, pos_zero: n_past, seq_id: 0))
   end
 
   n_past += embd.size
@@ -77,7 +77,7 @@ module LLaMACpp
   context.sample_tail_free(candidates, z: tfs_z)
   context.sample_typical(candidates, prob: typical_p)
   context.sample_top_p(candidates, prob: top_p)
-  context.sample_temperature(candidates, temperature: temperature)
+  context.sample_temp(candidates, temp: temperature)
   id = context.sample_token(candidates)
 
   last_n_tokens.shift
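Those hunks update the module-level `generate` helper to the non-deprecated API; from the caller's side its behaviour is unchanged. A minimal usage sketch, where the model path is a placeholder and the `n_predict:` keyword is assumed from the gem's documented options:

```ruby
# Generate a short completion via the high-level helper updated above.
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

puts LLaMACpp.generate(context, 'Hello my name is', n_predict: 32)
```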
data/sig/llama_cpp.rbs CHANGED
@@ -22,6 +22,9 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
   LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
   LLAMA_FTYPE_MOSTLY_Q6_K: Integer
+  LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
+  LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
+  LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
 
   LLAMA_KV_OVERRIDE_INT: Integer
   LLAMA_KV_OVERRIDE_FLOAT: Integer
@@ -149,6 +152,7 @@ module LLaMACpp
   class Batch
     public
 
+    def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
     def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
     def n_tokens=: (Integer) -> Integer
     def n_tokens: () -> Integer
@@ -192,6 +196,7 @@ module LLaMACpp
     def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
     def kv_cache_seq_keep: (Integer) -> void
     def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
+    def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
     def set_rng_seed: (Integer) -> void
     def load_session_file: (session_path: String) -> void
     def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
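Per the signature above and the C++ binding earlier in the diff, the new `kv_cache_seq_div` takes four positional integers, mirroring llama.cpp's `llama_kv_cache_seq_div` (sequence id, start position, end position, divisor). A hedged sketch with illustrative values:

```ruby
# Divide the cached positions of sequence 0 in the range [0, 1024) by 2,
# assuming `context` is an initialized LLaMACpp::Context. This forwards to
# llama.cpp's llama_kv_cache_seq_div, used for position-scaling tricks such
# as grouped self-extend.
context.kv_cache_seq_div(0, 0, 1024, 2) # seq_id, p0, p1, d
```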
@@ -1,8 +1,8 @@
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \
-	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -620,6 +620,9 @@ quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.
 perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
@@ -671,6 +674,9 @@ lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS
 lookup: examples/lookup/lookup.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 ifdef LLAMA_METAL
 metal: examples/metal/metal.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@@ -90,7 +90,7 @@ extern "C" {
     void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
     // compute graph without a plan
-    void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
     // check if the backend supports an operation
     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }
 
     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;
 
     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }
 
 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -58,7 +58,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute    (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute    (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op      (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends