cui-llama.rn 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jni.cpp +2 -2
- package/cpp/common.cpp +35 -1946
- package/cpp/common.h +91 -128
- package/cpp/ggml-impl.h +32 -0
- package/cpp/ggml-metal.m +5 -6
- package/cpp/ggml-quants.c +242 -48
- package/cpp/ggml.c +89 -35
- package/cpp/ggml.h +25 -63
- package/cpp/llama-sampling.cpp +218 -94
- package/cpp/llama.cpp +80 -86
- package/cpp/llama.h +36 -11
- package/cpp/rn-llama.hpp +2 -1
- package/cpp/sampling.cpp +11 -4
- package/cpp/sampling.h +4 -56
- package/package.json +1 -1
package/cpp/common.h
CHANGED
@@ -4,20 +4,11 @@
 
 #include "llama.h"
 
-#include "sampling.h"
-
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"
 
-#include <cmath>
 #include <string>
 #include <vector>
-#include <random>
-#include <thread>
-#include <set>
-#include <unordered_map>
-#include <tuple>
-#include <functional>
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -67,11 +58,20 @@ extern char const *LLAMA_BUILD_TARGET;
 // CPU utils
 //
 
+struct cpu_params {
+    int      n_threads = -1;
+    bool     cpumask[LM_GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
+    bool     mask_valid = false;                       // Default: any CPU
+    enum lm_ggml_sched_priority priority = LM_GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
+    bool     strict_cpu = false;                       // Use strict CPU placement
+    uint32_t poll = 50;                                // Polling (busywait) level (0 - no polling, 100 - mostly polling)
+};
+
 int32_t cpu_get_num_physical_cores();
 int32_t cpu_get_num_math();
 
 //
-//
+// Common params
 //
 
 enum llama_example {
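The new cpu_params block groups the CPU-affinity and scheduling knobs in one place. As a usage sketch only, not taken from this package's sources: the include path and the LM_GGML_SCHED_PRIO_HIGH enumerator are assumptions, the latter inferred from the "(0 - normal, 1 - medium, 2 - high, 3 - realtime)" comment above.

    // Sketch only: populating the new cpu_params from client code.
    #include "common.h" // assumed include path for cpu_params

    static cpu_params make_pinned_cpu_params() {
        cpu_params p;
        p.n_threads = 4;
        for (int i = 0; i < 4; ++i) {
            p.cpumask[i] = true;  // allow CPUs 0..3 only
        }
        p.mask_valid = true;      // without this the mask stays "any CPU"
        p.priority   = LM_GGML_SCHED_PRIO_HIGH; // assumed enumerator (level 2)
        p.strict_cpu = true;      // request strict CPU placement
        p.poll       = 100;       // mostly busy-wait instead of sleeping
        return p;
    }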
@@ -89,27 +89,76 @@ enum llama_example {
     LLAMA_EXAMPLE_CVECTOR_GENERATOR,
     LLAMA_EXAMPLE_EXPORT_LORA,
     LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_LOOKUP,
+    LLAMA_EXAMPLE_PARALLEL,
 
     LLAMA_EXAMPLE_COUNT,
 };
 
+enum gpt_sampler_type {
+    GPT_SAMPLER_TYPE_NONE        = 0,
+    GPT_SAMPLER_TYPE_TOP_K       = 1,
+    GPT_SAMPLER_TYPE_TOP_P       = 2,
+    GPT_SAMPLER_TYPE_MIN_P       = 3,
+    GPT_SAMPLER_TYPE_TFS_Z       = 4,
+    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
+    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
+    GPT_SAMPLER_TYPE_XTC         = 7,
+};
+
 // dimensionality reduction methods, used by cvector-generator
 enum dimre_method {
     DIMRE_METHOD_PCA,
     DIMRE_METHOD_MEAN,
 };
 
-
-
-
-
-
-
-
+// sampler parameters
+struct gpt_sampler_params {
+    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
+
+    int32_t n_prev            = 64;    // number of previous tokens to remember
+    int32_t n_probs           = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep          = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k             = 40;    // <= 0 to use vocab size
+    float   top_p             = 0.95f; // 1.0 = disabled
+    float   min_p             = 0.05f; // 0.0 = disabled
+    float   tfs_z             = 1.00f; // 1.0 = disabled
+    float   xtc_t             = 0.0f;  // 0.0 = disabled
+    float   xtc_p             = 0.0f;
+    float   typ_p             = 1.00f; // typical_p, 1.0 = disabled
+    float   temp              = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float   dynatemp_range    = 0.00f; // 0.0 = disabled
+    float   dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n    = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float   penalty_repeat    = 1.00f; // 1.0 = disabled
+    float   penalty_freq      = 0.00f; // 0.0 = disabled
+    float   penalty_present   = 0.00f; // 0.0 = disabled
+    int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float   mirostat_tau      = 5.00f; // target entropy
+    float   mirostat_eta      = 0.10f; // learning rate
+    bool    penalize_nl       = false; // consider newlines as a repeatable token
+    bool    ignore_eos        = false;
+    bool    no_perf           = false; // disable performance metrics
+
+    std::vector<enum gpt_sampler_type> samplers = {
+        GPT_SAMPLER_TYPE_TOP_K,
+        GPT_SAMPLER_TYPE_TFS_Z,
+        GPT_SAMPLER_TYPE_TYPICAL_P,
+        GPT_SAMPLER_TYPE_TOP_P,
+        GPT_SAMPLER_TYPE_MIN_P,
+        GPT_SAMPLER_TYPE_TEMPERATURE,
+        GPT_SAMPLER_TYPE_XTC
+    };
+
+    std::string grammar; // optional BNF-like grammar to constrain sampling
+
+    std::vector<llama_logit_bias> logit_bias; // logit biases to apply
+
+    // print the parameters into a string
+    std::string print() const;
 };
 
 struct gpt_params {
-    enum llama_example curr_ex = LLAMA_EXAMPLE_COMMON;
 
     bool vocab_only = false;
     int32_t n_predict = -1; // new tokens to predict
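The samplers vector is an ordered chain, so enabling the newly added XTC sampler means setting its threshold and probability and placing it where it should run. A hedged sketch, assuming only the declarations in the hunk above:

    // Sketch only: enabling XTC in the new sampler parameters.
    #include "common.h" // assumed include path for gpt_sampler_params

    static gpt_sampler_params make_xtc_sampling() {
        gpt_sampler_params sparams;
        sparams.temp  = 0.8f;
        sparams.xtc_t = 0.1f; // threshold; 0.0 keeps XTC disabled
        sparams.xtc_p = 0.5f; // probability of applying the cut
        // the chain runs in vector order, so here XTC executes between
        // top-k filtering and the final temperature step
        sparams.samplers = {
            GPT_SAMPLER_TYPE_TOP_K,
            GPT_SAMPLER_TYPE_XTC,
            GPT_SAMPLER_TYPE_TEMPERATURE,
        };
        return sparams;
    }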
@@ -155,23 +204,23 @@ struct gpt_params {
 
     struct gpt_sampler_params sparams;
 
-    std::string model = ""; // model path
-    std::string model_draft = ""; // draft model for speculative decoding
-    std::string model_alias = "unknown"; // model alias
-    std::string model_url = ""; // model url to download
-    std::string hf_token = ""; // HF token
-    std::string hf_repo = ""; // HF repo
-    std::string hf_file = ""; // HF file
-    std::string prompt = "";
-    std::string prompt_file = ""; // store the external prompt file name
-    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
-    std::string input_prefix = ""; // string to prefix user inputs with
-    std::string input_suffix = ""; // string to suffix user inputs with
-    std::string logdir = ""; // directory in which to save YAML log files
-    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
-    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
-    std::string logits_file = ""; // file for saving *all* logits
-    std::string rpc_servers = ""; // comma separated list of RPC servers
+    std::string model = ""; // model path // NOLINT
+    std::string model_draft = ""; // draft model for speculative decoding // NOLINT
+    std::string model_alias = "unknown"; // model alias // NOLINT
+    std::string model_url = ""; // model url to download // NOLINT
+    std::string hf_token = ""; // HF token // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+    std::string prompt = ""; // NOLINT
+    std::string prompt_file = ""; // store the external prompt file name // NOLINT
+    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
+    std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
+    std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
+    std::string logdir = ""; // directory in which to save YAML log files // NOLINT
+    std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
+    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
+    std::string logits_file = ""; // file for saving *all* logits // NOLINT
+    std::string rpc_servers = ""; // comma separated list of RPC servers // NOLINT
 
     std::vector<std::string> in_files; // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
@@ -201,7 +250,6 @@ struct gpt_params {
 
     bool kl_divergence = false; // compute KL divergence
 
-    std::function<void(int, char **)> print_usage = nullptr; // print example-specific usage and example
     bool usage = false; // print usage
     bool use_color = false; // use color to distinguish generations and inputs
     bool special = false; // enable special token output
@@ -216,6 +264,7 @@ struct gpt_params {
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
     bool cont_batching = true; // insert new sequences for decoding on-the-fly
     bool flash_attn = false; // flash attention
+    bool no_perf = false; // disable performance metrics
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
     bool logits_all = false; // return logits for all tokens in the batch
@@ -232,7 +281,7 @@ struct gpt_params {
     std::string cache_type_v = "f16"; // KV cache data type for the V
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
+    std::string mmproj = ""; // path to multimodal projector // NOLINT
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -248,15 +297,15 @@ struct gpt_params {
     int n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
 
     std::string hostname = "127.0.0.1";
-    std::string public_path = "";
-    std::string chat_template = "";
-    std::string system_prompt = "";
+    std::string public_path = ""; // NOLINT
+    std::string chat_template = ""; // NOLINT
+    std::string system_prompt = ""; // NOLINT
     bool enable_chat_template = true;
 
     std::vector<std::string> api_keys;
 
-    std::string ssl_file_key = "";
-    std::string ssl_file_cert = "";
+    std::string ssl_file_key = ""; // NOLINT
+    std::string ssl_file_cert = ""; // NOLINT
 
     bool endpoint_slots = true;
     bool endpoint_metrics = false;
@@ -311,92 +360,6 @@ struct gpt_params {
     bool batched_bench_output_jsonl = false;
 };
 
-struct llama_arg {
-    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
-    std::vector<const char *> args;
-    const char * value_hint   = nullptr; // help text or example for arg value
-    const char * value_hint_2 = nullptr; // for second arg value
-    const char * env          = nullptr;
-    std::string help;
-    void (*handler_void)   (gpt_params & params) = nullptr;
-    void (*handler_string) (gpt_params & params, const std::string &) = nullptr;
-    void (*handler_str_str)(gpt_params & params, const std::string &, const std::string &) = nullptr;
-    void (*handler_int)    (gpt_params & params, int) = nullptr;
-
-    llama_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const std::string & help,
-        void (*handler)(gpt_params & params, const std::string &)
-    ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
-
-    llama_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const std::string & help,
-        void (*handler)(gpt_params & params, int)
-    ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
-
-    llama_arg(
-        const std::initializer_list<const char *> & args,
-        const std::string & help,
-        void (*handler)(gpt_params & params)
-    ) : args(args), help(help), handler_void(handler) {}
-
-    // support 2 values for arg
-    llama_arg(
-        const std::initializer_list<const char *> & args,
-        const char * value_hint,
-        const char * value_hint_2,
-        const std::string & help,
-        void (*handler)(gpt_params & params, const std::string &, const std::string &)
-    ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
-
-    llama_arg & set_examples(std::initializer_list<enum llama_example> examples) {
-        this->examples = std::move(examples);
-        return *this;
-    }
-
-    llama_arg & set_env(const char * env) {
-        help = help + "\n(env: " + env + ")";
-        this->env = env;
-        return *this;
-    }
-
-    bool in_example(enum llama_example ex) {
-        return examples.find(ex) != examples.end();
-    }
-
-    bool get_value_from_env(std::string & output) const {
-        if (env == nullptr) return false;
-        char * value = std::getenv(env);
-        if (value) {
-            output = value;
-            return true;
-        }
-        return false;
-    }
-
-    bool has_value_from_env() const {
-        return env != nullptr && std::getenv(env);
-    }
-
-    std::string to_string();
-};
-
-// initialize list of options (arguments) that can be used by the current example
-std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex);
-// optionally, we can provide "print_usage" to print example usage
-std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex, std::function<void(int, char **)> print_usage);
-
-// parse input arguments from CLI
-// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
-bool gpt_params_parse (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
-bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
-
-// print full usage message; it will be called internally by gpt_params_parse() if "-h" is set
-void gpt_params_print_usage(gpt_params & params, std::vector<llama_arg> & options);
-
 std::string gpt_params_get_system_info(const gpt_params & params);
 
 bool parse_cpu_range(const std::string& range, bool(&boolmask)[LM_GGML_MAX_N_THREADS]);
package/cpp/ggml-impl.h
CHANGED
@@ -629,8 +629,16 @@ inline static float lm_ggml_lookup_fp16_to_fp32(lm_ggml_fp16_t f) {
 #define LM_GGML_FP32_TO_FP16(x) LM_GGML_COMPUTE_FP32_TO_FP16(x)
 #endif
 
+enum lm_ggml_cgraph_eval_order {
+    LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
+    LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
+    LM_GGML_CGRAPH_EVAL_ORDER_COUNT
+};
+
 // bitset
 
+typedef uint32_t lm_ggml_bitset_t;
+
 static_assert(sizeof(lm_ggml_bitset_t) == 4, "bitset_t constants must be updated");
 #define BITSET_SHR 5 // log2(sizeof(lm_ggml_bitset_t)*8)
 #define BITSET_MASK (sizeof(lm_ggml_bitset_t)*8 - 1)
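The typedef fixes the bitset word at 32 bits, which is what BITSET_SHR (shift by 5, i.e. divide by 32) and BITSET_MASK (the low 5 bits) already encode. A self-contained sketch of that indexing arithmetic, mirroring rather than copying the header's helpers:

    // Self-contained mirror of the 32-bit bitset indexing (not the header's code).
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    typedef uint32_t bitset_t; // same width as lm_ggml_bitset_t

    static bool bitset_get(const bitset_t * b, size_t i) {
        return (b[i >> 5] >> (i & 31)) & 1u; // word i/32, bit i%32
    }

    static void bitset_set(bitset_t * b, size_t i) {
        b[i >> 5] |= 1u << (i & 31);
    }

    int main() {
        bitset_t words[2] = {0, 0}; // covers indices 0..63
        bitset_set(words, 37);      // lands in word 1, bit 5
        assert(bitset_get(words, 37));
        assert(!bitset_get(words, 36));
        return 0;
    }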
@@ -656,6 +664,12 @@ static inline void lm_ggml_bitset_clear(lm_ggml_bitset_t * bitset, size_t i) {
 #define LM_GGML_HASHSET_FULL ((size_t)-1)
 #define LM_GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
 
+struct lm_ggml_hash_set {
+    size_t size;
+    lm_ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
+    struct lm_ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if lm_ggml_bitset_get(used, i)
+};
+
 struct lm_ggml_hash_set lm_ggml_hash_set_new(size_t size);
 void lm_ggml_hash_set_free(struct lm_ggml_hash_set * hash_set);
 
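Exposing the struct makes the open-addressing layout visible: keys holds the tensor pointers and used is a bitset marking live slots. A hedged sketch of a linear-probe lookup over such a layout, with stand-in types; the header's actual lm_ggml_hash_find is not reproduced here:

    // Hedged sketch of linear probing over the lm_ggml_hash_set layout,
    // using stand-in types instead of the real ggml declarations.
    #include <cstddef>
    #include <cstdint>

    struct tensor; // stand-in for lm_ggml_tensor

    struct hash_set {       // same shape as lm_ggml_hash_set
        size_t     size;
        uint32_t * used;    // bitset: is slot i occupied?
        tensor  ** keys;    // keys[i] valid only where used is set
    };

    static bool slot_used(const hash_set & h, size_t i) {
        return (h.used[i >> 5] >> (i & 31)) & 1u;
    }

    // start at hash % size and walk forward until the key or an empty slot;
    // returns h.size to signal "not found"
    static size_t find(const hash_set & h, tensor * key, size_t hash) {
        size_t i = hash % h.size;
        for (size_t probes = 0; probes < h.size; ++probes) {
            if (!slot_used(h, i)) return h.size; // empty slot: absent
            if (h.keys[i] == key) return i;      // found
            i = (i + 1) % h.size;                // probe the next slot
        }
        return h.size; // table full and key absent
    }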
@@ -745,6 +759,24 @@ static size_t lm_ggml_hash_find_or_insert(struct lm_ggml_hash_set * hash_set, struct lm_ggml_tensor * key) {
     LM_GGML_ABORT("fatal error");
 }
 
+// computation graph
+
+struct lm_ggml_cgraph {
+    int size;
+    int n_nodes;
+    int n_leafs;
+
+    struct lm_ggml_tensor ** nodes;
+    struct lm_ggml_tensor ** grads;
+    struct lm_ggml_tensor ** leafs;
+
+    struct lm_ggml_hash_set visited_hash_set;
+
+    enum lm_ggml_cgraph_eval_order order;
+};
+
+struct lm_ggml_cgraph lm_ggml_graph_view(struct lm_ggml_cgraph * cgraph, int i0, int i1);
+
 #ifdef __cplusplus
 }
 #endif
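Publishing lm_ggml_cgraph in ggml-impl.h lets backends slice a graph without reaching into private headers. A minimal sketch, assuming lm_ggml_graph_view(gf, i0, i1) returns a shallow view whose nodes cover [i0, i1) of the parent graph (which is how the Metal code below splits work across command buffers):

    // Sketch: walking one slice of a graph through the now-public view API.
    #include "ggml-impl.h" // assumed include path for the declarations above

    static void process_slice(struct lm_ggml_cgraph * gf, int i0, int i1) {
        struct lm_ggml_cgraph gv = lm_ggml_graph_view(gf, i0, i1);
        for (int i = 0; i < gv.n_nodes; ++i) {
            struct lm_ggml_tensor * node = gv.nodes[i];
            (void) node; // e.g. encode the node into a command buffer
        }
    }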
package/cpp/ggml-metal.m
CHANGED
@@ -1,7 +1,7 @@
 #import "ggml-metal.h"
 
+#import "ggml-impl.h"
 #import "ggml-backend-impl.h"
-#import "ggml.h"
 
 #import <Foundation/Foundation.h>
 
@@ -17,8 +17,8 @@
 #define LM_GGML_METAL_LOG_WARN(...)
 #define LM_GGML_METAL_LOG_ERROR(...)
 #else
-#define LM_GGML_METAL_LOG_INFO(...) lm_ggml_metal_log(LM_GGML_LOG_LEVEL_INFO,
-#define LM_GGML_METAL_LOG_WARN(...) lm_ggml_metal_log(LM_GGML_LOG_LEVEL_WARN,
+#define LM_GGML_METAL_LOG_INFO(...) lm_ggml_metal_log(LM_GGML_LOG_LEVEL_INFO, __VA_ARGS__)
+#define LM_GGML_METAL_LOG_WARN(...) lm_ggml_metal_log(LM_GGML_LOG_LEVEL_WARN, __VA_ARGS__)
 #define LM_GGML_METAL_LOG_ERROR(...) lm_ggml_metal_log(LM_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 #endif
 
@@ -882,7 +882,7 @@ static enum lm_ggml_status lm_ggml_metal_graph_compute(
     // create multiple command buffers and enqueue them
     // then, we encode the graph into the command buffers in parallel
 
-    const int n_nodes
+    const int n_nodes = gf->n_nodes;
     const int n_cb = ctx->n_cb;
     const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
 
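The restored initializer feeds the ceiling division on the next line, which spreads the graph's nodes across the n_cb command buffers. A worked example of the arithmetic:

    #include <cstdio>

    int main() {
        const int n_nodes = 100, n_cb = 3;
        // (100 + 3 - 1) / 3 = 34, i.e. ceil(100 / 3): buffers 0 and 1
        // encode 34 nodes each and buffer 2 encodes the remaining 32.
        const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
        printf("%d nodes per command buffer\n", n_nodes_per_cb);
        return 0;
    }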
@@ -3039,8 +3039,7 @@
         if (status != MTLCommandBufferStatusCompleted) {
             LM_GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
             if (status == MTLCommandBufferStatusError) {
-                NSString * error_code = [command_buffer error].localizedDescription;
-                LM_GGML_METAL_LOG_INFO("error: %s\n", [error_code UTF8String]);
+                LM_GGML_METAL_LOG_INFO("error: %s\n", [[command_buffer error].localizedDescription UTF8String]);
             }
 
             return LM_GGML_STATUS_FAILED;