llama_cpp 0.12.4 → 0.12.6

@@ -505,11 +505,17 @@ extern "C" {
 
     enum ggml_log_level {
         GGML_LOG_LEVEL_ERROR = 2,
-        GGML_LOG_LEVEL_WARN = 3,
-        GGML_LOG_LEVEL_INFO = 4,
+        GGML_LOG_LEVEL_WARN  = 3,
+        GGML_LOG_LEVEL_INFO  = 4,
         GGML_LOG_LEVEL_DEBUG = 5
     };
 
+    enum ggml_tensor_flag {
+        GGML_TENSOR_FLAG_INPUT  = 1,
+        GGML_TENSOR_FLAG_OUTPUT = 2,
+        GGML_TENSOR_FLAG_PARAM  = 4,
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
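The new `ggml_tensor_flag` values are powers of two, so they read as a bitmask rather than mutually exclusive states. A minimal sketch of combining them, assuming the `flags` field introduced in the next hunk:

```c
#include "ggml.h"

// Sketch only: the GGML_TENSOR_FLAG_* constants are powers of two,
// so a tensor that is both a graph input and a graph output can carry
// both bits in one flags word.
static void mark_inout(struct ggml_tensor * t) {
    t->flags |= GGML_TENSOR_FLAG_INPUT | GGML_TENSOR_FLAG_OUTPUT;
}
```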
@@ -543,7 +549,7 @@ extern "C" {
         // op params - allocated as int32_t for alignment
         int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 
-        bool is_param;
+        int32_t flags;
 
         struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
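With `bool is_param` widened to an `int32_t` bitmask, code that previously read `tensor->is_param` presumably tests `GGML_TENSOR_FLAG_PARAM` instead. A hedged before/after sketch (our inference from the names; the diff does not show the call sites):

```c
#include <stdbool.h>
#include "ggml.h"

// 0.12.4: callers read the bool directly, e.g. `if (t->is_param) ...`.
// 0.12.6: assuming GGML_TENSOR_FLAG_PARAM mirrors the old bool, the
// equivalent check becomes a bit test on the new flags field.
static bool is_param(const struct ggml_tensor * t) {
    return (t->flags & GGML_TENSOR_FLAG_PARAM) != 0;
}
```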
@@ -567,6 +573,11 @@ extern "C" {
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
 
+    // Abort callback
+    // If not NULL, called before ggml computation
+    // If it returns true, the computation is aborted
+    typedef bool (*ggml_abort_callback)(void * data);
+
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
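A sketch of a callback matching the new `ggml_abort_callback` typedef: it stops a long-running graph once a caller-supplied deadline passes. The deadline scheme is illustrative, not part of the library; the abort semantics follow the header comment above.

```c
#include <stdbool.h>
#include <time.h>

// Returns true once the deadline has passed; per the header comment,
// a true return aborts the ggml computation.
static bool abort_after_deadline(void * data) {
    const time_t deadline = *(const time_t *) data;
    return time(NULL) > deadline;
}
```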
@@ -576,8 +587,8 @@ extern "C" {
         int n_threads;
 
         // abort ggml_graph_compute when true
-        bool (*abort_callback)(void * data);
-        void * abort_callback_data;
+        ggml_abort_callback abort_callback;
+        void *              abort_callback_data;
     };
 
     enum ggml_cgraph_eval_order {
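Wiring such a callback into a compute plan might look like the sketch below, assuming a built graph `gf` and the `abort_after_deadline` helper above; `ggml_graph_plan` and `ggml_graph_compute` are the existing plan/execute entry points. With the typedef, the assignment now type-checks against `ggml_abort_callback` rather than a raw function pointer.

```c
#include <stdlib.h>
#include <time.h>
#include "ggml.h"

// Sketch: run a graph with a 5-second budget.
static int compute_with_budget(struct ggml_cgraph * gf) {
    struct ggml_cplan plan = ggml_graph_plan(gf, /*n_threads=*/4);

    // the caller provides whatever scratch buffer the plan asks for
    uint8_t * work = plan.work_size > 0 ? malloc(plan.work_size) : NULL;
    plan.work_data = work;

    time_t deadline = time(NULL) + 5;
    plan.abort_callback      = abort_after_deadline;
    plan.abort_callback_data = &deadline;

    int status = ggml_graph_compute(gf, &plan); // returns early if the callback fires
    free(work);
    return status;
}
```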
@@ -2087,6 +2098,12 @@ extern "C" {
             ggml_opt_callback callback,
             void * callback_data);
 
+    //
+    // tensor flags
+    //
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+
     //
     // quantization
     //
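The diff only carries the declarations; from the flag names we assume the setters tag `tensor->flags` with `GGML_TENSOR_FLAG_INPUT` / `GGML_TENSOR_FLAG_OUTPUT` so graph machinery can tell endpoints from intermediates. A hedged usage sketch:

```c
#include "ggml.h"

// Sketch: mark the endpoints of a tiny matmul graph.
static void build_graph(struct ggml_context * ctx, struct ggml_cgraph * gf) {
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    struct ggml_tensor * w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    struct ggml_tensor * out = ggml_mul_mat(ctx, w, inp);

    ggml_set_input(inp);   // presumably sets GGML_TENSOR_FLAG_INPUT
    ggml_set_output(out);  // presumably sets GGML_TENSOR_FLAG_OUTPUT
    ggml_build_forward_expand(gf, out);
}
```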
@@ -2273,6 +2290,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_ssse3      (void);
     GGML_API int ggml_cpu_has_sycl       (void);
     GGML_API int ggml_cpu_has_vsx        (void);
+    GGML_API int ggml_cpu_has_matmul_int8(void);
 
     //
     // Internal types and functions exposed for tests and benchmarks
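The new probe follows the pattern of its neighbors: nonzero when the feature is available. The name suggests the ARM int8 matrix-multiply (I8MM) extension, though the diff itself only shows the declaration:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // nonzero when ggml was built with int8 matmul support, by
    // analogy with the other ggml_cpu_has_* probes
    printf("matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "yes" : "no");
    return 0;
}
```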
@@ -2286,7 +2304,8 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int k);
-    typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*ggml_vec_dot_t)   (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
+                                      const void * GGML_RESTRICT y, size_t by, int nrc);
 
     typedef struct {
         const char      * type_name;
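The dot-product hook gains stride arguments (`bs`, `bx`, `by`) and an `nrc` count, which reads as: one call may now produce several results, with the strides saying how far apart consecutive rows of `s`, `x`, and `y` sit. A sketch of an f32 kernel in the new shape, handling only the single-row case (our reading of the parameters, inferred from the names):

```c
#include <assert.h>
#include "ggml.h"

// Scalar f32 dot product in the new ggml_vec_dot_t shape. A kernel
// that only ever computes one result (nrc == 1) can ignore the row
// strides entirely.
static void vec_dot_f32_sketch(int n, float * GGML_RESTRICT s, size_t bs,
                               const void * GGML_RESTRICT x, size_t bx,
                               const void * GGML_RESTRICT y, size_t by, int nrc) {
    assert(nrc == 1);
    (void) bs; (void) bx; (void) by;

    const float * xf = (const float *) x;
    const float * yf = (const float *) y;

    float sum = 0.0f;
    for (int i = 0; i < n; ++i) {
        sum += xf[i]*yf[i];
    }
    *s = sum;
}
```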
@@ -2298,6 +2317,7 @@ extern "C" {
         ggml_from_float_t from_float_reference;
         ggml_vec_dot_t    vec_dot;
         enum ggml_type    vec_dot_type;
+        int64_t           nrows; // number of rows to process simultaneously;
     } ggml_type_traits_t;
 
     GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
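`nrows` advertises how many rows the type's `vec_dot` produces per call (the `nrc` argument above), so callers can step their row loops by it; a value of 1 would reproduce the old one-result-per-call behavior. A small runnable check that just reads the field:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // the traits struct now carries the kernel's row batch size
    ggml_type_traits_t tt = ggml_internal_get_type_traits(GGML_TYPE_F32);
    printf("f32 vec_dot handles %lld row(s) per call\n", (long long) tt.nrows);
    return 0;
}
```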