llama_cpp 0.12.7 → 0.13.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -350,6 +350,9 @@ extern "C" {
350
350
  GGML_TYPE_IQ3_XXS = 18,
351
351
  GGML_TYPE_IQ1_S = 19,
352
352
  GGML_TYPE_IQ4_NL = 20,
353
+ GGML_TYPE_IQ3_S = 21,
354
+ GGML_TYPE_IQ2_S = 22,
355
+ GGML_TYPE_IQ4_XS = 23,
353
356
  GGML_TYPE_I8,
354
357
  GGML_TYPE_I16,
355
358
  GGML_TYPE_I32,
@@ -363,9 +366,9 @@ extern "C" {
363
366
  };
364
367
 
365
368
  enum ggml_backend_type {
366
- GGML_BACKEND_CPU = 0,
367
- GGML_BACKEND_GPU = 10,
368
- GGML_BACKEND_GPU_SPLIT = 20,
369
+ GGML_BACKEND_TYPE_CPU = 0,
370
+ GGML_BACKEND_TYPE_GPU = 10,
371
+ GGML_BACKEND_TYPE_GPU_SPLIT = 20,
369
372
  };
370
373
 
371
374
  // model file types
@@ -389,6 +392,9 @@ extern "C" {
389
392
  GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
390
393
  GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors
391
394
  GGML_FTYPE_MOSTLY_IQ4_NL = 19, // except 1d tensors
395
+ GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
396
+ GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
397
+ GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
392
398
  };
393
399
 
394
400
  // available tensor operations:
@@ -496,9 +502,9 @@ extern "C" {
496
502
  };
497
503
 
498
504
  enum ggml_object_type {
499
- GGML_OBJECT_TENSOR,
500
- GGML_OBJECT_GRAPH,
501
- GGML_OBJECT_WORK_BUFFER
505
+ GGML_OBJECT_TYPE_TENSOR,
506
+ GGML_OBJECT_TYPE_GRAPH,
507
+ GGML_OBJECT_TYPE_WORK_BUFFER
502
508
  };
503
509
 
504
510
  enum ggml_log_level {
@@ -640,9 +646,9 @@ extern "C" {
640
646
  // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
641
647
  // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
642
648
  enum ggml_task_type {
643
- GGML_TASK_INIT = 0,
644
- GGML_TASK_COMPUTE,
645
- GGML_TASK_FINALIZE,
649
+ GGML_TASK_TYPE_INIT = 0,
650
+ GGML_TASK_TYPE_COMPUTE,
651
+ GGML_TASK_TYPE_FINALIZE,
646
652
  };
647
653
 
648
654
  struct ggml_compute_params {
@@ -666,6 +672,16 @@ extern "C" {
666
672
  GGML_NUMA_STRATEGY_COUNT
667
673
  };
668
674
 
675
+ //
676
+ // GUID
677
+ //
678
+
679
+ // GUID types
680
+ typedef uint8_t ggml_guid[16];
681
+ typedef ggml_guid * ggml_guid_t;
682
+
683
+ GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
684
+
669
685
  // misc
670
686
 
671
687
  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
@@ -1647,8 +1663,8 @@ extern "C" {
1647
1663
 
1648
1664
  // sort rows
1649
1665
  enum ggml_sort_order {
1650
- GGML_SORT_ASC,
1651
- GGML_SORT_DESC,
1666
+ GGML_SORT_ORDER_ASC,
1667
+ GGML_SORT_ORDER_DESC,
1652
1668
  };
1653
1669
 
1654
1670
  GGML_API struct ggml_tensor * ggml_argsort(
@@ -1941,8 +1957,8 @@ extern "C" {
1941
1957
 
1942
1958
  // optimization methods
1943
1959
  enum ggml_opt_type {
1944
- GGML_OPT_ADAM,
1945
- GGML_OPT_LBFGS,
1960
+ GGML_OPT_TYPE_ADAM,
1961
+ GGML_OPT_TYPE_LBFGS,
1946
1962
  };
1947
1963
 
1948
1964
  // linesearch methods
@@ -1956,12 +1972,12 @@ extern "C" {
1956
1972
 
1957
1973
  // optimization return values
1958
1974
  enum ggml_opt_result {
1959
- GGML_OPT_OK = 0,
1960
- GGML_OPT_DID_NOT_CONVERGE,
1961
- GGML_OPT_NO_CONTEXT,
1962
- GGML_OPT_INVALID_WOLFE,
1963
- GGML_OPT_FAIL,
1964
- GGML_OPT_CANCEL,
1975
+ GGML_OPT_RESULT_OK = 0,
1976
+ GGML_OPT_RESULT_DID_NOT_CONVERGE,
1977
+ GGML_OPT_RESULT_NO_CONTEXT,
1978
+ GGML_OPT_RESULT_INVALID_WOLFE,
1979
+ GGML_OPT_RESULT_FAIL,
1980
+ GGML_OPT_RESULT_CANCEL,
1965
1981
 
1966
1982
  GGML_LINESEARCH_FAIL = -128,
1967
1983
  GGML_LINESEARCH_MINIMUM_STEP,