llama_cpp 0.12.7 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +72 -262
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +23 -25
- data/vendor/tmp/llama.cpp/Makefile +8 -3
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -2
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +7 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +96 -15
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1049 -38
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +25 -25
- data/vendor/tmp/llama.cpp/ggml-quants.c +1873 -218
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +292 -221
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +64 -52
- data/vendor/tmp/llama.cpp/ggml.c +318 -195
- data/vendor/tmp/llama.cpp/ggml.h +35 -19
- data/vendor/tmp/llama.cpp/llama.cpp +806 -531
- data/vendor/tmp/llama.cpp/llama.h +53 -65
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -350,6 +350,9 @@ extern "C" {
|
|
350
350
|
GGML_TYPE_IQ3_XXS = 18,
|
351
351
|
GGML_TYPE_IQ1_S = 19,
|
352
352
|
GGML_TYPE_IQ4_NL = 20,
|
353
|
+
GGML_TYPE_IQ3_S = 21,
|
354
|
+
GGML_TYPE_IQ2_S = 22,
|
355
|
+
GGML_TYPE_IQ4_XS = 23,
|
353
356
|
GGML_TYPE_I8,
|
354
357
|
GGML_TYPE_I16,
|
355
358
|
GGML_TYPE_I32,
|
@@ -363,9 +366,9 @@ extern "C" {
|
|
363
366
|
};
|
364
367
|
|
365
368
|
enum ggml_backend_type {
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
+
GGML_BACKEND_TYPE_CPU = 0,
|
370
|
+
GGML_BACKEND_TYPE_GPU = 10,
|
371
|
+
GGML_BACKEND_TYPE_GPU_SPLIT = 20,
|
369
372
|
};
|
370
373
|
|
371
374
|
// model file types
|
@@ -389,6 +392,9 @@ extern "C" {
|
|
389
392
|
GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
|
390
393
|
GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors
|
391
394
|
GGML_FTYPE_MOSTLY_IQ4_NL = 19, // except 1d tensors
|
395
|
+
GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
|
396
|
+
GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
|
397
|
+
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
392
398
|
};
|
393
399
|
|
394
400
|
// available tensor operations:
|
@@ -496,9 +502,9 @@ extern "C" {
|
|
496
502
|
};
|
497
503
|
|
498
504
|
enum ggml_object_type {
|
499
|
-
|
500
|
-
|
501
|
-
|
505
|
+
GGML_OBJECT_TYPE_TENSOR,
|
506
|
+
GGML_OBJECT_TYPE_GRAPH,
|
507
|
+
GGML_OBJECT_TYPE_WORK_BUFFER
|
502
508
|
};
|
503
509
|
|
504
510
|
enum ggml_log_level {
|
@@ -640,9 +646,9 @@ extern "C" {
|
|
640
646
|
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
641
647
|
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
642
648
|
enum ggml_task_type {
|
643
|
-
|
644
|
-
|
645
|
-
|
649
|
+
GGML_TASK_TYPE_INIT = 0,
|
650
|
+
GGML_TASK_TYPE_COMPUTE,
|
651
|
+
GGML_TASK_TYPE_FINALIZE,
|
646
652
|
};
|
647
653
|
|
648
654
|
struct ggml_compute_params {
|
@@ -666,6 +672,16 @@ extern "C" {
|
|
666
672
|
GGML_NUMA_STRATEGY_COUNT
|
667
673
|
};
|
668
674
|
|
675
|
+
//
|
676
|
+
// GUID
|
677
|
+
//
|
678
|
+
|
679
|
+
// GUID types
|
680
|
+
typedef uint8_t ggml_guid[16];
|
681
|
+
typedef ggml_guid * ggml_guid_t;
|
682
|
+
|
683
|
+
GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
|
684
|
+
|
669
685
|
// misc
|
670
686
|
|
671
687
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
@@ -1647,8 +1663,8 @@ extern "C" {
|
|
1647
1663
|
|
1648
1664
|
// sort rows
|
1649
1665
|
enum ggml_sort_order {
|
1650
|
-
|
1651
|
-
|
1666
|
+
GGML_SORT_ORDER_ASC,
|
1667
|
+
GGML_SORT_ORDER_DESC,
|
1652
1668
|
};
|
1653
1669
|
|
1654
1670
|
GGML_API struct ggml_tensor * ggml_argsort(
|
@@ -1941,8 +1957,8 @@ extern "C" {
|
|
1941
1957
|
|
1942
1958
|
// optimization methods
|
1943
1959
|
enum ggml_opt_type {
|
1944
|
-
|
1945
|
-
|
1960
|
+
GGML_OPT_TYPE_ADAM,
|
1961
|
+
GGML_OPT_TYPE_LBFGS,
|
1946
1962
|
};
|
1947
1963
|
|
1948
1964
|
// linesearch methods
|
@@ -1956,12 +1972,12 @@ extern "C" {
|
|
1956
1972
|
|
1957
1973
|
// optimization return values
|
1958
1974
|
enum ggml_opt_result {
|
1959
|
-
|
1960
|
-
|
1961
|
-
|
1962
|
-
|
1963
|
-
|
1964
|
-
|
1975
|
+
GGML_OPT_RESULT_OK = 0,
|
1976
|
+
GGML_OPT_RESULT_DID_NOT_CONVERGE,
|
1977
|
+
GGML_OPT_RESULT_NO_CONTEXT,
|
1978
|
+
GGML_OPT_RESULT_INVALID_WOLFE,
|
1979
|
+
GGML_OPT_RESULT_FAIL,
|
1980
|
+
GGML_OPT_RESULT_CANCEL,
|
1965
1981
|
|
1966
1982
|
GGML_LINESEARCH_FAIL = -128,
|
1967
1983
|
GGML_LINESEARCH_MINIMUM_STEP,
|