llama_cpp 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +49 -3
- data/ext/llama_cpp/src/ggml-alloc.c +62 -107
- data/ext/llama_cpp/src/ggml-alloc.h +11 -5
- data/ext/llama_cpp/src/ggml-backend.c +385 -0
- data/ext/llama_cpp/src/ggml-backend.h +143 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +622 -150
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.h +18 -1
- data/ext/llama_cpp/src/ggml-metal.m +358 -131
- data/ext/llama_cpp/src/ggml-metal.metal +137 -47
- data/ext/llama_cpp/src/ggml-opencl.cpp +136 -68
- data/ext/llama_cpp/src/ggml.c +812 -365
- data/ext/llama_cpp/src/ggml.h +25 -7
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/k_quants.h +5 -5
- data/ext/llama_cpp/src/llama.cpp +2387 -421
- data/ext/llama_cpp/src/llama.h +22 -6
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -0
- metadata +5 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -326,7 +326,7 @@ extern "C" {
|
|
326
326
|
GGML_TYPE_COUNT,
|
327
327
|
};
|
328
328
|
|
329
|
-
enum ggml_backend {
|
329
|
+
enum ggml_backend_type {
|
330
330
|
GGML_BACKEND_CPU = 0,
|
331
331
|
GGML_BACKEND_GPU = 10,
|
332
332
|
GGML_BACKEND_GPU_SPLIT = 20,
|
@@ -401,10 +401,14 @@ extern "C" {
|
|
401
401
|
GGML_OP_CLAMP,
|
402
402
|
GGML_OP_CONV_1D,
|
403
403
|
GGML_OP_CONV_2D,
|
404
|
+
GGML_OP_CONV_TRANSPOSE_1D,
|
404
405
|
GGML_OP_CONV_TRANSPOSE_2D,
|
405
406
|
GGML_OP_POOL_1D,
|
406
407
|
GGML_OP_POOL_2D,
|
407
408
|
|
409
|
+
GGML_OP_CONV_1D_STAGE_0, // internal
|
410
|
+
GGML_OP_CONV_1D_STAGE_1, // internal
|
411
|
+
|
408
412
|
GGML_OP_UPSCALE, // nearest interpolate
|
409
413
|
|
410
414
|
GGML_OP_FLASH_ATTN,
|
@@ -475,8 +479,10 @@ extern "C" {
|
|
475
479
|
|
476
480
|
// n-dimensional tensor
|
477
481
|
struct ggml_tensor {
|
478
|
-
enum ggml_type type;
|
479
|
-
enum ggml_backend backend;
|
482
|
+
enum ggml_type type;
|
483
|
+
enum ggml_backend_type backend;
|
484
|
+
|
485
|
+
struct ggml_backend_buffer * buffer;
|
480
486
|
|
481
487
|
int n_dims;
|
482
488
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
@@ -510,7 +516,7 @@ extern "C" {
|
|
510
516
|
|
511
517
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
512
518
|
|
513
|
-
char padding[4];
|
519
|
+
char padding[12];
|
514
520
|
};
|
515
521
|
|
516
522
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
@@ -698,6 +704,9 @@ extern "C" {
|
|
698
704
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
699
705
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
700
706
|
|
707
|
+
// Context tensor enumeration and lookup
|
708
|
+
GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
|
709
|
+
GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
|
701
710
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
702
711
|
|
703
712
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
@@ -1354,7 +1363,7 @@ extern "C" {
|
|
1354
1363
|
|
1355
1364
|
// alibi position embedding
|
1356
1365
|
// in-place, returns view(a)
|
1357
|
-
struct ggml_tensor * ggml_alibi(
|
1366
|
+
GGML_API struct ggml_tensor * ggml_alibi(
|
1358
1367
|
struct ggml_context * ctx,
|
1359
1368
|
struct ggml_tensor * a,
|
1360
1369
|
int n_past,
|
@@ -1363,7 +1372,7 @@ extern "C" {
|
|
1363
1372
|
|
1364
1373
|
// clamp
|
1365
1374
|
// in-place, returns view(a)
|
1366
|
-
struct ggml_tensor * ggml_clamp(
|
1375
|
+
GGML_API struct ggml_tensor * ggml_clamp(
|
1367
1376
|
struct ggml_context * ctx,
|
1368
1377
|
struct ggml_tensor * a,
|
1369
1378
|
float min,
|
@@ -1386,6 +1395,14 @@ extern "C" {
|
|
1386
1395
|
int s,
|
1387
1396
|
int d);
|
1388
1397
|
|
1398
|
+
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
1399
|
+
struct ggml_context * ctx,
|
1400
|
+
struct ggml_tensor * a,
|
1401
|
+
struct ggml_tensor * b,
|
1402
|
+
int s0,
|
1403
|
+
int p0,
|
1404
|
+
int d0);
|
1405
|
+
|
1389
1406
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
1390
1407
|
struct ggml_context * ctx,
|
1391
1408
|
struct ggml_tensor * a,
|
@@ -1759,6 +1776,7 @@ extern "C" {
|
|
1759
1776
|
GGML_OPT_NO_CONTEXT,
|
1760
1777
|
GGML_OPT_INVALID_WOLFE,
|
1761
1778
|
GGML_OPT_FAIL,
|
1779
|
+
GGML_OPT_CANCEL,
|
1762
1780
|
|
1763
1781
|
GGML_LINESEARCH_FAIL = -128,
|
1764
1782
|
GGML_LINESEARCH_MINIMUM_STEP,
|
@@ -2089,7 +2107,7 @@ extern "C" {
|
|
2089
2107
|
enum ggml_type vec_dot_type;
|
2090
2108
|
} ggml_type_traits_t;
|
2091
2109
|
|
2092
|
-
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
2110
|
+
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
2093
2111
|
|
2094
2112
|
#ifdef __cplusplus
|
2095
2113
|
}
|