llama_cpp 0.6.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +49 -3
- data/ext/llama_cpp/src/ggml-alloc.c +62 -107
- data/ext/llama_cpp/src/ggml-alloc.h +11 -5
- data/ext/llama_cpp/src/ggml-backend.c +385 -0
- data/ext/llama_cpp/src/ggml-backend.h +143 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +622 -150
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.h +18 -1
- data/ext/llama_cpp/src/ggml-metal.m +358 -131
- data/ext/llama_cpp/src/ggml-metal.metal +137 -47
- data/ext/llama_cpp/src/ggml-opencl.cpp +136 -68
- data/ext/llama_cpp/src/ggml.c +812 -365
- data/ext/llama_cpp/src/ggml.h +25 -7
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/k_quants.h +5 -5
- data/ext/llama_cpp/src/llama.cpp +2387 -421
- data/ext/llama_cpp/src/llama.h +22 -6
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -0
- metadata +5 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -326,7 +326,7 @@ extern "C" {
|
|
326
326
|
GGML_TYPE_COUNT,
|
327
327
|
};
|
328
328
|
|
329
|
-
enum ggml_backend {
|
329
|
+
enum ggml_backend_type {
|
330
330
|
GGML_BACKEND_CPU = 0,
|
331
331
|
GGML_BACKEND_GPU = 10,
|
332
332
|
GGML_BACKEND_GPU_SPLIT = 20,
|
@@ -401,10 +401,14 @@ extern "C" {
|
|
401
401
|
GGML_OP_CLAMP,
|
402
402
|
GGML_OP_CONV_1D,
|
403
403
|
GGML_OP_CONV_2D,
|
404
|
+
GGML_OP_CONV_TRANSPOSE_1D,
|
404
405
|
GGML_OP_CONV_TRANSPOSE_2D,
|
405
406
|
GGML_OP_POOL_1D,
|
406
407
|
GGML_OP_POOL_2D,
|
407
408
|
|
409
|
+
GGML_OP_CONV_1D_STAGE_0, // internal
|
410
|
+
GGML_OP_CONV_1D_STAGE_1, // internal
|
411
|
+
|
408
412
|
GGML_OP_UPSCALE, // nearest interpolate
|
409
413
|
|
410
414
|
GGML_OP_FLASH_ATTN,
|
@@ -475,8 +479,10 @@ extern "C" {
|
|
475
479
|
|
476
480
|
// n-dimensional tensor
|
477
481
|
struct ggml_tensor {
|
478
|
-
enum ggml_type type;
|
479
|
-
enum ggml_backend backend;
|
482
|
+
enum ggml_type type;
|
483
|
+
enum ggml_backend_type backend;
|
484
|
+
|
485
|
+
struct ggml_backend_buffer * buffer;
|
480
486
|
|
481
487
|
int n_dims;
|
482
488
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
@@ -510,7 +516,7 @@ extern "C" {
|
|
510
516
|
|
511
517
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
512
518
|
|
513
|
-
char padding[4];
|
519
|
+
char padding[12];
|
514
520
|
};
|
515
521
|
|
516
522
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
@@ -698,6 +704,9 @@ extern "C" {
|
|
698
704
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
699
705
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
700
706
|
|
707
|
+
// Context tensor enumeration and lookup
|
708
|
+
GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
|
709
|
+
GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
|
701
710
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
702
711
|
|
703
712
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
@@ -1354,7 +1363,7 @@ extern "C" {
|
|
1354
1363
|
|
1355
1364
|
// alibi position embedding
|
1356
1365
|
// in-place, returns view(a)
|
1357
|
-
struct ggml_tensor * ggml_alibi(
|
1366
|
+
GGML_API struct ggml_tensor * ggml_alibi(
|
1358
1367
|
struct ggml_context * ctx,
|
1359
1368
|
struct ggml_tensor * a,
|
1360
1369
|
int n_past,
|
@@ -1363,7 +1372,7 @@ extern "C" {
|
|
1363
1372
|
|
1364
1373
|
// clamp
|
1365
1374
|
// in-place, returns view(a)
|
1366
|
-
struct ggml_tensor * ggml_clamp(
|
1375
|
+
GGML_API struct ggml_tensor * ggml_clamp(
|
1367
1376
|
struct ggml_context * ctx,
|
1368
1377
|
struct ggml_tensor * a,
|
1369
1378
|
float min,
|
@@ -1386,6 +1395,14 @@ extern "C" {
|
|
1386
1395
|
int s,
|
1387
1396
|
int d);
|
1388
1397
|
|
1398
|
+
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
1399
|
+
struct ggml_context * ctx,
|
1400
|
+
struct ggml_tensor * a,
|
1401
|
+
struct ggml_tensor * b,
|
1402
|
+
int s0,
|
1403
|
+
int p0,
|
1404
|
+
int d0);
|
1405
|
+
|
1389
1406
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
1390
1407
|
struct ggml_context * ctx,
|
1391
1408
|
struct ggml_tensor * a,
|
@@ -1759,6 +1776,7 @@ extern "C" {
|
|
1759
1776
|
GGML_OPT_NO_CONTEXT,
|
1760
1777
|
GGML_OPT_INVALID_WOLFE,
|
1761
1778
|
GGML_OPT_FAIL,
|
1779
|
+
GGML_OPT_CANCEL,
|
1762
1780
|
|
1763
1781
|
GGML_LINESEARCH_FAIL = -128,
|
1764
1782
|
GGML_LINESEARCH_MINIMUM_STEP,
|
@@ -2089,7 +2107,7 @@ extern "C" {
|
|
2089
2107
|
enum ggml_type vec_dot_type;
|
2090
2108
|
} ggml_type_traits_t;
|
2091
2109
|
|
2092
|
-
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
2110
|
+
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
2093
2111
|
|
2094
2112
|
#ifdef __cplusplus
|
2095
2113
|
}
|