llama_cpp 0.6.0 → 0.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -326,7 +326,7 @@ extern "C" {
326
326
  GGML_TYPE_COUNT,
327
327
  };
328
328
 
329
- enum ggml_backend {
329
+ enum ggml_backend_type {
330
330
  GGML_BACKEND_CPU = 0,
331
331
  GGML_BACKEND_GPU = 10,
332
332
  GGML_BACKEND_GPU_SPLIT = 20,
@@ -401,10 +401,14 @@ extern "C" {
401
401
  GGML_OP_CLAMP,
402
402
  GGML_OP_CONV_1D,
403
403
  GGML_OP_CONV_2D,
404
+ GGML_OP_CONV_TRANSPOSE_1D,
404
405
  GGML_OP_CONV_TRANSPOSE_2D,
405
406
  GGML_OP_POOL_1D,
406
407
  GGML_OP_POOL_2D,
407
408
 
409
+ GGML_OP_CONV_1D_STAGE_0, // internal
410
+ GGML_OP_CONV_1D_STAGE_1, // internal
411
+
408
412
  GGML_OP_UPSCALE, // nearest interpolate
409
413
 
410
414
  GGML_OP_FLASH_ATTN,
@@ -475,8 +479,10 @@ extern "C" {
475
479
 
476
480
  // n-dimensional tensor
477
481
  struct ggml_tensor {
478
- enum ggml_type type;
479
- enum ggml_backend backend;
482
+ enum ggml_type type;
483
+ enum ggml_backend_type backend;
484
+
485
+ struct ggml_backend_buffer * buffer;
480
486
 
481
487
  int n_dims;
482
488
  int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -510,7 +516,7 @@ extern "C" {
510
516
 
511
517
  void * extra; // extra things e.g. for ggml-cuda.cu
512
518
 
513
- char padding[4];
519
+ char padding[12];
514
520
  };
515
521
 
516
522
  static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -698,6 +704,9 @@ extern "C" {
698
704
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
699
705
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
700
706
 
707
+ // Context tensor enumeration and lookup
708
+ GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
709
+ GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
701
710
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
702
711
 
703
712
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
@@ -1354,7 +1363,7 @@ extern "C" {
1354
1363
 
1355
1364
  // alibi position embedding
1356
1365
  // in-place, returns view(a)
1357
- struct ggml_tensor * ggml_alibi(
1366
+ GGML_API struct ggml_tensor * ggml_alibi(
1358
1367
  struct ggml_context * ctx,
1359
1368
  struct ggml_tensor * a,
1360
1369
  int n_past,
@@ -1363,7 +1372,7 @@ extern "C" {
1363
1372
 
1364
1373
  // clamp
1365
1374
  // in-place, returns view(a)
1366
- struct ggml_tensor * ggml_clamp(
1375
+ GGML_API struct ggml_tensor * ggml_clamp(
1367
1376
  struct ggml_context * ctx,
1368
1377
  struct ggml_tensor * a,
1369
1378
  float min,
@@ -1386,6 +1395,14 @@ extern "C" {
1386
1395
  int s,
1387
1396
  int d);
1388
1397
 
1398
+ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1399
+ struct ggml_context * ctx,
1400
+ struct ggml_tensor * a,
1401
+ struct ggml_tensor * b,
1402
+ int s0,
1403
+ int p0,
1404
+ int d0);
1405
+
1389
1406
  GGML_API struct ggml_tensor * ggml_conv_2d(
1390
1407
  struct ggml_context * ctx,
1391
1408
  struct ggml_tensor * a,
@@ -1759,6 +1776,7 @@ extern "C" {
1759
1776
  GGML_OPT_NO_CONTEXT,
1760
1777
  GGML_OPT_INVALID_WOLFE,
1761
1778
  GGML_OPT_FAIL,
1779
+ GGML_OPT_CANCEL,
1762
1780
 
1763
1781
  GGML_LINESEARCH_FAIL = -128,
1764
1782
  GGML_LINESEARCH_MINIMUM_STEP,
@@ -2089,7 +2107,7 @@ extern "C" {
2089
2107
  enum ggml_type vec_dot_type;
2090
2108
  } ggml_type_traits_t;
2091
2109
 
2092
- ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
2110
+ GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
2093
2111
 
2094
2112
  #ifdef __cplusplus
2095
2113
  }