llama_cpp 0.12.7 → 0.14.0

Changes to the bundled ggml.h header:

@@ -315,6 +315,16 @@
 extern "C" {
 #endif
 
+    enum ggml_status {
+        GGML_STATUS_ALLOC_FAILED = -2,
+        GGML_STATUS_FAILED = -1,
+        GGML_STATUS_SUCCESS = 0,
+        GGML_STATUS_ABORTED = 1,
+    };
+
+    // get ggml_status name string
+    GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
+
     typedef uint16_t ggml_fp16_t;
 
     // convert FP16 <-> FP32
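
The new ggml_status enum gives ggml's compute entry points a common error code, and ggml_status_to_string maps a code to a printable name. A minimal sketch of how a caller might surface one; the log_status helper is hypothetical, not part of the header:

    #include <stdio.h>
    #include "ggml.h"

    // Hypothetical helper: log any non-success status by name.
    static void log_status(enum ggml_status status) {
        if (status != GGML_STATUS_SUCCESS) {
            fprintf(stderr, "ggml error: %s\n", ggml_status_to_string(status));
        }
    }
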
@@ -350,6 +360,9 @@ extern "C" {
         GGML_TYPE_IQ3_XXS = 18,
         GGML_TYPE_IQ1_S   = 19,
         GGML_TYPE_IQ4_NL  = 20,
+        GGML_TYPE_IQ3_S   = 21,
+        GGML_TYPE_IQ2_S   = 22,
+        GGML_TYPE_IQ4_XS  = 23,
         GGML_TYPE_I8,
         GGML_TYPE_I16,
         GGML_TYPE_I32,
@@ -363,9 +376,9 @@ extern "C" {
     };
 
     enum ggml_backend_type {
-        GGML_BACKEND_CPU = 0,
-        GGML_BACKEND_GPU = 10,
-        GGML_BACKEND_GPU_SPLIT = 20,
+        GGML_BACKEND_TYPE_CPU = 0,
+        GGML_BACKEND_TYPE_GPU = 10,
+        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
     };
 
     // model file types
@@ -389,6 +402,9 @@ extern "C" {
         GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ1_S   = 18, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ4_NL  = 19, // except 1d tensors
+        GGML_FTYPE_MOSTLY_IQ3_S   = 20, // except 1d tensors
+        GGML_FTYPE_MOSTLY_IQ2_S   = 21, // except 1d tensors
+        GGML_FTYPE_MOSTLY_IQ4_XS  = 22, // except 1d tensors
     };
 
     // available tensor operations:
@@ -448,6 +464,8 @@ extern "C" {
         GGML_OP_POOL_2D,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
+        GGML_OP_ARANGE,
+        GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
 
@@ -496,9 +514,9 @@ extern "C" {
     };
 
     enum ggml_object_type {
-        GGML_OBJECT_TENSOR,
-        GGML_OBJECT_GRAPH,
-        GGML_OBJECT_WORK_BUFFER
+        GGML_OBJECT_TYPE_TENSOR,
+        GGML_OBJECT_TYPE_GRAPH,
+        GGML_OBJECT_TYPE_WORK_BUFFER
     };
 
     enum ggml_log_level {
@@ -640,9 +658,9 @@ extern "C" {
     // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
     // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
     enum ggml_task_type {
-        GGML_TASK_INIT = 0,
-        GGML_TASK_COMPUTE,
-        GGML_TASK_FINALIZE,
+        GGML_TASK_TYPE_INIT = 0,
+        GGML_TASK_TYPE_COMPUTE,
+        GGML_TASK_TYPE_FINALIZE,
     };
 
     struct ggml_compute_params {
@@ -666,6 +684,16 @@ extern "C" {
         GGML_NUMA_STRATEGY_COUNT
     };
 
+    //
+    // GUID
+    //
+
+    // GUID types
+    typedef uint8_t ggml_guid[16];
+    typedef ggml_guid * ggml_guid_t;
+
+    GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
+
     // misc
 
     GGML_API void ggml_time_init(void); // call this once at the beginning of the program
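
ggml_guid is a plain 16-byte identifier and ggml_guid_t a pointer to one; ggml_guid_matches compares two byte-for-byte. A sketch under the assumption that a caller defines its own static GUIDs; the values below are made up:

    #include <stdbool.h>
    #include "ggml.h"

    // Two made-up 16-byte identifiers; unspecified elements are zero-initialized.
    static ggml_guid my_guid    = { 0xde, 0xad, 0xbe, 0xef };
    static ggml_guid other_guid = { 0xde, 0xad, 0xbe, 0xef };

    static bool guids_equal(void) {
        // &my_guid has type uint8_t (*)[16], which is exactly ggml_guid_t
        return ggml_guid_matches(&my_guid, &other_guid);
    }
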
@@ -1645,10 +1673,19 @@ extern "C" {
             int                  p2,
             int                  p3);
 
+    // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
+    // timesteps: [N,]
+    // return: [N, dim]
+    GGML_API struct ggml_tensor * ggml_timestep_embedding(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * timesteps,
+            int                   dim,
+            int                   max_period);
+
     // sort rows
     enum ggml_sort_order {
-        GGML_SORT_ASC,
-        GGML_SORT_DESC,
+        GGML_SORT_ORDER_ASC,
+        GGML_SORT_ORDER_DESC,
     };
 
     GGML_API struct ggml_tensor * ggml_argsort(
@@ -1656,6 +1693,12 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_sort_order  order);
 
+    GGML_API struct ggml_tensor * ggml_arange(
+            struct ggml_context * ctx,
+            float                 start,
+            float                 stop,
+            float                 step);
+
     // top k elements per row
     GGML_API struct ggml_tensor * ggml_top_k(
             struct ggml_context * ctx,
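
Together these two additions let a graph generate its own timestep inputs: ggml_arange produces a 1-D float range, and ggml_timestep_embedding expands each timestep into a dim-wide sinusoidal embedding as in the referenced Stable Diffusion util.py. A sketch with illustrative sizes, assuming ctx was created with ggml_init and enough memory:

    #include "ggml.h"

    // Build embeddings for timesteps 0..15; sizes here are illustrative.
    static struct ggml_tensor * build_timestep_embeddings(struct ggml_context * ctx) {
        // 1-D tensor [16,] holding 0.0, 1.0, ..., 15.0
        struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 16.0f, 1.0f);

        // [16, 320] sinusoidal embeddings, max_period = 10000 as in the reference
        return ggml_timestep_embedding(ctx, t, 320, 10000);
    }
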
@@ -1907,12 +1950,11 @@ extern "C" {
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan   (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
-    GGML_API int               ggml_graph_compute(      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-
+    GGML_API struct ggml_cplan ggml_graph_plan   (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
+    GGML_API enum ggml_status  ggml_graph_compute(      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
+    GGML_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
 
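Both graph-compute entry points now report a ggml_status instead of an int or void, so failures (a failed allocation, an abort) can be checked and named. A minimal sketch, assuming ctx and graph were built beforehand:

    #include <stdio.h>
    #include "ggml.h"

    static enum ggml_status run_graph(struct ggml_context * ctx, struct ggml_cgraph * graph) {
        // work data is allocated inside ctx; the thread count is illustrative
        enum ggml_status status = ggml_graph_compute_with_ctx(ctx, graph, 4);
        if (status != GGML_STATUS_SUCCESS) {
            fprintf(stderr, "graph compute failed: %s\n", ggml_status_to_string(status));
        }
        return status;
    }
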
@@ -1941,8 +1983,8 @@ extern "C" {
 
     // optimization methods
     enum ggml_opt_type {
-        GGML_OPT_ADAM,
-        GGML_OPT_LBFGS,
+        GGML_OPT_TYPE_ADAM,
+        GGML_OPT_TYPE_LBFGS,
     };
 
     // linesearch methods
@@ -1956,12 +1998,12 @@ extern "C" {
 
     // optimization return values
     enum ggml_opt_result {
-        GGML_OPT_OK = 0,
-        GGML_OPT_DID_NOT_CONVERGE,
-        GGML_OPT_NO_CONTEXT,
-        GGML_OPT_INVALID_WOLFE,
-        GGML_OPT_FAIL,
-        GGML_OPT_CANCEL,
+        GGML_OPT_RESULT_OK = 0,
+        GGML_OPT_RESULT_DID_NOT_CONVERGE,
+        GGML_OPT_RESULT_NO_CONTEXT,
+        GGML_OPT_RESULT_INVALID_WOLFE,
+        GGML_OPT_RESULT_FAIL,
+        GGML_OPT_RESULT_CANCEL,
 
         GGML_LINESEARCH_FAIL = -128,
         GGML_LINESEARCH_MINIMUM_STEP,