llama_cpp 0.12.7 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -315,6 +315,16 @@
315
315
  extern "C" {
316
316
  #endif
317
317
 
318
+ enum ggml_status {
319
+ GGML_STATUS_ALLOC_FAILED = -2,
320
+ GGML_STATUS_FAILED = -1,
321
+ GGML_STATUS_SUCCESS = 0,
322
+ GGML_STATUS_ABORTED = 1,
323
+ };
324
+
325
+ // get ggml_status name string
326
+ GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
327
+
318
328
  typedef uint16_t ggml_fp16_t;
319
329
 
320
330
  // convert FP16 <-> FP32
@@ -350,6 +360,9 @@ extern "C" {
350
360
  GGML_TYPE_IQ3_XXS = 18,
351
361
  GGML_TYPE_IQ1_S = 19,
352
362
  GGML_TYPE_IQ4_NL = 20,
363
+ GGML_TYPE_IQ3_S = 21,
364
+ GGML_TYPE_IQ2_S = 22,
365
+ GGML_TYPE_IQ4_XS = 23,
353
366
  GGML_TYPE_I8,
354
367
  GGML_TYPE_I16,
355
368
  GGML_TYPE_I32,
@@ -363,9 +376,9 @@ extern "C" {
363
376
  };
364
377
 
365
378
  enum ggml_backend_type {
366
- GGML_BACKEND_CPU = 0,
367
- GGML_BACKEND_GPU = 10,
368
- GGML_BACKEND_GPU_SPLIT = 20,
379
+ GGML_BACKEND_TYPE_CPU = 0,
380
+ GGML_BACKEND_TYPE_GPU = 10,
381
+ GGML_BACKEND_TYPE_GPU_SPLIT = 20,
369
382
  };
370
383
 
371
384
  // model file types
@@ -389,6 +402,9 @@ extern "C" {
389
402
  GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
390
403
  GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors
391
404
  GGML_FTYPE_MOSTLY_IQ4_NL = 19, // except 1d tensors
405
+ GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
406
+ GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
407
+ GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
392
408
  };
393
409
 
394
410
  // available tensor operations:
@@ -448,6 +464,8 @@ extern "C" {
448
464
  GGML_OP_POOL_2D,
449
465
  GGML_OP_UPSCALE, // nearest interpolate
450
466
  GGML_OP_PAD,
467
+ GGML_OP_ARANGE,
468
+ GGML_OP_TIMESTEP_EMBEDDING,
451
469
  GGML_OP_ARGSORT,
452
470
  GGML_OP_LEAKY_RELU,
453
471
 
@@ -496,9 +514,9 @@ extern "C" {
496
514
  };
497
515
 
498
516
  enum ggml_object_type {
499
- GGML_OBJECT_TENSOR,
500
- GGML_OBJECT_GRAPH,
501
- GGML_OBJECT_WORK_BUFFER
517
+ GGML_OBJECT_TYPE_TENSOR,
518
+ GGML_OBJECT_TYPE_GRAPH,
519
+ GGML_OBJECT_TYPE_WORK_BUFFER
502
520
  };
503
521
 
504
522
  enum ggml_log_level {
@@ -640,9 +658,9 @@ extern "C" {
640
658
  // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
641
659
  // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
642
660
  enum ggml_task_type {
643
- GGML_TASK_INIT = 0,
644
- GGML_TASK_COMPUTE,
645
- GGML_TASK_FINALIZE,
661
+ GGML_TASK_TYPE_INIT = 0,
662
+ GGML_TASK_TYPE_COMPUTE,
663
+ GGML_TASK_TYPE_FINALIZE,
646
664
  };
647
665
 
648
666
  struct ggml_compute_params {
@@ -666,6 +684,16 @@ extern "C" {
666
684
  GGML_NUMA_STRATEGY_COUNT
667
685
  };
668
686
 
687
+ //
688
+ // GUID
689
+ //
690
+
691
+ // GUID types
692
+ typedef uint8_t ggml_guid[16];
693
+ typedef ggml_guid * ggml_guid_t;
694
+
695
+ GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
696
+
669
697
  // misc
670
698
 
671
699
  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
@@ -1645,10 +1673,19 @@ extern "C" {
1645
1673
  int p2,
1646
1674
  int p3);
1647
1675
 
1676
+ // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1677
+ // timesteps: [N,]
1678
+ // return: [N, dim]
1679
+ GGML_API struct ggml_tensor * ggml_timestep_embedding(
1680
+ struct ggml_context * ctx,
1681
+ struct ggml_tensor * timesteps,
1682
+ int dim,
1683
+ int max_period);
1684
+
1648
1685
  // sort rows
1649
1686
  enum ggml_sort_order {
1650
- GGML_SORT_ASC,
1651
- GGML_SORT_DESC,
1687
+ GGML_SORT_ORDER_ASC,
1688
+ GGML_SORT_ORDER_DESC,
1652
1689
  };
1653
1690
 
1654
1691
  GGML_API struct ggml_tensor * ggml_argsort(
@@ -1656,6 +1693,12 @@ extern "C" {
1656
1693
  struct ggml_tensor * a,
1657
1694
  enum ggml_sort_order order);
1658
1695
 
1696
+ GGML_API struct ggml_tensor * ggml_arange(
1697
+ struct ggml_context * ctx,
1698
+ float start,
1699
+ float stop,
1700
+ float step);
1701
+
1659
1702
  // top k elements per row
1660
1703
  GGML_API struct ggml_tensor * ggml_top_k(
1661
1704
  struct ggml_context * ctx,
@@ -1907,12 +1950,11 @@ extern "C" {
1907
1950
 
1908
1951
  // ggml_graph_plan() has to be called before ggml_graph_compute()
1909
1952
  // when plan.work_size > 0, caller must allocate memory for plan.work_data
1910
- GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
1911
- GGML_API int ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1912
-
1953
+ GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
1954
+ GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1913
1955
  // same as ggml_graph_compute() but the work data is allocated as a part of the context
1914
1956
  // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
1915
- GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
1957
+ GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
1916
1958
 
1917
1959
  GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
1918
1960
 
@@ -1941,8 +1983,8 @@ extern "C" {
1941
1983
 
1942
1984
  // optimization methods
1943
1985
  enum ggml_opt_type {
1944
- GGML_OPT_ADAM,
1945
- GGML_OPT_LBFGS,
1986
+ GGML_OPT_TYPE_ADAM,
1987
+ GGML_OPT_TYPE_LBFGS,
1946
1988
  };
1947
1989
 
1948
1990
  // linesearch methods
@@ -1956,12 +1998,12 @@ extern "C" {
1956
1998
 
1957
1999
  // optimization return values
1958
2000
  enum ggml_opt_result {
1959
- GGML_OPT_OK = 0,
1960
- GGML_OPT_DID_NOT_CONVERGE,
1961
- GGML_OPT_NO_CONTEXT,
1962
- GGML_OPT_INVALID_WOLFE,
1963
- GGML_OPT_FAIL,
1964
- GGML_OPT_CANCEL,
2001
+ GGML_OPT_RESULT_OK = 0,
2002
+ GGML_OPT_RESULT_DID_NOT_CONVERGE,
2003
+ GGML_OPT_RESULT_NO_CONTEXT,
2004
+ GGML_OPT_RESULT_INVALID_WOLFE,
2005
+ GGML_OPT_RESULT_FAIL,
2006
+ GGML_OPT_RESULT_CANCEL,
1965
2007
 
1966
2008
  GGML_LINESEARCH_FAIL = -128,
1967
2009
  GGML_LINESEARCH_MINIMUM_STEP,