llama_cpp 0.4.0 → 0.5.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -130,13 +130,16 @@
 // The data of the tensor is accessed via the "data" pointer. For example:
 //
 // {
-//    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+//    const int nx = 2;
+//    const int ny = 3;
 //
-//    // a[2, 1] = 1.0f;
-//    *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
+//    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
 //
-//    // a[0, 2] = 2.0f;
-//    *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
+//    for (int y = 0; y < ny; y++) {
+//        for (int x = 0; x < nx; x++) {
+//            *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
+//        }
+//    }
 //
 //    ...
 // }
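The updated comment doubles as a usage pattern. A minimal standalone sketch of the same stride-based element access (the 16 MB context size is an arbitrary choice for this example):

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // small scratch context; the size is arbitrary for this sketch
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    const int nx = 2;
    const int ny = 3;
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);

    // nb[0] is the byte stride between elements within a row and
    // nb[1] the byte stride between rows, hence the char* cast
    // before applying byte offsets
    for (int y = 0; y < ny; y++) {
        for (int x = 0; x < nx; x++) {
            *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
        }
    }

    printf("a[1][1] = %.1f\n",
           *(float *) ((char *) a->data + 1*a->nb[1] + 1*a->nb[0]));

    ggml_free(ctx);
    return 0;
}
```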
@@ -211,12 +214,17 @@
 #define GGML_MAX_OP_PARAMS 32
 #define GGML_DEFAULT_N_THREADS 4
 
+#if UINTPTR_MAX == 0xFFFFFFFF
+    #define GGML_MEM_ALIGN 4
+#else
+    #define GGML_MEM_ALIGN 16
+#endif
 
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
 #define GGUF_MAGIC   0x46554747 // "GGUF"
-#define GGUF_VERSION 1
+#define GGUF_VERSION 2
 
 #define GGUF_DEFAULT_ALIGNMENT 32
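The `UINTPTR_MAX == 0xFFFFFFFF` comparison is a standard compile-time test for 32-bit pointers, so the allocation alignment drops to 4 bytes on 32-bit targets. The same idiom in isolation:

```c
#include <stdint.h>
#include <stdio.h>

// On a 32-bit target uintptr_t is 32 bits wide, so UINTPTR_MAX
// equals 0xFFFFFFFF; on 64-bit targets it is larger.
#if UINTPTR_MAX == 0xFFFFFFFF
    #define MEM_ALIGN 4
#else
    #define MEM_ALIGN 16
#endif

int main(void) {
    printf("pointer width: %zu bits, alignment: %d bytes\n",
           sizeof(void *) * 8, MEM_ALIGN);
    return 0;
}
```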
 
@@ -471,6 +479,9 @@ extern "C" {
         int64_t perf_cycles;
         int64_t perf_time_us;
 
+        struct ggml_tensor * view_src;
+        size_t               view_offs;
+
         void * data;
 
         char name[GGML_MAX_NAME];
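The new fields record where a view tensor borrows its storage from, which presumably also explains why `ggml_view_tensor` (next hunk) loses the `const` on its source argument. A hedged sketch of the relationship the names imply; the exact invariants live in ggml.c, so treat this as an assumption rather than documented behavior:

```c
#include <stddef.h>
#include "ggml.h"

// Assumption: for a view tensor v, view_src points at the tensor that
// owns the underlying buffer and view_offs is the byte offset of the
// view into that buffer.
static float * view_base(struct ggml_tensor * v) {
    if (v->view_src != NULL) {
        // aliases the same storage as (float *) v->data
        return (float *) ((char *) v->view_src->data + v->view_offs);
    }
    return (float *) v->data; // not a view: data is owned directly
}
```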
@@ -653,7 +664,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
 
     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
 
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
 
@@ -944,11 +955,11 @@ extern "C" {
 
     // a - x
     // b - dy
-    // TODO: update with configurable eps
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            float                 eps);
 
     // A: n columns, m rows
     // B: n columns, p rows (i.e. we transpose it internally)
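With the TODO resolved, callers now pass the epsilon explicitly. A hedged call sketch; the names follow the header's comment, and `1e-6f` is an illustrative value, not a documented default:

```c
#include "ggml.h"

// x is the input of the forward rms_norm, dy the incoming gradient;
// eps should presumably match the epsilon used in the forward pass.
static struct ggml_tensor * rms_norm_grad(struct ggml_context * ctx,
                                          struct ggml_tensor  * x,
                                          struct ggml_tensor  * dy) {
    return ggml_rms_norm_back(ctx, x, dy, 1e-6f); // 1e-6f is illustrative
}
```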
@@ -1604,7 +1615,8 @@ extern "C" {
             struct ggml_tensor  * tensor);
 
 
-    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
 
     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
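`ggml_build_backward_expand` writes the backward graph into a caller-provided `ggml_cgraph` instead of returning one by value, which avoids copying the large graph struct. A sketch, under the assumption that the backward graph is seeded from a copy of the forward graph (as the by-value `ggml_build_backward` behaves):

```c
#include "ggml.h"

// Build forward and backward graphs for a scalar output f (e.g. a loss).
static void build_graphs(struct ggml_context * ctx, struct ggml_tensor * f,
                         struct ggml_cgraph * gf, struct ggml_cgraph * gb) {
    *gf = ggml_build_forward(f);
    *gb = *gf; // assumption: backward graph starts as a copy of gf
    ggml_build_backward_expand(ctx, gf, gb, /*keep=*/true);
}
```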
@@ -1669,6 +1681,8 @@ extern "C" {
         GGML_LINESEARCH_INVALID_PARAMETERS,
     };
 
+    typedef void (*ggml_opt_callback)(void * data, float * sched);
+
     // optimization parameters
     //
     //   see ggml.c (ggml_opt_default_params) for default values
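The callback receives opaque user data plus a mutable pointer to the schedule multiplier, so callers can drive custom learning-rate schedules during optimization. A hypothetical warmup callback; the struct, names, and policy are illustrative, not part of the API:

```c
// Hypothetical user data threaded through the callback
struct my_opt_state {
    int step;
    int warmup_steps;
};

// Matches the ggml_opt_callback signature: count steps and write a
// linearly warmed-up schedule multiplier through sched.
static void my_opt_callback(void * data, float * sched) {
    struct my_opt_state * st = (struct my_opt_state *) data;
    st->step++;
    *sched = st->step < st->warmup_steps
        ? (float) st->step / (float) st->warmup_steps
        : 1.0f;
}
```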
@@ -1704,12 +1718,14 @@ extern "C" {
 
             float sched; // schedule multiplier (fixed, decay or warmup)
             float decay; // weight decay for AdamW, use 0.0f to disable
+            int   decay_min_ndim; // minimum number of tensor dimension to apply weight decay
             float alpha; // learning rate
             float beta1;
             float beta2;
             float eps;   // epsilon for numerical stability
             float eps_f; // epsilon for convergence test
             float eps_g; // epsilon for convergence test
+            float gclip; // gradient clipping
         } adam;
 
         // LBFGS parameters
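A hedged sketch of setting the new Adam fields, starting from `ggml_opt_default_params` (referenced in the comment above); the values are illustrative and the by-norm reading of `gclip` is an assumption from the field name:

```c
#include "ggml.h"

static struct ggml_opt_params make_adam_params(void) {
    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.decay          = 0.01f; // AdamW weight decay (illustrative)
    params.adam.decay_min_ndim = 2;     // skip decay for e.g. 1-D biases (assumption)
    params.adam.gclip          = 1.0f;  // clipping threshold (by-norm reading assumed)
    return params;
}
```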
@@ -1737,14 +1753,12 @@ extern "C" {
 
         bool just_initialized;
 
+        float loss_before;
+        float loss_after;
+
         struct {
-            struct ggml_tensor * x;  // view of the parameters
-            struct ggml_tensor * g1; // gradient
-            struct ggml_tensor * g2; // gradient squared
             struct ggml_tensor * m;  // first moment
             struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * mh; // first moment hat
-            struct ggml_tensor * vh; // second moment hat
             struct ggml_tensor * pf; // past function values
             float fx_best;
             float fx_prev;
@@ -1781,10 +1795,10 @@ extern "C" {
 
     // initialize optimizer context
     GGML_API void ggml_opt_init(
-            struct ggml_context * ctx,
+            struct ggml_context     * ctx,
             struct ggml_opt_context * opt,
-            struct ggml_opt_params params,
-            int64_t nx);
+            struct ggml_opt_params    params,
+            int64_t                   nx);
 
     // continue optimizing the function defined by the tensor f
     GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1798,7 +1812,9 @@ extern "C" {
             struct ggml_opt_context * opt,
             struct ggml_tensor * f,
             struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb);
+            struct ggml_cgraph * gb,
+            ggml_opt_callback callback,
+            void * callback_data);
 
     //
     // quantization
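Putting the pieces together: the function this hunk extends is `ggml_opt_resume_g`, which now threads the callback through. A sketch reusing the hypothetical `my_opt_state` and `my_opt_callback` from above, with `opt`, `f`, `gf`, and `gb` assumed to be set up as in the earlier sketches:

```c
#include "ggml.h"

static enum ggml_opt_result run_opt(struct ggml_context * ctx,
                                    struct ggml_opt_context * opt,
                                    struct ggml_tensor * f,
                                    struct ggml_cgraph * gf,
                                    struct ggml_cgraph * gb) {
    struct my_opt_state st = { .step = 0, .warmup_steps = 100 };
    // the callback is invoked during optimization with &st as data
    return ggml_opt_resume_g(ctx, opt, f, gf, gb, my_opt_callback, &st);
}
```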
@@ -1827,6 +1843,9 @@ extern "C" {
         GGUF_TYPE_BOOL    = 7,
         GGUF_TYPE_STRING  = 8,
         GGUF_TYPE_ARRAY   = 9,
+        GGUF_TYPE_UINT64  = 10,
+        GGUF_TYPE_INT64   = 11,
+        GGUF_TYPE_FLOAT64 = 12,
         GGUF_TYPE_COUNT, // marks the end of the enum
     };
 
@@ -1867,6 +1886,9 @@ extern "C" {
     GGML_API uint32_t     gguf_get_val_u32 (struct gguf_context * ctx, int i);
     GGML_API int32_t      gguf_get_val_i32 (struct gguf_context * ctx, int i);
     GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i);
+    GGML_API uint64_t     gguf_get_val_u64 (struct gguf_context * ctx, int i);
+    GGML_API int64_t      gguf_get_val_i64 (struct gguf_context * ctx, int i);
+    GGML_API double       gguf_get_val_f64 (struct gguf_context * ctx, int i);
     GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i);
     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
     GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i);
@@ -1886,6 +1908,9 @@ extern "C" {
     GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
     GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
     GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
+    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
+    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
+    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
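The GGUF additions are symmetric: three new value types (matching the GGUF v2 format bump above), plus getters and setters for each. A sketch round-tripping one 64-bit value through an in-memory context; `gguf_init_empty`, `gguf_find_key`, and `gguf_free` come from the rest of the gguf API, and the key name is illustrative:

```c
#include <inttypes.h>
#include <stdio.h>
#include "ggml.h"

void gguf_u64_roundtrip(void) {
    struct gguf_context * ctx = gguf_init_empty();

    // a value that would overflow the old 32-bit key types
    gguf_set_val_u64(ctx, "example.token_count", 5000000000ULL);

    const int i = gguf_find_key(ctx, "example.token_count");
    if (i >= 0) {
        printf("token_count = %" PRIu64 "\n", gguf_get_val_u64(ctx, i));
    }

    gguf_free(ctx);
}
```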
@@ -1944,6 +1969,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_clblast (void);
     GGML_API int ggml_cpu_has_gpublas (void);
     GGML_API int ggml_cpu_has_sse3    (void);
+    GGML_API int ggml_cpu_has_ssse3   (void);
     GGML_API int ggml_cpu_has_vsx     (void);
 
     //
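Like its siblings, the new entry point reports whether the build enabled the corresponding instruction set. A minimal check:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // each ggml_cpu_has_* returns a 0/1 flag
    printf("SSE3:  %d\n", ggml_cpu_has_sse3());
    printf("SSSE3: %d\n", ggml_cpu_has_ssse3());
    return 0;
}
```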
@@ -2694,13 +2694,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
             const __m256i q8l = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
             __m256i p16l = _mm256_maddubs_epi16(q4l, q8l);
             p16l = _mm256_madd_epi16(scale_l, p16l);
-            sumi = _mm256_add_epi32(sumi, p16l);
 
             const __m256i q8h = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
             __m256i p16h = _mm256_maddubs_epi16(q4h, q8h);
             p16h = _mm256_madd_epi16(scale_h, p16h);
-            sumi = _mm256_add_epi32(sumi, p16h);
+            const __m256i sumj = _mm256_add_epi32(p16l, p16h);
 
+            sumi = _mm256_add_epi32(sumi, sumj);
         }
 
         __m256 vd = _mm256_set1_ps(d);
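The change replaces two dependent additions into `sumi` with one independent add (`p16l + p16h`) followed by a single accumulate, which can shorten the loop-carried dependency chain on the accumulator. A scalar analogy with hypothetical names:

```c
#include <stdint.h>

// Before: sum += a[i]; sum += b[i];  (two chained adds through sum)
// After:  one add off the critical path, one accumulate per iteration.
int32_t reassociated_sum(const int32_t * a, const int32_t * b, int n) {
    int32_t sum = 0;
    for (int i = 0; i < n; i++) {
        const int32_t t = a[i] + b[i]; // independent of sum
        sum += t;                      // single add on the sum chain
    }
    return sum;
}
```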