llama_cpp 0.4.0 → 0.5.0

@@ -130,13 +130,16 @@
 // The data of the tensor is accessed via the "data" pointer. For example:
 //
 //   {
-//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+//       const int nx = 2;
+//       const int ny = 3;
 //
-//       // a[2, 1] = 1.0f;
-//       *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
+//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
 //
-//       // a[0, 2] = 2.0f;
-//       *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
+//       for (int y = 0; y < ny; y++) {
+//           for (int x = 0; x < nx; x++) {
+//               *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
+//           }
+//       }
 //
 //       ...
 //   }
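
The rewritten comment generalizes the old hard-coded writes (a[2, 1] = 1.0f and so on) into a loop over the nb[] byte strides. Reading follows the same arithmetic; a minimal sketch, assuming a, nx, and ny from the comment above and stdio.h included:

    // read back a[x, y] via the same byte-stride arithmetic
    for (int y = 0; y < ny; y++) {
        for (int x = 0; x < nx; x++) {
            const float v = *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]);
            printf("a[%d, %d] = %.1f\n", x, y, v);
        }
    }
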
@@ -211,12 +214,17 @@
 #define GGML_MAX_OP_PARAMS      32
 #define GGML_DEFAULT_N_THREADS  4

+#if UINTPTR_MAX == 0xFFFFFFFF
+    #define GGML_MEM_ALIGN 4
+#else
+    #define GGML_MEM_ALIGN 16
+#endif

 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

 #define GGUF_MAGIC   0x46554747 // "GGUF"
-#define GGUF_VERSION 1
+#define GGUF_VERSION 2

 #define GGUF_DEFAULT_ALIGNMENT 32
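
GGML_MEM_ALIGN is now chosen by pointer width: UINTPTR_MAX == 0xFFFFFFFF (from stdint.h) identifies a 32-bit target, which gets 4-byte alignment instead of 16. The GGUF_VERSION bump to 2 pairs with the 64-bit metadata types added further down in this diff. A hedged sketch of how a power-of-two alignment constant like this is typically applied; the ALIGN_UP macro and nbytes are illustrative, not part of this header:

    #include <stddef.h>
    #include <stdint.h>

    /* round size up to the next multiple of align
       (align must be a power of two, as 4 and 16 are) */
    #define ALIGN_UP(size, align) (((size) + (size_t)(align) - 1) & ~((size_t)(align) - 1))

    size_t padded = ALIGN_UP(nbytes, GGML_MEM_ALIGN); /* nbytes is a hypothetical request */
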
@@ -471,6 +479,9 @@ extern "C" {
         int64_t perf_cycles;
         int64_t perf_time_us;

+        struct ggml_tensor * view_src;
+        size_t               view_offs;
+
         void * data;

         char name[GGML_MAX_NAME];
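
Tensors now record their provenance: for a view, view_src points at the tensor that owns the storage and view_offs is the byte offset into it; a tensor that owns its own buffer presumably leaves view_src as NULL. A sketch of resolving a chain of views back to the owning tensor; the helper name is ours, not part of the API:

    // walk view_src links until the tensor that actually owns the data
    static struct ggml_tensor * owner_of(struct ggml_tensor * t) {
        while (t->view_src != NULL) {
            t = t->view_src;
        }
        return t;
    }
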
@@ -653,7 +664,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);

     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
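
Dropping the const on src is consistent with the new view_src field: the view keeps a live pointer back to its source, so the source can no longer be promised immutable. Callers that held only a const handle will need a non-const one:

    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * v = ggml_view_tensor(ctx, t); // t must now be non-const
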
@@ -944,11 +955,11 @@ extern "C" {

     // a - x
     // b - dy
-    // TODO: update with configurable eps
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            float                 eps);

     // A: n columns, m rows
     // B: n columns, p rows (i.e. we transpose it internally)
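
The old TODO is resolved: the epsilon of the RMS norm is now a parameter instead of a constant baked into ggml.c, so the backward pass can match whatever eps the forward pass used. A sketch; 1e-6f is a placeholder value, not a documented default:

    // dx = backward pass of rms_norm: input x, upstream gradient dy
    struct ggml_tensor * dx = ggml_rms_norm_back(ctx, x, dy, 1e-6f);
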
@@ -1604,7 +1615,8 @@ extern "C" {
             struct ggml_tensor * tensor);


-    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
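
ggml_build_backward_expand mirrors ggml_build_forward_expand: it populates a caller-owned backward graph instead of returning a ggml_cgraph by value the way ggml_build_backward does, which matters because the struct is large. A sketch of the expected sequence, assuming f is the scalar loss tensor:

    struct ggml_cgraph gf = ggml_build_forward(f);
    struct ggml_cgraph gb = gf; // the backward graph starts as a copy of the forward one
    ggml_build_backward_expand(ctx, &gf, &gb, /*keep =*/ true);
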
@@ -1669,6 +1681,8 @@ extern "C" {
         GGML_LINESEARCH_INVALID_PARAMETERS,
     };

+    typedef void (*ggml_opt_callback)(void * data, float * sched);
+
     // optimization parameters
     //
     //   see ggml.c (ggml_opt_default_params) for default values
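
ggml_opt_callback lets the optimizer call back into user code each iteration and hand a new sched (the schedule multiplier from the Adam parameters below) back through the out-pointer. A minimal sketch of a linear-warmup callback; my_state is a hypothetical user type passed as callback_data:

    struct my_state { int iter; int warmup; };

    static void my_opt_callback(void * data, float * sched) {
        struct my_state * st = (struct my_state *) data;
        st->iter++;
        // ramp the schedule multiplier up over the first warmup iterations
        *sched = st->iter < st->warmup ? (float) st->iter / (float) st->warmup : 1.0f;
    }
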
@@ -1704,12 +1718,14 @@ extern "C" {

             float sched; // schedule multiplier (fixed, decay or warmup)
             float decay; // weight decay for AdamW, use 0.0f to disable
+            int   decay_min_ndim; // minimum number of tensor dimensions to apply weight decay
             float alpha; // learning rate
             float beta1;
             float beta2;
             float eps;   // epsilon for numerical stability
             float eps_f; // epsilon for convergence test
             float eps_g; // epsilon for convergence test
+            float gclip; // gradient clipping
         } adam;

         // LBFGS parameters
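
Two new AdamW knobs: decay_min_ndim applies weight decay only to tensors with at least that many dimensions (the usual way to exempt 1-D biases and norm weights), and gclip controls gradient clipping. A sketch of setting them; whether 0.0f disables gclip the way it disables decay is our assumption, not stated in the header:

    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.decay          = 0.1f;
    params.adam.decay_min_ndim = 2;    // no decay on 1-D tensors (biases, norms)
    params.adam.gclip          = 1.0f; // clip gradients at 1.0
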
@@ -1737,14 +1753,12 @@ extern "C" {

         bool just_initialized;

+        float loss_before;
+        float loss_after;
+
         struct {
-            struct ggml_tensor * x;  // view of the parameters
-            struct ggml_tensor * g1; // gradient
-            struct ggml_tensor * g2; // gradient squared
             struct ggml_tensor * m;  // first moment
             struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * mh; // first moment hat
-            struct ggml_tensor * vh; // second moment hat
             struct ggml_tensor * pf; // past function values
             float fx_best;
             float fx_prev;
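
The Adam state keeps only the first and second moments m and v; the x/g1/g2/mh/vh scratch tensors are gone, shrinking the optimizer's per-parameter memory footprint. The new loss_before/loss_after fields expose the objective value around the most recent step, e.g. after a run:

    if (ggml_opt_resume(ctx, &opt, f) == GGML_OPT_OK) {
        fprintf(stderr, "loss: %f -> %f\n", opt.loss_before, opt.loss_after);
    }
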
@@ -1781,10 +1795,10 @@ extern "C" {

     // initialize optimizer context
     GGML_API void ggml_opt_init(
-            struct ggml_context * ctx,
+            struct ggml_context     * ctx,
             struct ggml_opt_context * opt,
-            struct ggml_opt_params params,
-            int64_t nx);
+            struct ggml_opt_params    params,
+            int64_t                   nx);

     // continue optimizing the function defined by the tensor f
     GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1798,7 +1812,9 @@ extern "C" {
             struct ggml_opt_context * opt,
             struct ggml_tensor * f,
             struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb);
+            struct ggml_cgraph * gb,
+            ggml_opt_callback callback,
+            void * callback_data);

     //
     // quantization
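
ggml_opt_resume_g threads the callback through. Passing NULL for both new arguments keeps the old behavior; otherwise wire in a callback like the warmup sketch above:

    struct my_state st = { 0, 100 }; // hypothetical state from the earlier sketch
    enum ggml_opt_result res = ggml_opt_resume_g(ctx, &opt, f, &gf, &gb, my_opt_callback, &st);
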
@@ -1827,6 +1843,9 @@ extern "C" {
         GGUF_TYPE_BOOL    = 7,
         GGUF_TYPE_STRING  = 8,
         GGUF_TYPE_ARRAY   = 9,
+        GGUF_TYPE_UINT64  = 10,
+        GGUF_TYPE_INT64   = 11,
+        GGUF_TYPE_FLOAT64 = 12,
         GGUF_TYPE_COUNT, // marks the end of the enum
     };
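
These three entries are the substance of the GGUF_VERSION 1 -> 2 bump earlier in the diff: v2 metadata can carry 64-bit integers and doubles. They are appended after GGUF_TYPE_ARRAY, so the existing wire values 0..9 are unchanged and GGUF_TYPE_COUNT still terminates the enum, which a C11 check can pin down:

    _Static_assert(GGUF_TYPE_FLOAT64 == 12 && GGUF_TYPE_COUNT == 13, "unexpected gguf_type layout");
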
@@ -1867,6 +1886,9 @@ extern "C" {
     GGML_API uint32_t     gguf_get_val_u32 (struct gguf_context * ctx, int i);
     GGML_API int32_t      gguf_get_val_i32 (struct gguf_context * ctx, int i);
     GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i);
+    GGML_API uint64_t     gguf_get_val_u64 (struct gguf_context * ctx, int i);
+    GGML_API int64_t      gguf_get_val_i64 (struct gguf_context * ctx, int i);
+    GGML_API double       gguf_get_val_f64 (struct gguf_context * ctx, int i);
     GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i);
     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
     GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i);
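
The matching getters follow the existing naming pattern. A hedged dispatch sketch over a key's declared type, using gguf_get_kv_type, which is already part of this API:

    switch (gguf_get_kv_type(ctx, i)) {
        case GGUF_TYPE_UINT64:  printf("%llu\n", (unsigned long long) gguf_get_val_u64(ctx, i)); break;
        case GGUF_TYPE_INT64:   printf("%lld\n", (long long)          gguf_get_val_i64(ctx, i)); break;
        case GGUF_TYPE_FLOAT64: printf("%f\n",                        gguf_get_val_f64(ctx, i)); break;
        default: /* pre-v2 types handled as before */ break;
    }
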
@@ -1886,6 +1908,9 @@ extern "C" {
     GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
     GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
     GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
+    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
+    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
+    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
     GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
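
And the symmetric setters, useful for metadata that can overflow 32 bits, such as token or byte counts; the key name below is purely illustrative:

    gguf_set_val_u64(ctx, "general.example.token_count", 1234567890123ULL);
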
@@ -1944,6 +1969,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_clblast    (void);
     GGML_API int ggml_cpu_has_gpublas    (void);
     GGML_API int ggml_cpu_has_sse3       (void);
+    GGML_API int ggml_cpu_has_ssse3      (void);
     GGML_API int ggml_cpu_has_vsx        (void);

     //
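
ggml_cpu_has_ssse3 joins the CPU feature probes, usable the same way as the existing ones:

    printf("SSE3 = %d | SSSE3 = %d | VSX = %d\n",
           ggml_cpu_has_sse3(), ggml_cpu_has_ssse3(), ggml_cpu_has_vsx());
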
@@ -2694,13 +2694,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
         const __m256i q8l = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
         __m256i p16l = _mm256_maddubs_epi16(q4l, q8l);
         p16l = _mm256_madd_epi16(scale_l, p16l);
-        sumi = _mm256_add_epi32(sumi, p16l);

         const __m256i q8h = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
         __m256i p16h = _mm256_maddubs_epi16(q4h, q8h);
         p16h = _mm256_madd_epi16(scale_h, p16h);
-        sumi = _mm256_add_epi32(sumi, p16h);
+        const __m256i sumj = _mm256_add_epi32(p16l, p16h);

+        sumi = _mm256_add_epi32(sumi, sumj);
     }

     __m256 vd = _mm256_set1_ps(d);
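
The change in ggml_vec_dot_q4_K_q8_K is arithmetically neutral: integer vector addition is associative, so (sumi + p16l) + p16h equals sumi + (p16l + p16h). Summing the low and high products into sumj first replaces two serial updates of the accumulator with one, shortening the loop-carried dependency chain on sumi. In scalar terms:

    /* before: two dependent accumulator updates per iteration */
    sumi += p16l;
    sumi += p16h;

    /* after: p16l + p16h computes independently of sumi,
       leaving a single accumulator update on the critical path */
    sumj  = p16l + p16h;
    sumi += sumj;
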