llama_cpp 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -219,7 +219,7 @@
  #define GGML_MAX_CONTEXTS 64
  #define GGML_MAX_SRC 6
  #define GGML_MAX_NAME 64
- #define GGML_MAX_OP_PARAMS 32
+ #define GGML_MAX_OP_PARAMS 64
  #define GGML_DEFAULT_N_THREADS 4
 
  #if UINTPTR_MAX == 0xFFFFFFFF
@@ -709,7 +709,7 @@ extern "C" {
  // Context tensor enumeration and lookup
  GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
  GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
- GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+ GGML_API struct ggml_tensor * ggml_get_tensor (struct ggml_context * ctx, const char * name);
 
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
  GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
@@ -1326,8 +1326,13 @@ extern "C" {
  int n_dims,
  int mode,
  int n_ctx,
+ int n_orig_ctx,
  float freq_base,
- float freq_scale);
+ float freq_scale,
+ float ext_factor,
+ float attn_factor,
+ float beta_fast,
+ float beta_slow);
 
  // in-place, returns view(a)
  GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
@@ -1337,8 +1342,17 @@ extern "C" {
  int n_dims,
  int mode,
  int n_ctx,
+ int n_orig_ctx,
  float freq_base,
- float freq_scale);
+ float freq_scale,
+ float ext_factor,
+ float attn_factor,
+ float beta_fast,
+ float beta_slow);
+
+ // compute correction dims for YaRN RoPE scaling
+ void ggml_rope_yarn_corr_dims(
+ int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
  // xPos RoPE, in-place, returns view(a)
  GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
@@ -1930,12 +1944,19 @@ extern "C" {
  // quantization
  //
 
+ // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
  GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
  GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
  GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
  GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
  GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
 
+ GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
+ GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
+ GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
+ GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
+ GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
+
  GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
 
  //