llama_cpp 0.9.0 → 0.9.1

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -219,7 +219,7 @@
219
219
  #define GGML_MAX_CONTEXTS 64
220
220
  #define GGML_MAX_SRC 6
221
221
  #define GGML_MAX_NAME 64
222
- #define GGML_MAX_OP_PARAMS 32
222
+ #define GGML_MAX_OP_PARAMS 64
223
223
  #define GGML_DEFAULT_N_THREADS 4
224
224
 
225
225
  #if UINTPTR_MAX == 0xFFFFFFFF
@@ -709,7 +709,7 @@ extern "C" {
709
709
  // Context tensor enumeration and lookup
710
710
  GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
711
711
  GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
712
- GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
712
+ GGML_API struct ggml_tensor * ggml_get_tensor (struct ggml_context * ctx, const char * name);
713
713
 
714
714
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
715
715
  GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
@@ -1326,8 +1326,13 @@ extern "C" {
1326
1326
  int n_dims,
1327
1327
  int mode,
1328
1328
  int n_ctx,
1329
+ int n_orig_ctx,
1329
1330
  float freq_base,
1330
- float freq_scale);
1331
+ float freq_scale,
1332
+ float ext_factor,
1333
+ float attn_factor,
1334
+ float beta_fast,
1335
+ float beta_slow);
1331
1336
 
1332
1337
  // in-place, returns view(a)
1333
1338
  GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
@@ -1337,8 +1342,17 @@ extern "C" {
1337
1342
  int n_dims,
1338
1343
  int mode,
1339
1344
  int n_ctx,
1345
+ int n_orig_ctx,
1340
1346
  float freq_base,
1341
- float freq_scale);
1347
+ float freq_scale,
1348
+ float ext_factor,
1349
+ float attn_factor,
1350
+ float beta_fast,
1351
+ float beta_slow);
1352
+
1353
+ // compute correction dims for YaRN RoPE scaling
1354
+ void ggml_rope_yarn_corr_dims(
1355
+ int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
1342
1356
 
1343
1357
  // xPos RoPE, in-place, returns view(a)
1344
1358
  GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
@@ -1930,12 +1944,19 @@ extern "C" {
1930
1944
  // quantization
1931
1945
  //
1932
1946
 
1947
+ // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
1933
1948
  GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
1934
1949
  GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
1935
1950
  GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
1936
1951
  GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
1937
1952
  GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
1938
1953
 
1954
+ GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
1955
+ GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
1956
+ GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
1957
+ GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
1958
+ GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
1959
+
1939
1960
  GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
1940
1961
 
1941
1962
  //