llama_cpp 0.9.0 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -219,7 +219,7 @@
     #define GGML_MAX_CONTEXTS      64
     #define GGML_MAX_SRC           6
     #define GGML_MAX_NAME          64
-   #define GGML_MAX_OP_PARAMS     32
+   #define GGML_MAX_OP_PARAMS     64
    #define GGML_DEFAULT_N_THREADS 4

    #if UINTPTR_MAX == 0xFFFFFFFF
@@ -709,7 +709,7 @@ extern "C" {
     // Context tensor enumeration and lookup
     GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
     GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
-   GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+   GGML_API struct ggml_tensor * ggml_get_tensor      (struct ggml_context * ctx, const char * name);

    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
@@ -1326,8 +1326,13 @@ extern "C" {
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
+           int                   n_orig_ctx,
            float                 freq_base,
-           float                 freq_scale);
+           float                 freq_scale,
+           float                 ext_factor,
+           float                 attn_factor,
+           float                 beta_fast,
+           float                 beta_slow);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
@@ -1337,8 +1342,17 @@ extern "C" {
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
+           int                   n_orig_ctx,
            float                 freq_base,
-           float                 freq_scale);
+           float                 freq_scale,
+           float                 ext_factor,
+           float                 attn_factor,
+           float                 beta_fast,
+           float                 beta_slow);
+
+   // compute correction dims for YaRN RoPE scaling
+   void ggml_rope_yarn_corr_dims(
+       int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);

    // xPos RoPE, in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
@@ -1358,8 +1372,13 @@ extern "C" {
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
+           int                   n_orig_ctx,
            float                 freq_base,
            float                 freq_scale,
+           float                 ext_factor,
+           float                 attn_factor,
+           float                 beta_fast,
+           float                 beta_slow,
            float                 xpos_base,
            bool                  xpos_down);
@@ -1930,12 +1949,19 @@ extern "C" {
     // quantization
     //

+   // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
    GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
    GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
    GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
    GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
    GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);

+   GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
+   GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
+   GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
+   GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
+   GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
+
    GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);

    //