llama_cpp 0.14.4 → 0.14.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -338,14 +338,14 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
338
338
  return GGML_FP32_TO_FP16(x);
339
339
  }
340
340
 
341
- void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n) {
342
- for (int i = 0; i < n; i++) {
341
+ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
342
+ for (int64_t i = 0; i < n; i++) {
343
343
  y[i] = GGML_FP16_TO_FP32(x[i]);
344
344
  }
345
345
  }
346
346
 
347
- void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
348
- int i = 0;
347
+ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
348
+ int64_t i = 0;
349
349
  #if defined(__F16C__)
350
350
  for (; i + 7 < n; i += 8) {
351
351
  __m256 x_vec = _mm256_loadu_ps(x + i);
@@ -20331,11 +20331,11 @@ size_t ggml_quantize_chunk(
20331
20331
  enum ggml_type type,
20332
20332
  const float * src,
20333
20333
  void * dst,
20334
- int start,
20335
- int nrows,
20336
- int n_per_row,
20334
+ int64_t start,
20335
+ int64_t nrows,
20336
+ int64_t n_per_row,
20337
20337
  const float * imatrix) {
20338
- const int n = nrows * n_per_row;
20338
+ const int64_t n = (int64_t) nrows * n_per_row;
20339
20339
 
20340
20340
  if (ggml_quantize_requires_imatrix(type)) {
20341
20341
  GGML_ASSERT(imatrix != NULL);
@@ -332,8 +332,8 @@ extern "C" {
332
332
  GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
333
333
  GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
334
334
 
335
- GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
336
- GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
335
+ GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
336
+ GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
337
337
 
338
338
  struct ggml_object;
339
339
  struct ggml_context;
@@ -2210,9 +2210,9 @@ extern "C" {
2210
2210
  enum ggml_type type,
2211
2211
  const float * src,
2212
2212
  void * dst,
2213
- int start,
2214
- int nrows,
2215
- int n_per_row,
2213
+ int64_t start,
2214
+ int64_t nrows,
2215
+ int64_t n_per_row,
2216
2216
  const float * imatrix);
2217
2217
 
2218
2218
  //
@@ -2377,8 +2377,8 @@ extern "C" {
2377
2377
  #else
2378
2378
  #define GGML_RESTRICT restrict
2379
2379
  #endif
2380
- typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
2381
- typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
2380
+ typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2381
+ typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2382
2382
  typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
2383
2383
  const void * GGML_RESTRICT y, size_t by, int nrc);
2384
2384