llama_cpp 0.14.4 → 0.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +23 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +10 -0
- data/vendor/tmp/llama.cpp/LICENSE +1 -1
- data/vendor/tmp/llama.cpp/Makefile +11 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +7 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +155 -155
- data/vendor/tmp/llama.cpp/ggml-quants.h +82 -82
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +878 -216
- data/vendor/tmp/llama.cpp/ggml.c +8 -8
- data/vendor/tmp/llama.cpp/ggml.h +7 -7
- data/vendor/tmp/llama.cpp/llama.cpp +686 -124
- data/vendor/tmp/llama.cpp/llama.h +81 -13
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -338,14 +338,14 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
|
|
338
338
|
return GGML_FP32_TO_FP16(x);
|
339
339
|
}
|
340
340
|
|
341
|
-
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n) {
|
342
|
-
for (int i = 0; i < n; i++) {
|
341
|
+
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
|
342
|
+
for (int64_t i = 0; i < n; i++) {
|
343
343
|
y[i] = GGML_FP16_TO_FP32(x[i]);
|
344
344
|
}
|
345
345
|
}
|
346
346
|
|
347
|
-
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
|
348
|
-
int i = 0;
|
347
|
+
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
348
|
+
int64_t i = 0;
|
349
349
|
#if defined(__F16C__)
|
350
350
|
for (; i + 7 < n; i += 8) {
|
351
351
|
__m256 x_vec = _mm256_loadu_ps(x + i);
|
@@ -20331,11 +20331,11 @@ size_t ggml_quantize_chunk(
|
|
20331
20331
|
enum ggml_type type,
|
20332
20332
|
const float * src,
|
20333
20333
|
void * dst,
|
20334
|
-
int start,
|
20335
|
-
int nrows,
|
20336
|
-
int n_per_row,
|
20334
|
+
int64_t start,
|
20335
|
+
int64_t nrows,
|
20336
|
+
int64_t n_per_row,
|
20337
20337
|
const float * imatrix) {
|
20338
|
-
const int n = nrows * n_per_row;
|
20338
|
+
const int64_t n = (int64_t) nrows * n_per_row;
|
20339
20339
|
|
20340
20340
|
if (ggml_quantize_requires_imatrix(type)) {
|
20341
20341
|
GGML_ASSERT(imatrix != NULL);
|
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -332,8 +332,8 @@ extern "C" {
|
|
332
332
|
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
|
333
333
|
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
|
334
334
|
|
335
|
-
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
|
336
|
-
GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
|
335
|
+
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
|
336
|
+
GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
|
337
337
|
|
338
338
|
struct ggml_object;
|
339
339
|
struct ggml_context;
|
@@ -2210,9 +2210,9 @@ extern "C" {
|
|
2210
2210
|
enum ggml_type type,
|
2211
2211
|
const float * src,
|
2212
2212
|
void * dst,
|
2213
|
-
int start,
|
2214
|
-
int nrows,
|
2215
|
-
int n_per_row,
|
2213
|
+
int64_t start,
|
2214
|
+
int64_t nrows,
|
2215
|
+
int64_t n_per_row,
|
2216
2216
|
const float * imatrix);
|
2217
2217
|
|
2218
2218
|
//
|
@@ -2377,8 +2377,8 @@ extern "C" {
|
|
2377
2377
|
#else
|
2378
2378
|
#define GGML_RESTRICT restrict
|
2379
2379
|
#endif
|
2380
|
-
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
2381
|
-
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
2380
|
+
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
2381
|
+
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
2382
2382
|
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
2383
2383
|
const void * GGML_RESTRICT y, size_t by, int nrc);
|
2384
2384
|
|