llama_cpp 0.14.4 → 0.14.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +23 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +10 -0
- data/vendor/tmp/llama.cpp/LICENSE +1 -1
- data/vendor/tmp/llama.cpp/Makefile +11 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +7 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +155 -155
- data/vendor/tmp/llama.cpp/ggml-quants.h +82 -82
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +878 -216
- data/vendor/tmp/llama.cpp/ggml.c +8 -8
- data/vendor/tmp/llama.cpp/ggml.h +7 -7
- data/vendor/tmp/llama.cpp/llama.cpp +686 -124
- data/vendor/tmp/llama.cpp/llama.h +81 -13
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -338,14 +338,14 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
|
|
338
338
|
return GGML_FP32_TO_FP16(x);
|
339
339
|
}
|
340
340
|
|
341
|
-
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n) {
|
342
|
-
for (int i = 0; i < n; i++) {
|
341
|
+
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
|
342
|
+
for (int64_t i = 0; i < n; i++) {
|
343
343
|
y[i] = GGML_FP16_TO_FP32(x[i]);
|
344
344
|
}
|
345
345
|
}
|
346
346
|
|
347
|
-
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
|
348
|
-
|
347
|
+
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
348
|
+
int64_t i = 0;
|
349
349
|
#if defined(__F16C__)
|
350
350
|
for (; i + 7 < n; i += 8) {
|
351
351
|
__m256 x_vec = _mm256_loadu_ps(x + i);
|
@@ -20331,11 +20331,11 @@ size_t ggml_quantize_chunk(
|
|
20331
20331
|
enum ggml_type type,
|
20332
20332
|
const float * src,
|
20333
20333
|
void * dst,
|
20334
|
-
|
20335
|
-
|
20336
|
-
|
20334
|
+
int64_t start,
|
20335
|
+
int64_t nrows,
|
20336
|
+
int64_t n_per_row,
|
20337
20337
|
const float * imatrix) {
|
20338
|
-
const int n = nrows * n_per_row;
|
20338
|
+
const int64_t n = (int64_t) nrows * n_per_row;
|
20339
20339
|
|
20340
20340
|
if (ggml_quantize_requires_imatrix(type)) {
|
20341
20341
|
GGML_ASSERT(imatrix != NULL);
|
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -332,8 +332,8 @@ extern "C" {
|
|
332
332
|
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
|
333
333
|
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
|
334
334
|
|
335
|
-
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
|
336
|
-
GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
|
335
|
+
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
|
336
|
+
GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
|
337
337
|
|
338
338
|
struct ggml_object;
|
339
339
|
struct ggml_context;
|
@@ -2210,9 +2210,9 @@ extern "C" {
|
|
2210
2210
|
enum ggml_type type,
|
2211
2211
|
const float * src,
|
2212
2212
|
void * dst,
|
2213
|
-
|
2214
|
-
|
2215
|
-
|
2213
|
+
int64_t start,
|
2214
|
+
int64_t nrows,
|
2215
|
+
int64_t n_per_row,
|
2216
2216
|
const float * imatrix);
|
2217
2217
|
|
2218
2218
|
//
|
@@ -2377,8 +2377,8 @@ extern "C" {
|
|
2377
2377
|
#else
|
2378
2378
|
#define GGML_RESTRICT restrict
|
2379
2379
|
#endif
|
2380
|
-
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
2381
|
-
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
2380
|
+
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
2381
|
+
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
2382
2382
|
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
2383
2383
|
const void * GGML_RESTRICT y, size_t by, int nrc);
|
2384
2384
|
|