llama_cpp 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +32 -2
- data/ext/llama_cpp/src/ggml-alloc.c +6 -11
- data/ext/llama_cpp/src/ggml-cuda.cu +1108 -699
- data/ext/llama_cpp/src/ggml-metal.m +93 -24
- data/ext/llama_cpp/src/ggml-metal.metal +407 -174
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +75 -43
- data/ext/llama_cpp/src/ggml.h +42 -32
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +1040 -201
- data/ext/llama_cpp/src/llama.h +13 -7
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml-opencl.cpp
CHANGED
@@ -847,7 +847,7 @@ std::array<std::string, 2> mul_str_values = {
|
|
847
847
|
"mul_f32", "float"
|
848
848
|
};
|
849
849
|
|
850
|
-
std::string& replace(std::string& s, const std::string& from, const std::string& to) {
|
850
|
+
static std::string& replace(std::string& s, const std::string& from, const std::string& to) {
|
851
851
|
size_t pos = 0;
|
852
852
|
while ((pos = s.find(from, pos)) != std::string::npos) {
|
853
853
|
s.replace(pos, from.length(), to);
|
@@ -856,7 +856,7 @@ std::string& replace(std::string& s, const std::string& from, const std::string&
|
|
856
856
|
return s;
|
857
857
|
}
|
858
858
|
|
859
|
-
std::string generate_kernels() {
|
859
|
+
static std::string generate_kernels() {
|
860
860
|
std::stringstream src;
|
861
861
|
src << program_source << '\n';
|
862
862
|
src << k_quants_source << '\n';
|
@@ -1788,7 +1788,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
|
1788
1788
|
return false;
|
1789
1789
|
}
|
1790
1790
|
|
1791
|
-
bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
|
1791
|
+
static bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
|
1792
1792
|
// If device doesn't support FP16
|
1793
1793
|
if (!fp16_support) {
|
1794
1794
|
return false;
|
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
#define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
|
2
1
|
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
|
3
2
|
|
4
3
|
#include "ggml.h"
|
@@ -47,6 +46,10 @@
|
|
47
46
|
// disable "possible loss of data" to avoid hundreds of casts
|
48
47
|
// we should just be careful :)
|
49
48
|
#pragma warning(disable: 4244 4267)
|
49
|
+
|
50
|
+
// disable POSIX deprecation warnings
|
51
|
+
// these functions are never going away, anyway
|
52
|
+
#pragma warning(disable: 4996)
|
50
53
|
#endif
|
51
54
|
|
52
55
|
#if defined(_WIN32)
|
@@ -280,7 +283,7 @@ typedef double ggml_float;
|
|
280
283
|
// 16-bit float
|
281
284
|
// on Arm, we use __fp16
|
282
285
|
// on x86, we use uint16_t
|
283
|
-
#
|
286
|
+
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
284
287
|
|
285
288
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
286
289
|
//
|
@@ -307,12 +310,14 @@ typedef double ggml_float;
|
|
307
310
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
308
311
|
#include <intrin.h>
|
309
312
|
#else
|
313
|
+
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
|
310
314
|
#if !defined(__riscv)
|
311
315
|
#include <immintrin.h>
|
312
316
|
#endif
|
313
317
|
#endif
|
314
318
|
#endif
|
315
319
|
#endif
|
320
|
+
#endif
|
316
321
|
|
317
322
|
#ifdef __riscv_v_intrinsic
|
318
323
|
#include <riscv_vector.h>
|
@@ -4298,10 +4303,21 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
|
|
4298
4303
|
}
|
4299
4304
|
|
4300
4305
|
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
|
4301
|
-
size_t nbytes
|
4302
|
-
|
4303
|
-
|
4306
|
+
size_t nbytes;
|
4307
|
+
size_t blck_size = ggml_blck_size(tensor->type);
|
4308
|
+
if (blck_size == 1) {
|
4309
|
+
nbytes = ggml_type_size(tensor->type);
|
4310
|
+
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
|
4311
|
+
nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
|
4312
|
+
}
|
4304
4313
|
}
|
4314
|
+
else {
|
4315
|
+
nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
|
4316
|
+
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
|
4317
|
+
nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
|
4318
|
+
}
|
4319
|
+
}
|
4320
|
+
|
4305
4321
|
return nbytes;
|
4306
4322
|
}
|
4307
4323
|
|
@@ -17278,10 +17294,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17278
17294
|
} else {
|
17279
17295
|
// wait for other threads to finish
|
17280
17296
|
const int last = node_n;
|
17281
|
-
|
17282
|
-
//sched_yield
|
17297
|
+
while (true) {
|
17298
|
+
// TODO: this sched_yield can have significant impact on the performance - either positive or negative
|
17299
|
+
// depending on the workload and the operating system.
|
17300
|
+
// since it is not clear what is the best approach, it should potentially become user-configurable
|
17301
|
+
// ref: https://github.com/ggerganov/ggml/issues/291
|
17302
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
17303
|
+
sched_yield();
|
17304
|
+
#endif
|
17305
|
+
|
17283
17306
|
node_n = atomic_load(&state->shared->node_n);
|
17284
|
-
|
17307
|
+
if (node_n != last) break;
|
17308
|
+
};
|
17285
17309
|
}
|
17286
17310
|
|
17287
17311
|
// check if we should stop
|
@@ -18332,10 +18356,11 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
18332
18356
|
for (int i = 0; i < cgraph->n_leafs; i++) {
|
18333
18357
|
struct ggml_tensor * node = cgraph->leafs[i];
|
18334
18358
|
|
18335
|
-
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
18359
|
+
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
|
18336
18360
|
i,
|
18337
18361
|
node->ne[0], node->ne[1],
|
18338
|
-
ggml_op_name(node->op));
|
18362
|
+
ggml_op_name(node->op),
|
18363
|
+
ggml_get_name(node));
|
18339
18364
|
}
|
18340
18365
|
|
18341
18366
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -18872,7 +18897,6 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18872
18897
|
// strong Wolfe condition (GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
|
18873
18898
|
return count;
|
18874
18899
|
}
|
18875
|
-
return count;
|
18876
18900
|
}
|
18877
18901
|
}
|
18878
18902
|
|
@@ -20095,27 +20119,27 @@ const char * gguf_type_name(enum gguf_type type) {
|
|
20095
20119
|
return GGUF_TYPE_NAME[type];
|
20096
20120
|
}
|
20097
20121
|
|
20098
|
-
int gguf_get_version(struct gguf_context * ctx) {
|
20122
|
+
int gguf_get_version(const struct gguf_context * ctx) {
|
20099
20123
|
return ctx->header.version;
|
20100
20124
|
}
|
20101
20125
|
|
20102
|
-
size_t gguf_get_alignment(struct gguf_context * ctx) {
|
20126
|
+
size_t gguf_get_alignment(const struct gguf_context * ctx) {
|
20103
20127
|
return ctx->alignment;
|
20104
20128
|
}
|
20105
20129
|
|
20106
|
-
size_t gguf_get_data_offset(struct gguf_context * ctx) {
|
20130
|
+
size_t gguf_get_data_offset(const struct gguf_context * ctx) {
|
20107
20131
|
return ctx->offset;
|
20108
20132
|
}
|
20109
20133
|
|
20110
|
-
void * gguf_get_data(struct gguf_context * ctx) {
|
20134
|
+
void * gguf_get_data(const struct gguf_context * ctx) {
|
20111
20135
|
return ctx->data;
|
20112
20136
|
}
|
20113
20137
|
|
20114
|
-
int gguf_get_n_kv(struct gguf_context * ctx) {
|
20138
|
+
int gguf_get_n_kv(const struct gguf_context * ctx) {
|
20115
20139
|
return ctx->header.n_kv;
|
20116
20140
|
}
|
20117
20141
|
|
20118
|
-
int gguf_find_key(struct gguf_context * ctx, const char * key) {
|
20142
|
+
int gguf_find_key(const struct gguf_context * ctx, const char * key) {
|
20119
20143
|
// return -1 if key not found
|
20120
20144
|
int keyfound = -1;
|
20121
20145
|
|
@@ -20131,85 +20155,85 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
|
|
20131
20155
|
return keyfound;
|
20132
20156
|
}
|
20133
20157
|
|
20134
|
-
const char * gguf_get_key(struct gguf_context * ctx, int i) {
|
20158
|
+
const char * gguf_get_key(const struct gguf_context * ctx, int i) {
|
20135
20159
|
return ctx->kv[i].key.data;
|
20136
20160
|
}
|
20137
20161
|
|
20138
|
-
enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
|
20162
|
+
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) {
|
20139
20163
|
return ctx->kv[i].type;
|
20140
20164
|
}
|
20141
20165
|
|
20142
|
-
enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
|
20166
|
+
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) {
|
20143
20167
|
return ctx->kv[i].value.arr.type;
|
20144
20168
|
}
|
20145
20169
|
|
20146
|
-
const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
|
20170
|
+
const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) {
|
20147
20171
|
return ctx->kv[i].value.arr.data;
|
20148
20172
|
}
|
20149
20173
|
|
20150
|
-
const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
|
20174
|
+
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
|
20151
20175
|
struct gguf_kv * kv = &ctx->kv[key_id];
|
20152
20176
|
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
|
20153
20177
|
return str->data;
|
20154
20178
|
}
|
20155
20179
|
|
20156
|
-
int gguf_get_arr_n(struct gguf_context * ctx, int i) {
|
20180
|
+
int gguf_get_arr_n(const struct gguf_context * ctx, int i) {
|
20157
20181
|
return ctx->kv[i].value.arr.n;
|
20158
20182
|
}
|
20159
20183
|
|
20160
|
-
uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
|
20184
|
+
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) {
|
20161
20185
|
return ctx->kv[i].value.uint8;
|
20162
20186
|
}
|
20163
20187
|
|
20164
|
-
int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
|
20188
|
+
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) {
|
20165
20189
|
return ctx->kv[i].value.int8;
|
20166
20190
|
}
|
20167
20191
|
|
20168
|
-
uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
|
20192
|
+
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) {
|
20169
20193
|
return ctx->kv[i].value.uint16;
|
20170
20194
|
}
|
20171
20195
|
|
20172
|
-
int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
|
20196
|
+
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) {
|
20173
20197
|
return ctx->kv[i].value.int16;
|
20174
20198
|
}
|
20175
20199
|
|
20176
|
-
uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
|
20200
|
+
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) {
|
20177
20201
|
return ctx->kv[i].value.uint32;
|
20178
20202
|
}
|
20179
20203
|
|
20180
|
-
int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
|
20204
|
+
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) {
|
20181
20205
|
return ctx->kv[i].value.int32;
|
20182
20206
|
}
|
20183
20207
|
|
20184
|
-
float gguf_get_val_f32(struct gguf_context * ctx, int i) {
|
20208
|
+
float gguf_get_val_f32(const struct gguf_context * ctx, int i) {
|
20185
20209
|
return ctx->kv[i].value.float32;
|
20186
20210
|
}
|
20187
20211
|
|
20188
|
-
uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
|
20212
|
+
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) {
|
20189
20213
|
return ctx->kv[i].value.uint64;
|
20190
20214
|
}
|
20191
20215
|
|
20192
|
-
int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
|
20216
|
+
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) {
|
20193
20217
|
return ctx->kv[i].value.int64;
|
20194
20218
|
}
|
20195
20219
|
|
20196
|
-
double gguf_get_val_f64(struct gguf_context * ctx, int i) {
|
20220
|
+
double gguf_get_val_f64(const struct gguf_context * ctx, int i) {
|
20197
20221
|
return ctx->kv[i].value.float64;
|
20198
20222
|
}
|
20199
20223
|
|
20200
|
-
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
|
20224
|
+
bool gguf_get_val_bool(const struct gguf_context * ctx, int i) {
|
20201
20225
|
return ctx->kv[i].value.bool_;
|
20202
20226
|
}
|
20203
20227
|
|
20204
|
-
const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
|
20228
|
+
const char * gguf_get_val_str (const struct gguf_context * ctx, int i) {
|
20205
20229
|
return ctx->kv[i].value.str.data;
|
20206
20230
|
}
|
20207
20231
|
|
20208
|
-
int gguf_get_n_tensors(struct gguf_context * ctx) {
|
20232
|
+
int gguf_get_n_tensors(const struct gguf_context * ctx) {
|
20209
20233
|
return ctx->header.n_tensors;
|
20210
20234
|
}
|
20211
20235
|
|
20212
|
-
int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
|
20236
|
+
int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
|
20213
20237
|
// return -1 if tensor not found
|
20214
20238
|
int tensorfound = -1;
|
20215
20239
|
|
@@ -20225,11 +20249,11 @@ int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
|
|
20225
20249
|
return tensorfound;
|
20226
20250
|
}
|
20227
20251
|
|
20228
|
-
size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
|
20252
|
+
size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
|
20229
20253
|
return ctx->infos[i].offset;
|
20230
20254
|
}
|
20231
20255
|
|
20232
|
-
char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
|
20256
|
+
char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
|
20233
20257
|
return ctx->infos[i].name.data;
|
20234
20258
|
}
|
20235
20259
|
|
@@ -20512,7 +20536,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
|
|
20512
20536
|
buf->offset += el_size;
|
20513
20537
|
}
|
20514
20538
|
|
20515
|
-
static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
|
20539
|
+
static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
|
20516
20540
|
// write header
|
20517
20541
|
gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
|
20518
20542
|
gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
|
@@ -20627,7 +20651,7 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
|
|
20627
20651
|
}
|
20628
20652
|
}
|
20629
20653
|
|
20630
|
-
void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
|
20654
|
+
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
|
20631
20655
|
FILE * file = fopen(fname, "wb");
|
20632
20656
|
if (!file) {
|
20633
20657
|
GGML_ASSERT(false && "failed to open file for writing");
|
@@ -20644,7 +20668,7 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only
|
|
20644
20668
|
fclose(file);
|
20645
20669
|
}
|
20646
20670
|
|
20647
|
-
size_t gguf_get_meta_size(struct gguf_context * ctx) {
|
20671
|
+
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
|
20648
20672
|
// no allocs - only compute size
|
20649
20673
|
struct gguf_buf buf = gguf_buf_init(0);
|
20650
20674
|
|
@@ -20653,7 +20677,7 @@ size_t gguf_get_meta_size(struct gguf_context * ctx) {
|
|
20653
20677
|
return buf.offset;
|
20654
20678
|
}
|
20655
20679
|
|
20656
|
-
void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
|
20680
|
+
void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
|
20657
20681
|
struct gguf_buf buf = gguf_buf_init(16*1024);
|
20658
20682
|
|
20659
20683
|
gguf_write_to_buf(ctx, &buf, true);
|
@@ -20729,6 +20753,14 @@ int ggml_cpu_has_arm_fma(void) {
|
|
20729
20753
|
#endif
|
20730
20754
|
}
|
20731
20755
|
|
20756
|
+
int ggml_cpu_has_metal(void) {
|
20757
|
+
#if defined(GGML_USE_METAL)
|
20758
|
+
return 1;
|
20759
|
+
#else
|
20760
|
+
return 0;
|
20761
|
+
#endif
|
20762
|
+
}
|
20763
|
+
|
20732
20764
|
int ggml_cpu_has_f16c(void) {
|
20733
20765
|
#if defined(__F16C__)
|
20734
20766
|
return 1;
|
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -195,6 +195,14 @@
|
|
195
195
|
# define GGML_DEPRECATED(func, hint) func
|
196
196
|
#endif
|
197
197
|
|
198
|
+
#ifndef __GNUC__
|
199
|
+
# define GGML_ATTRIBUTE_FORMAT(...)
|
200
|
+
#elif defined(__MINGW32__)
|
201
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
202
|
+
#else
|
203
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
204
|
+
#endif
|
205
|
+
|
198
206
|
#include <stdint.h>
|
199
207
|
#include <stddef.h>
|
200
208
|
#include <stdbool.h>
|
@@ -685,6 +693,7 @@ extern "C" {
|
|
685
693
|
|
686
694
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
687
695
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
696
|
+
GGML_ATTRIBUTE_FORMAT(2, 3)
|
688
697
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
689
698
|
|
690
699
|
//
|
@@ -1866,39 +1875,39 @@ extern "C" {
|
|
1866
1875
|
|
1867
1876
|
GGML_API const char * gguf_type_name(enum gguf_type type);
|
1868
1877
|
|
1869
|
-
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
1870
|
-
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
1871
|
-
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
1872
|
-
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
1878
|
+
GGML_API int gguf_get_version (const struct gguf_context * ctx);
|
1879
|
+
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
1880
|
+
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
1881
|
+
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
1873
1882
|
|
1874
|
-
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
1875
|
-
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
1876
|
-
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
1883
|
+
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
1884
|
+
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
1885
|
+
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
|
1877
1886
|
|
1878
|
-
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
1879
|
-
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
1887
|
+
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
|
1888
|
+
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
|
1880
1889
|
|
1881
1890
|
// results are undefined if the wrong type is used for the key
|
1882
|
-
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
1883
|
-
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
1884
|
-
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
1885
|
-
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
1886
|
-
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
1887
|
-
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
1888
|
-
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
1889
|
-
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
1890
|
-
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
1891
|
-
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
1892
|
-
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
1893
|
-
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
1894
|
-
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
1895
|
-
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
1896
|
-
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
1897
|
-
|
1898
|
-
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
1899
|
-
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
1900
|
-
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
1901
|
-
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
1891
|
+
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
|
1892
|
+
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
|
1893
|
+
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
|
1894
|
+
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
|
1895
|
+
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
|
1896
|
+
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
|
1897
|
+
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
|
1898
|
+
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
|
1899
|
+
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
|
1900
|
+
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
|
1901
|
+
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
|
1902
|
+
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
|
1903
|
+
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
|
1904
|
+
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
|
1905
|
+
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
1906
|
+
|
1907
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
1908
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
1909
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
1910
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
1902
1911
|
|
1903
1912
|
// overrides existing values or adds a new one
|
1904
1913
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
@@ -1943,11 +1952,11 @@ extern "C" {
|
|
1943
1952
|
//
|
1944
1953
|
|
1945
1954
|
// write the entire context to a binary file
|
1946
|
-
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
1955
|
+
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
1947
1956
|
|
1948
1957
|
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
1949
|
-
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
1950
|
-
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
1958
|
+
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
1959
|
+
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
1951
1960
|
|
1952
1961
|
//
|
1953
1962
|
// system info
|
@@ -1961,6 +1970,7 @@ extern "C" {
|
|
1961
1970
|
GGML_API int ggml_cpu_has_fma (void);
|
1962
1971
|
GGML_API int ggml_cpu_has_neon (void);
|
1963
1972
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
1973
|
+
GGML_API int ggml_cpu_has_metal (void);
|
1964
1974
|
GGML_API int ggml_cpu_has_f16c (void);
|
1965
1975
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
1966
1976
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
data/ext/llama_cpp/src/k_quants.c
CHANGED
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|
2609
2609
|
|
2610
2610
|
memcpy(utmp, x[i].scales, 12);
|
2611
2611
|
|
2612
|
-
|
2612
|
+
uint32x2_t mins8 = { 0 };
|
2613
|
+
mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
|
2614
|
+
mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
|
2615
|
+
|
2613
2616
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
2614
2617
|
utmp[0] &= kmask1;
|
2615
2618
|
|