llama_cpp 0.14.0 → 0.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -53,26 +53,30 @@ extern "C" {
|
|
53
53
|
//
|
54
54
|
#include <arm_neon.h>
|
55
55
|
|
56
|
+
typedef __fp16 ggml_fp16_internal_t;
|
57
|
+
|
56
58
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
57
59
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
58
60
|
|
59
61
|
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
60
62
|
|
61
63
|
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
62
|
-
|
64
|
+
ggml_fp16_internal_t tmp;
|
63
65
|
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
64
66
|
return (float)tmp;
|
65
67
|
}
|
66
68
|
|
67
69
|
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
68
70
|
ggml_fp16_t res;
|
69
|
-
|
71
|
+
ggml_fp16_internal_t tmp = f;
|
70
72
|
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
71
73
|
return res;
|
72
74
|
}
|
73
75
|
|
74
76
|
#else
|
75
77
|
|
78
|
+
typedef uint16_t ggml_fp16_internal_t;
|
79
|
+
|
76
80
|
#ifdef __wasm_simd128__
|
77
81
|
#include <wasm_simd128.h>
|
78
82
|
#else
|
@@ -1951,6 +1951,11 @@ static struct ggml_backend_i kompute_backend_i = {
|
|
1951
1951
|
/* .graph_plan_compute = */ NULL,
|
1952
1952
|
/* .graph_compute = */ ggml_backend_kompute_graph_compute,
|
1953
1953
|
/* .supports_op = */ ggml_backend_kompute_supports_op,
|
1954
|
+
/* .event_new = */ NULL,
|
1955
|
+
/* .event_free = */ NULL,
|
1956
|
+
/* .event_record = */ NULL,
|
1957
|
+
/* .event_wait = */ NULL,
|
1958
|
+
/* .event_synchronize = */ NULL,
|
1954
1959
|
};
|
1955
1960
|
|
1956
1961
|
static ggml_guid_t ggml_backend_kompute_guid() {
|
@@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
280
280
|
id<MTLLibrary> metal_library;
|
281
281
|
|
282
282
|
// load library
|
283
|
+
//
|
284
|
+
// - first check if the library is embedded
|
285
|
+
// - then check if the library is in the bundle
|
286
|
+
// - if not found, load the source and compile it
|
287
|
+
// - if that fails, return NULL
|
283
288
|
{
|
284
289
|
NSBundle * bundle = nil;
|
285
290
|
#ifdef SWIFT_PACKAGE
|
@@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
287
292
|
#else
|
288
293
|
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
|
289
294
|
#endif
|
295
|
+
|
290
296
|
NSError * error = nil;
|
291
|
-
|
292
|
-
|
297
|
+
|
298
|
+
#if GGML_METAL_EMBED_LIBRARY
|
299
|
+
const bool try_metallib = false;
|
300
|
+
#else
|
301
|
+
const bool try_metallib = true;
|
302
|
+
#endif
|
303
|
+
|
304
|
+
NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
|
305
|
+
if (try_metallib && path_lib != nil) {
|
293
306
|
// pre-compiled library found
|
294
|
-
NSURL * libURL = [NSURL fileURLWithPath:
|
295
|
-
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [
|
307
|
+
NSURL * libURL = [NSURL fileURLWithPath:path_lib];
|
308
|
+
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
|
309
|
+
|
296
310
|
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
297
311
|
if (error) {
|
298
312
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
@@ -305,38 +319,41 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
305
319
|
extern const char ggml_metallib_start[];
|
306
320
|
extern const char ggml_metallib_end[];
|
307
321
|
|
308
|
-
NSString * src
|
322
|
+
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
|
309
323
|
#else
|
310
324
|
GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
|
311
325
|
|
312
|
-
NSString *
|
313
|
-
NSString *
|
326
|
+
NSString * path_source;
|
327
|
+
NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
|
314
328
|
|
315
|
-
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__,
|
329
|
+
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
|
316
330
|
|
317
|
-
if (
|
318
|
-
|
331
|
+
if (path_resource) {
|
332
|
+
path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
|
319
333
|
} else {
|
320
|
-
|
334
|
+
path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
|
321
335
|
}
|
322
|
-
|
336
|
+
|
337
|
+
if (path_source == nil) {
|
323
338
|
GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
|
324
|
-
|
339
|
+
path_source = @"ggml-metal.metal";
|
325
340
|
}
|
326
|
-
|
327
|
-
|
341
|
+
|
342
|
+
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
|
343
|
+
|
344
|
+
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
|
328
345
|
if (error) {
|
329
346
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
330
347
|
return NULL;
|
331
348
|
}
|
332
|
-
#endif
|
349
|
+
#endif // GGML_METAL_EMBED_LIBRARY
|
333
350
|
|
334
351
|
@autoreleasepool {
|
335
352
|
// dictionary of preprocessor macros
|
336
353
|
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
|
337
354
|
|
338
355
|
#ifdef GGML_QKK_64
|
339
|
-
prep[@"
|
356
|
+
prep[@"GGML_QKK_64"] = @(1);
|
340
357
|
#endif
|
341
358
|
|
342
359
|
MTLCompileOptions* options = [MTLCompileOptions new];
|
@@ -1642,8 +1659,8 @@ static enum ggml_status ggml_metal_graph_compute(
|
|
1642
1659
|
// TODO: make this more general
|
1643
1660
|
GGML_ASSERT(n_as <= 8);
|
1644
1661
|
|
1645
|
-
// max size of the src1ids array in the kernel
|
1646
|
-
GGML_ASSERT(ne11 <=
|
1662
|
+
// max size of the src1ids array in the kernel shared buffer
|
1663
|
+
GGML_ASSERT(ne11 <= 4096);
|
1647
1664
|
|
1648
1665
|
const int64_t ne20 = src2 ? src2->ne[0] : 0;
|
1649
1666
|
const int64_t ne21 = src2 ? src2->ne[1] : 0;
|
@@ -1741,7 +1758,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|
1741
1758
|
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
|
1742
1759
|
}
|
1743
1760
|
|
1744
|
-
[encoder setThreadgroupMemoryLength:8192 atIndex:0];
|
1761
|
+
[encoder setThreadgroupMemoryLength:GGML_PAD(8192 + 2*ne11, 16) atIndex:0];
|
1745
1762
|
|
1746
1763
|
[encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
|
1747
1764
|
} else {
|
@@ -2820,6 +2837,11 @@ static struct ggml_backend_i ggml_backend_metal_i = {
|
|
2820
2837
|
/* .graph_plan_compute = */ NULL,
|
2821
2838
|
/* .graph_compute = */ ggml_backend_metal_graph_compute,
|
2822
2839
|
/* .supports_op = */ ggml_backend_metal_supports_op,
|
2840
|
+
/* .event_new = */ NULL,
|
2841
|
+
/* .event_free = */ NULL,
|
2842
|
+
/* .event_record = */ NULL,
|
2843
|
+
/* .event_wait = */ NULL,
|
2844
|
+
/* .event_synchronize = */ NULL,
|
2823
2845
|
};
|
2824
2846
|
|
2825
2847
|
void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
|