llama_cpp 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,26 +53,30 @@ extern "C" {
53
53
  //
54
54
  #include <arm_neon.h>
55
55
 
56
+ typedef __fp16 ggml_fp16_internal_t;
57
+
56
58
  #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
57
59
  #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
58
60
 
59
61
  #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
60
62
 
61
63
  static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
62
- __fp16 tmp;
64
+ ggml_fp16_internal_t tmp;
63
65
  memcpy(&tmp, &h, sizeof(ggml_fp16_t));
64
66
  return (float)tmp;
65
67
  }
66
68
 
67
69
  static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
68
70
  ggml_fp16_t res;
69
- __fp16 tmp = f;
71
+ ggml_fp16_internal_t tmp = f;
70
72
  memcpy(&res, &tmp, sizeof(ggml_fp16_t));
71
73
  return res;
72
74
  }
73
75
 
74
76
  #else
75
77
 
78
+ typedef uint16_t ggml_fp16_internal_t;
79
+
76
80
  #ifdef __wasm_simd128__
77
81
  #include <wasm_simd128.h>
78
82
  #else
@@ -1951,6 +1951,11 @@ static struct ggml_backend_i kompute_backend_i = {
1951
1951
  /* .graph_plan_compute = */ NULL,
1952
1952
  /* .graph_compute = */ ggml_backend_kompute_graph_compute,
1953
1953
  /* .supports_op = */ ggml_backend_kompute_supports_op,
1954
+ /* .event_new = */ NULL,
1955
+ /* .event_free = */ NULL,
1956
+ /* .event_record = */ NULL,
1957
+ /* .event_wait = */ NULL,
1958
+ /* .event_synchronize = */ NULL,
1954
1959
  };
1955
1960
 
1956
1961
  static ggml_guid_t ggml_backend_kompute_guid() {
@@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
280
280
  id<MTLLibrary> metal_library;
281
281
 
282
282
  // load library
283
+ //
284
+ // - first check if the library is embedded
285
+ // - then check if the library is in the bundle
286
+ // - if not found, load the source and compile it
287
+ // - if that fails, return NULL
283
288
  {
284
289
  NSBundle * bundle = nil;
285
290
  #ifdef SWIFT_PACKAGE
@@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
287
292
  #else
288
293
  bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
289
294
  #endif
295
+
290
296
  NSError * error = nil;
291
- NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
292
- if (libPath != nil) {
297
+
298
+ #if GGML_METAL_EMBED_LIBRARY
299
+ const bool try_metallib = false;
300
+ #else
301
+ const bool try_metallib = true;
302
+ #endif
303
+
304
+ NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
305
+ if (try_metallib && path_lib != nil) {
293
306
  // pre-compiled library found
294
- NSURL * libURL = [NSURL fileURLWithPath:libPath];
295
- GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
307
+ NSURL * libURL = [NSURL fileURLWithPath:path_lib];
308
+ GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
309
+
296
310
  metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
297
311
  if (error) {
298
312
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
@@ -305,38 +319,41 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
305
319
  extern const char ggml_metallib_start[];
306
320
  extern const char ggml_metallib_end[];
307
321
 
308
- NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
322
+ NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
309
323
  #else
310
324
  GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
311
325
 
312
- NSString * sourcePath;
313
- NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
326
+ NSString * path_source;
327
+ NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
314
328
 
315
- GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, ggmlMetalPathResources ? [ggmlMetalPathResources UTF8String] : "nil");
329
+ GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
316
330
 
317
- if (ggmlMetalPathResources) {
318
- sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"];
331
+ if (path_resource) {
332
+ path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
319
333
  } else {
320
- sourcePath = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
334
+ path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
321
335
  }
322
- if (sourcePath == nil) {
336
+
337
+ if (path_source == nil) {
323
338
  GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
324
- sourcePath = @"ggml-metal.metal";
339
+ path_source = @"ggml-metal.metal";
325
340
  }
326
- GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [sourcePath UTF8String]);
327
- NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];
341
+
342
+ GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
343
+
344
+ NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
328
345
  if (error) {
329
346
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
330
347
  return NULL;
331
348
  }
332
- #endif
349
+ #endif // GGML_METAL_EMBED_LIBRARY
333
350
 
334
351
  @autoreleasepool {
335
352
  // dictionary of preprocessor macros
336
353
  NSMutableDictionary * prep = [NSMutableDictionary dictionary];
337
354
 
338
355
  #ifdef GGML_QKK_64
339
- prep[@"QK_K"] = @(64);
356
+ prep[@"GGML_QKK_64"] = @(1);
340
357
  #endif
341
358
 
342
359
  MTLCompileOptions* options = [MTLCompileOptions new];
@@ -1642,8 +1659,8 @@ static enum ggml_status ggml_metal_graph_compute(
1642
1659
  // TODO: make this more general
1643
1660
  GGML_ASSERT(n_as <= 8);
1644
1661
 
1645
- // max size of the src1ids array in the kernel stack
1646
- GGML_ASSERT(ne11 <= 512);
1662
+ // max size of the src1ids array in the kernel shared buffer
1663
+ GGML_ASSERT(ne11 <= 4096);
1647
1664
 
1648
1665
  const int64_t ne20 = src2 ? src2->ne[0] : 0;
1649
1666
  const int64_t ne21 = src2 ? src2->ne[1] : 0;
@@ -1741,7 +1758,7 @@ static enum ggml_status ggml_metal_graph_compute(
1741
1758
  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
1742
1759
  }
1743
1760
 
1744
- [encoder setThreadgroupMemoryLength:8192 atIndex:0];
1761
+ [encoder setThreadgroupMemoryLength:GGML_PAD(8192 + 2*ne11, 16) atIndex:0];
1745
1762
 
1746
1763
  [encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
1747
1764
  } else {
@@ -2820,6 +2837,11 @@ static struct ggml_backend_i ggml_backend_metal_i = {
2820
2837
  /* .graph_plan_compute = */ NULL,
2821
2838
  /* .graph_compute = */ ggml_backend_metal_graph_compute,
2822
2839
  /* .supports_op = */ ggml_backend_metal_supports_op,
2840
+ /* .event_new = */ NULL,
2841
+ /* .event_free = */ NULL,
2842
+ /* .event_record = */ NULL,
2843
+ /* .event_wait = */ NULL,
2844
+ /* .event_synchronize = */ NULL,
2823
2845
  };
2824
2846
 
2825
2847
  void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {