whisper.rn 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
- package/android/src/main/jni.cpp +13 -0
- package/cpp/ggml-alloc.c +78 -26
- package/cpp/ggml-alloc.h +9 -0
- package/cpp/ggml-backend-impl.h +1 -1
- package/cpp/ggml-backend-reg.cpp +19 -3
- package/cpp/ggml-backend.cpp +72 -20
- package/cpp/ggml-backend.h +2 -1
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/cpp/ggml-cpu/arch-fallback.h +50 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/cpp/ggml-cpu/ggml-cpu.c +139 -58
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +170 -18
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/repack.cpp +531 -5
- package/cpp/ggml-cpu/repack.h +14 -0
- package/cpp/ggml-cpu/simd-mappings.h +16 -18
- package/cpp/ggml-cpu/vec.cpp +41 -1
- package/cpp/ggml-cpu/vec.h +241 -138
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +0 -4
- package/cpp/ggml-metal/ggml-metal-context.m +26 -16
- package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
- package/cpp/ggml-metal/ggml-metal-device.h +87 -65
- package/cpp/ggml-metal/ggml-metal-device.m +263 -104
- package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
- package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
- package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
- package/cpp/ggml-metal/ggml-metal.cpp +6 -5
- package/cpp/ggml-metal/ggml-metal.metal +404 -34
- package/cpp/ggml.c +110 -31
- package/cpp/ggml.h +51 -12
- package/cpp/jsi/RNWhisperJSI.cpp +1 -0
- package/cpp/whisper.cpp +17 -4
- package/ios/CMakeLists.txt +21 -1
- package/ios/RNWhisperContext.mm +5 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/jest-mock.js +2 -0
- package/lib/commonjs/jest-mock.js.map +1 -1
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/jest-mock.js +2 -0
- package/lib/module/jest-mock.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +7 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +1 -0
- package/src/jest-mock.ts +2 -0
- package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
- package/src/realtime-transcription/types.ts +9 -0
- package/src/version.json +1 -1
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#import "ggml-metal-device.h"
|
|
2
2
|
|
|
3
3
|
#import "ggml-impl.h"
|
|
4
|
-
#import "ggml-threading.h"
|
|
5
4
|
|
|
6
5
|
#include <Foundation/Foundation.h>
|
|
7
6
|
|
|
@@ -75,14 +74,6 @@ void wsp_ggml_metal_cv_set_bool(wsp_ggml_metal_cv_t cv, bool value, int32_t idx)
|
|
|
75
74
|
|
|
76
75
|
struct wsp_ggml_metal_pipeline {
|
|
77
76
|
id<MTLComputePipelineState> obj;
|
|
78
|
-
|
|
79
|
-
// suggested dispatch sizes
|
|
80
|
-
int nsg;
|
|
81
|
-
|
|
82
|
-
int nr0;
|
|
83
|
-
int nr1;
|
|
84
|
-
|
|
85
|
-
size_t smem;
|
|
86
77
|
};
|
|
87
78
|
|
|
88
79
|
wsp_ggml_metal_pipeline_t wsp_ggml_metal_pipeline_init(void) {
|
|
@@ -90,10 +81,6 @@ wsp_ggml_metal_pipeline_t wsp_ggml_metal_pipeline_init(void) {
|
|
|
90
81
|
|
|
91
82
|
*res = (struct wsp_ggml_metal_pipeline) {
|
|
92
83
|
/*.obj =*/ nil,
|
|
93
|
-
/*.nsg =*/ 0,
|
|
94
|
-
/*.nr0 =*/ 0,
|
|
95
|
-
/*.nr1 =*/ 0,
|
|
96
|
-
/*.smem =*/ 0,
|
|
97
84
|
};
|
|
98
85
|
|
|
99
86
|
return res;
|
|
@@ -105,40 +92,8 @@ void wsp_ggml_metal_pipeline_free(wsp_ggml_metal_pipeline_t pipeline) {
|
|
|
105
92
|
free(pipeline);
|
|
106
93
|
}
|
|
107
94
|
|
|
108
|
-
|
|
109
|
-
pipeline->
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
int wsp_ggml_metal_pipeline_get_nsg(wsp_ggml_metal_pipeline_t pipeline) {
|
|
113
|
-
return pipeline->nsg;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
void wsp_ggml_metal_pipeline_set_nr0(wsp_ggml_metal_pipeline_t pipeline, int nr0) {
|
|
117
|
-
pipeline->nr0 = nr0;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
int wsp_ggml_metal_pipeline_get_nr0(wsp_ggml_metal_pipeline_t pipeline) {
|
|
121
|
-
return pipeline->nr0;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
void wsp_ggml_metal_pipeline_set_nr1(wsp_ggml_metal_pipeline_t pipeline, int nr1) {
|
|
125
|
-
pipeline->nr1 = nr1;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
int wsp_ggml_metal_pipeline_get_nr1(wsp_ggml_metal_pipeline_t pipeline) {
|
|
129
|
-
return pipeline->nr1;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
void wsp_ggml_metal_pipeline_set_smem(wsp_ggml_metal_pipeline_t pipeline, size_t smem) {
|
|
133
|
-
pipeline->smem = smem;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
size_t wsp_ggml_metal_pipeline_get_smem(wsp_ggml_metal_pipeline_t pipeline) {
|
|
137
|
-
return pipeline->smem;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
int wsp_ggml_metal_pipeline_max_theads_per_threadgroup(wsp_ggml_metal_pipeline_t pipeline) {
|
|
141
|
-
return pipeline->obj.maxTotalThreadsPerThreadgroup;
|
|
95
|
+
int wsp_ggml_metal_pipeline_max_theads_per_threadgroup(struct wsp_ggml_metal_pipeline_with_params pipeline) {
|
|
96
|
+
return pipeline.pipeline->obj.maxTotalThreadsPerThreadgroup;
|
|
142
97
|
}
|
|
143
98
|
|
|
144
99
|
struct wsp_ggml_metal_library {
|
|
@@ -146,6 +101,8 @@ struct wsp_ggml_metal_library {
|
|
|
146
101
|
id<MTLDevice> device;
|
|
147
102
|
|
|
148
103
|
wsp_ggml_metal_pipelines_t pipelines; // cache of compiled pipelines
|
|
104
|
+
|
|
105
|
+
NSLock * lock;
|
|
149
106
|
};
|
|
150
107
|
|
|
151
108
|
wsp_ggml_metal_library_t wsp_ggml_metal_library_init(wsp_ggml_metal_device_t dev) {
|
|
@@ -296,9 +253,10 @@ wsp_ggml_metal_library_t wsp_ggml_metal_library_init(wsp_ggml_metal_device_t dev
|
|
|
296
253
|
|
|
297
254
|
wsp_ggml_metal_library_t res = calloc(1, sizeof(struct wsp_ggml_metal_library));
|
|
298
255
|
|
|
299
|
-
res->obj
|
|
300
|
-
res->device
|
|
256
|
+
res->obj = library;
|
|
257
|
+
res->device = device;
|
|
301
258
|
res->pipelines = wsp_ggml_metal_pipelines_init();
|
|
259
|
+
res->lock = [NSLock new];
|
|
302
260
|
|
|
303
261
|
return res;
|
|
304
262
|
}
|
|
@@ -365,6 +323,7 @@ wsp_ggml_metal_library_t wsp_ggml_metal_library_init_from_source(wsp_ggml_metal_
|
|
|
365
323
|
res->obj = library;
|
|
366
324
|
res->device = device;
|
|
367
325
|
res->pipelines = wsp_ggml_metal_pipelines_init();
|
|
326
|
+
res->lock = [NSLock new];
|
|
368
327
|
|
|
369
328
|
return res;
|
|
370
329
|
}
|
|
@@ -380,26 +339,47 @@ void wsp_ggml_metal_library_free(wsp_ggml_metal_library_t lib) {
|
|
|
380
339
|
|
|
381
340
|
wsp_ggml_metal_pipelines_free(lib->pipelines);
|
|
382
341
|
|
|
342
|
+
[lib->lock release];
|
|
343
|
+
|
|
383
344
|
free(lib);
|
|
384
345
|
}
|
|
385
346
|
|
|
386
|
-
|
|
387
|
-
|
|
347
|
+
struct wsp_ggml_metal_pipeline_with_params wsp_ggml_metal_library_get_pipeline(wsp_ggml_metal_library_t lib, const char * name) {
|
|
348
|
+
[lib->lock lock];
|
|
349
|
+
|
|
350
|
+
struct wsp_ggml_metal_pipeline_with_params res = {
|
|
351
|
+
/*.pipeline =*/ nil,
|
|
352
|
+
/*.nr0 =*/ 0,
|
|
353
|
+
/*.nr1 =*/ 0,
|
|
354
|
+
/*.nsg =*/ 0,
|
|
355
|
+
/*.smem =*/ 0,
|
|
356
|
+
};
|
|
357
|
+
|
|
358
|
+
res.pipeline = wsp_ggml_metal_pipelines_get(lib->pipelines, name);
|
|
359
|
+
|
|
360
|
+
[lib->lock unlock];
|
|
361
|
+
|
|
362
|
+
return res;
|
|
388
363
|
}
|
|
389
364
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
365
|
+
struct wsp_ggml_metal_pipeline_with_params wsp_ggml_metal_library_compile_pipeline(wsp_ggml_metal_library_t lib, const char * base, const char * name, wsp_ggml_metal_cv_t cv) {
|
|
366
|
+
struct wsp_ggml_metal_pipeline_with_params res = {
|
|
367
|
+
/*.pipeline =*/ nil,
|
|
368
|
+
/*.nr0 =*/ 0,
|
|
369
|
+
/*.nr1 =*/ 0,
|
|
370
|
+
/*.nsg =*/ 0,
|
|
371
|
+
/*.smem =*/ 0,
|
|
372
|
+
};
|
|
373
|
+
|
|
374
|
+
[lib->lock lock];
|
|
393
375
|
|
|
394
|
-
|
|
395
|
-
if (res) {
|
|
396
|
-
|
|
376
|
+
res.pipeline = wsp_ggml_metal_pipelines_get(lib->pipelines, name);
|
|
377
|
+
if (res.pipeline) {
|
|
378
|
+
[lib->lock unlock];
|
|
397
379
|
|
|
398
380
|
return res;
|
|
399
381
|
}
|
|
400
382
|
|
|
401
|
-
res = wsp_ggml_metal_pipeline_init();
|
|
402
|
-
|
|
403
383
|
@autoreleasepool {
|
|
404
384
|
NSError * error = nil;
|
|
405
385
|
|
|
@@ -414,36 +394,53 @@ wsp_ggml_metal_pipeline_t wsp_ggml_metal_library_compile_pipeline(wsp_ggml_metal
|
|
|
414
394
|
mtl_function = [lib->obj newFunctionWithName:base_func constantValues:cv->obj error:&error];
|
|
415
395
|
}
|
|
416
396
|
if (!mtl_function) {
|
|
417
|
-
|
|
397
|
+
[lib->lock unlock];
|
|
418
398
|
|
|
419
399
|
WSP_GGML_LOG_ERROR("%s: failed to compile pipeline: base = '%s', name = '%s'\n", __func__, base, name);
|
|
420
400
|
if (error) {
|
|
421
401
|
WSP_GGML_LOG_ERROR("%s: %s\n", __func__, [[error description] UTF8String]);
|
|
422
402
|
}
|
|
423
403
|
|
|
424
|
-
return
|
|
404
|
+
return res;
|
|
425
405
|
}
|
|
426
406
|
|
|
427
|
-
|
|
407
|
+
id<MTLComputePipelineState> obj = [lib->device newComputePipelineStateWithFunction:mtl_function error:&error];
|
|
428
408
|
|
|
429
409
|
[mtl_function release];
|
|
430
410
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
411
|
+
if (!obj) {
|
|
412
|
+
[lib->lock unlock];
|
|
413
|
+
|
|
414
|
+
WSP_GGML_LOG_ERROR("%s: failed to create pipeline state: base = '%s', name = '%s'\n", __func__, base, name);
|
|
415
|
+
if (error) {
|
|
416
|
+
WSP_GGML_LOG_ERROR("%s: %s\n", __func__, [[error description] UTF8String]);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
return res;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
WSP_GGML_LOG_DEBUG("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, name,
|
|
423
|
+
(void *) obj,
|
|
424
|
+
(int) obj.maxTotalThreadsPerThreadgroup,
|
|
425
|
+
(int) obj.threadExecutionWidth);
|
|
434
426
|
|
|
435
|
-
if (
|
|
436
|
-
|
|
427
|
+
if (obj.maxTotalThreadsPerThreadgroup == 0 || obj.threadExecutionWidth == 0) {
|
|
428
|
+
[obj release];
|
|
429
|
+
|
|
430
|
+
[lib->lock unlock];
|
|
437
431
|
|
|
438
432
|
WSP_GGML_LOG_ERROR("%s: incompatible pipeline %s\n", __func__, name);
|
|
439
433
|
|
|
440
|
-
return
|
|
434
|
+
return res;
|
|
441
435
|
}
|
|
442
436
|
|
|
443
|
-
|
|
437
|
+
res.pipeline = wsp_ggml_metal_pipeline_init();
|
|
438
|
+
res.pipeline->obj = obj;
|
|
439
|
+
|
|
440
|
+
wsp_ggml_metal_pipelines_add(lib->pipelines, name, res.pipeline);
|
|
444
441
|
}
|
|
445
442
|
|
|
446
|
-
|
|
443
|
+
[lib->lock unlock];
|
|
447
444
|
|
|
448
445
|
return res;
|
|
449
446
|
}
|
|
@@ -485,8 +482,8 @@ void wsp_ggml_metal_encoder_debug_group_pop (wsp_ggml_metal_encoder_t encoder) {
|
|
|
485
482
|
[encoder->obj popDebugGroup];
|
|
486
483
|
}
|
|
487
484
|
|
|
488
|
-
void wsp_ggml_metal_encoder_set_pipeline(wsp_ggml_metal_encoder_t encoder,
|
|
489
|
-
[encoder->obj setComputePipelineState:pipeline->obj];
|
|
485
|
+
void wsp_ggml_metal_encoder_set_pipeline(wsp_ggml_metal_encoder_t encoder, struct wsp_ggml_metal_pipeline_with_params pipeline) {
|
|
486
|
+
[encoder->obj setComputePipelineState:pipeline.pipeline->obj];
|
|
490
487
|
}
|
|
491
488
|
|
|
492
489
|
void wsp_ggml_metal_encoder_set_bytes(wsp_ggml_metal_encoder_t encoder, void * data, size_t size, int idx) {
|
|
@@ -521,11 +518,106 @@ struct wsp_ggml_metal_device {
|
|
|
521
518
|
// ref: https://github.com/ggml-org/llama.cpp/pull/15906
|
|
522
519
|
id<MTLCommandQueue> mtl_queue;
|
|
523
520
|
|
|
521
|
+
wsp_ggml_metal_rsets_t rsets;
|
|
522
|
+
|
|
524
523
|
wsp_ggml_metal_library_t library;
|
|
525
524
|
|
|
526
525
|
struct wsp_ggml_metal_device_props props;
|
|
527
526
|
};
|
|
528
527
|
|
|
528
|
+
//
|
|
529
|
+
// MTLResidenceSet wrapper
|
|
530
|
+
//
|
|
531
|
+
|
|
532
|
+
struct wsp_ggml_metal_rsets {
|
|
533
|
+
NSLock * lock;
|
|
534
|
+
|
|
535
|
+
NSMutableArray * data;
|
|
536
|
+
|
|
537
|
+
// number of seconds since the last graph computation
|
|
538
|
+
// keep the residency sets wired for that amount of time to avoid being collected by the OS
|
|
539
|
+
int keep_alive_s;
|
|
540
|
+
|
|
541
|
+
// background heartbeat thread to keep the residency sets alive
|
|
542
|
+
atomic_bool d_stop;
|
|
543
|
+
atomic_int d_loop;
|
|
544
|
+
|
|
545
|
+
dispatch_group_t d_group;
|
|
546
|
+
};
|
|
547
|
+
|
|
548
|
+
wsp_ggml_metal_rsets_t wsp_ggml_metal_rsets_init(void) {
|
|
549
|
+
wsp_ggml_metal_rsets_t res = calloc(1, sizeof(struct wsp_ggml_metal_rsets));
|
|
550
|
+
|
|
551
|
+
res->lock = [[NSLock alloc] init];
|
|
552
|
+
res->data = [[NSMutableArray alloc] init];
|
|
553
|
+
|
|
554
|
+
// by default keep the memory wired for 3 minutes
|
|
555
|
+
res->keep_alive_s = 3*60;
|
|
556
|
+
|
|
557
|
+
const char * WSP_GGML_METAL_RESIDENCY_KEEP_ALIVE_S = getenv("WSP_GGML_METAL_RESIDENCY_KEEP_ALIVE_S");
|
|
558
|
+
if (WSP_GGML_METAL_RESIDENCY_KEEP_ALIVE_S) {
|
|
559
|
+
res->keep_alive_s = atoi(WSP_GGML_METAL_RESIDENCY_KEEP_ALIVE_S);
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
if (res->keep_alive_s <= 0) {
|
|
563
|
+
res->keep_alive_s = 3*60;
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
WSP_GGML_LOG_INFO("%s: creating a residency set collection (keep_alive = %d s)\n", __func__, res->keep_alive_s);
|
|
567
|
+
|
|
568
|
+
atomic_store_explicit(&res->d_stop, false, memory_order_relaxed);
|
|
569
|
+
atomic_store_explicit(&res->d_loop, 2*res->keep_alive_s, memory_order_relaxed);
|
|
570
|
+
|
|
571
|
+
res->d_group = dispatch_group_create();
|
|
572
|
+
|
|
573
|
+
// start a background thread that periodically requests residency for all the currently active sets in the collection
|
|
574
|
+
// the requests stop after a certain amount of time (keep_alive_s) of inactivity
|
|
575
|
+
dispatch_queue_t d_queue = dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0);
|
|
576
|
+
dispatch_group_async(res->d_group, d_queue, ^{
|
|
577
|
+
#if defined(WSP_GGML_METAL_HAS_RESIDENCY_SETS)
|
|
578
|
+
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {
|
|
579
|
+
while (!atomic_load_explicit(&res->d_stop, memory_order_relaxed)) {
|
|
580
|
+
if (atomic_load_explicit(&res->d_loop, memory_order_relaxed) > 0) {
|
|
581
|
+
[res->lock lock];
|
|
582
|
+
|
|
583
|
+
for (int i = 0; i < (int) res->data.count; ++i) {
|
|
584
|
+
[res->data[i] requestResidency];
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
atomic_fetch_sub_explicit(&res->d_loop, 1, memory_order_relaxed);
|
|
588
|
+
|
|
589
|
+
[res->lock unlock];
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// half a second
|
|
593
|
+
usleep(500 * 1000);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
#endif
|
|
597
|
+
});
|
|
598
|
+
|
|
599
|
+
return res;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
void wsp_ggml_metal_rsets_free(wsp_ggml_metal_rsets_t rsets) {
|
|
603
|
+
if (rsets == NULL) {
|
|
604
|
+
return;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
// note: if you hit this assert, most likely you haven't deallocated all Metal resources before exiting
|
|
608
|
+
WSP_GGML_ASSERT([rsets->data count] == 0);
|
|
609
|
+
|
|
610
|
+
atomic_store_explicit(&rsets->d_stop, true, memory_order_relaxed);
|
|
611
|
+
|
|
612
|
+
dispatch_group_wait(rsets->d_group, DISPATCH_TIME_FOREVER);
|
|
613
|
+
dispatch_release(rsets->d_group);
|
|
614
|
+
|
|
615
|
+
[rsets->data release];
|
|
616
|
+
[rsets->lock release];
|
|
617
|
+
|
|
618
|
+
free(rsets);
|
|
619
|
+
}
|
|
620
|
+
|
|
529
621
|
wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
530
622
|
wsp_ggml_metal_device_t dev = calloc(1, sizeof(struct wsp_ggml_metal_device));
|
|
531
623
|
|
|
@@ -611,8 +703,8 @@ wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
|
611
703
|
WSP_GGML_LOG_WARN("%s: - the tensor API is not supported in this environment - disabling\n", __func__);
|
|
612
704
|
dev->props.has_tensor = false;
|
|
613
705
|
} else {
|
|
614
|
-
|
|
615
|
-
if (!ppl) {
|
|
706
|
+
struct wsp_ggml_metal_pipeline_with_params ppl = wsp_ggml_metal_library_compile_pipeline(lib, "dummy_kernel", "dummy_kernel", nil);
|
|
707
|
+
if (!ppl.pipeline) {
|
|
616
708
|
WSP_GGML_LOG_WARN("%s: - the tensor API is not supported in this environment - disabling\n", __func__);
|
|
617
709
|
dev->props.has_tensor = false;
|
|
618
710
|
}
|
|
@@ -661,8 +753,8 @@ wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
|
661
753
|
WSP_GGML_LOG_WARN("%s: - the tensor API does not support bfloat - disabling bfloat support\n", __func__);
|
|
662
754
|
dev->props.has_bfloat = false;
|
|
663
755
|
} else {
|
|
664
|
-
|
|
665
|
-
if (!ppl) {
|
|
756
|
+
struct wsp_ggml_metal_pipeline_with_params ppl = wsp_ggml_metal_library_compile_pipeline(lib, "dummy_kernel", "dummy_kernel", nil);
|
|
757
|
+
if (!ppl.pipeline) {
|
|
666
758
|
WSP_GGML_LOG_WARN("%s: - the tensor API does not support bfloat - disabling bfloat support\n", __func__);
|
|
667
759
|
dev->props.has_bfloat = false;
|
|
668
760
|
}
|
|
@@ -677,12 +769,21 @@ wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
|
677
769
|
#endif
|
|
678
770
|
|
|
679
771
|
dev->props.use_shared_buffers = dev->props.has_unified_memory;
|
|
772
|
+
#if TARGET_OS_OSX
|
|
773
|
+
// In case of eGPU, shared memory may be preferable.
|
|
774
|
+
dev->props.use_shared_buffers |= [dev->mtl_device location] == MTLDeviceLocationExternal;
|
|
775
|
+
#endif
|
|
680
776
|
if (getenv("WSP_GGML_METAL_SHARED_BUFFERS_DISABLE") != NULL) {
|
|
681
777
|
dev->props.use_shared_buffers = false;
|
|
682
778
|
}
|
|
779
|
+
if (getenv("WSP_GGML_METAL_SHARED_BUFFERS_ENABLE") != NULL) {
|
|
780
|
+
dev->props.use_shared_buffers = true;
|
|
781
|
+
}
|
|
683
782
|
|
|
684
783
|
dev->props.supports_gpu_family_apple7 = [dev->mtl_device supportsFamily:MTLGPUFamilyApple7];
|
|
685
784
|
|
|
785
|
+
dev->props.op_offload_min_batch_size = getenv("WSP_GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("WSP_GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
|
|
786
|
+
|
|
686
787
|
dev->props.max_buffer_size = dev->mtl_device.maxBufferLength;
|
|
687
788
|
dev->props.max_working_set_size = dev->mtl_device.recommendedMaxWorkingSetSize;
|
|
688
789
|
dev->props.max_theadgroup_memory_size = dev->mtl_device.maxThreadgroupMemoryLength;
|
|
@@ -694,7 +795,11 @@ wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
|
694
795
|
WSP_GGML_LOG_ERROR("%s: error: failed to create library\n", __func__);
|
|
695
796
|
}
|
|
696
797
|
|
|
697
|
-
|
|
798
|
+
if (dev->props.use_residency_sets) {
|
|
799
|
+
dev->rsets = wsp_ggml_metal_rsets_init();
|
|
800
|
+
} else {
|
|
801
|
+
dev->rsets = nil;
|
|
802
|
+
}
|
|
698
803
|
|
|
699
804
|
// print MTL GPU family:
|
|
700
805
|
WSP_GGML_LOG_INFO("%s: GPU name: %s\n", __func__, dev->props.name);
|
|
@@ -747,6 +852,8 @@ wsp_ggml_metal_device_t wsp_ggml_metal_device_init(void) {
|
|
|
747
852
|
void wsp_ggml_metal_device_free(wsp_ggml_metal_device_t dev) {
|
|
748
853
|
assert(dev != NULL);
|
|
749
854
|
|
|
855
|
+
wsp_ggml_metal_rsets_free(dev->rsets);
|
|
856
|
+
|
|
750
857
|
wsp_ggml_metal_library_free(dev->library);
|
|
751
858
|
dev->library = NULL;
|
|
752
859
|
|
|
@@ -775,6 +882,42 @@ wsp_ggml_metal_library_t wsp_ggml_metal_device_get_library(wsp_ggml_metal_device
|
|
|
775
882
|
return dev->library;
|
|
776
883
|
}
|
|
777
884
|
|
|
885
|
+
void wsp_ggml_metal_device_rsets_add(wsp_ggml_metal_device_t dev, wsp_ggml_metal_rset_t rset) {
|
|
886
|
+
if (rset == nil) {
|
|
887
|
+
return;
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
WSP_GGML_ASSERT(dev->rsets);
|
|
891
|
+
|
|
892
|
+
[dev->rsets->lock lock];
|
|
893
|
+
|
|
894
|
+
[dev->rsets->data addObject:rset];
|
|
895
|
+
|
|
896
|
+
[dev->rsets->lock unlock];
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
void wsp_ggml_metal_device_rsets_rm(wsp_ggml_metal_device_t dev, wsp_ggml_metal_rset_t rset) {
|
|
900
|
+
if (rset == nil) {
|
|
901
|
+
return;
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
WSP_GGML_ASSERT(dev->rsets);
|
|
905
|
+
|
|
906
|
+
[dev->rsets->lock lock];
|
|
907
|
+
|
|
908
|
+
[dev->rsets->data removeObject:rset];
|
|
909
|
+
|
|
910
|
+
[dev->rsets->lock unlock];
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
void wsp_ggml_metal_device_rsets_keep_alive(wsp_ggml_metal_device_t dev) {
|
|
914
|
+
if (dev->rsets == NULL) {
|
|
915
|
+
return;
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
atomic_store_explicit(&dev->rsets->d_loop, 2*dev->rsets->keep_alive_s, memory_order_relaxed);
|
|
919
|
+
}
|
|
920
|
+
|
|
778
921
|
void wsp_ggml_metal_device_get_memory(wsp_ggml_metal_device_t dev, size_t * free, size_t * total) {
|
|
779
922
|
if (@available(macOS 10.12, iOS 16.0, *)) {
|
|
780
923
|
*total = dev->mtl_device.recommendedMaxWorkingSetSize;
|
|
@@ -820,6 +963,8 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
820
963
|
case WSP_GGML_UNARY_OP_HARDSWISH:
|
|
821
964
|
case WSP_GGML_UNARY_OP_HARDSIGMOID:
|
|
822
965
|
case WSP_GGML_UNARY_OP_EXP:
|
|
966
|
+
case WSP_GGML_UNARY_OP_SOFTPLUS:
|
|
967
|
+
case WSP_GGML_UNARY_OP_EXPM1:
|
|
823
968
|
return wsp_ggml_is_contiguous(op->src[0]) && op->src[0]->type == WSP_GGML_TYPE_F32;
|
|
824
969
|
default:
|
|
825
970
|
return false;
|
|
@@ -852,6 +997,7 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
852
997
|
case WSP_GGML_OP_ACC:
|
|
853
998
|
case WSP_GGML_OP_REPEAT:
|
|
854
999
|
case WSP_GGML_OP_SCALE:
|
|
1000
|
+
case WSP_GGML_OP_FILL:
|
|
855
1001
|
case WSP_GGML_OP_CONV_TRANSPOSE_1D:
|
|
856
1002
|
return true;
|
|
857
1003
|
case WSP_GGML_OP_CONV_TRANSPOSE_2D:
|
|
@@ -869,6 +1015,8 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
869
1015
|
return wsp_ggml_is_contiguous(op->src[0]) && op->src[0]->type == WSP_GGML_TYPE_F32;
|
|
870
1016
|
case WSP_GGML_OP_SUM:
|
|
871
1017
|
return has_simdgroup_reduction && wsp_ggml_is_contiguous(op->src[0]);
|
|
1018
|
+
case WSP_GGML_OP_TRI:
|
|
1019
|
+
return wsp_ggml_is_contiguous_rows(op->src[0]);
|
|
872
1020
|
case WSP_GGML_OP_SUM_ROWS:
|
|
873
1021
|
case WSP_GGML_OP_CUMSUM:
|
|
874
1022
|
case WSP_GGML_OP_MEAN:
|
|
@@ -877,6 +1025,11 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
877
1025
|
return has_simdgroup_reduction && wsp_ggml_is_contiguous_rows(op->src[0]);
|
|
878
1026
|
case WSP_GGML_OP_L2_NORM:
|
|
879
1027
|
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && wsp_ggml_is_contiguous_1(op->src[0]));
|
|
1028
|
+
case WSP_GGML_OP_COUNT_EQUAL:
|
|
1029
|
+
return has_simdgroup_reduction &&
|
|
1030
|
+
op->src[0]->type == WSP_GGML_TYPE_I32 &&
|
|
1031
|
+
op->src[1]->type == WSP_GGML_TYPE_I32 &&
|
|
1032
|
+
op->type == WSP_GGML_TYPE_I64;
|
|
880
1033
|
case WSP_GGML_OP_ARGMAX:
|
|
881
1034
|
return has_simdgroup_reduction;
|
|
882
1035
|
case WSP_GGML_OP_NORM:
|
|
@@ -894,10 +1047,15 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
894
1047
|
case WSP_GGML_OP_POOL_1D:
|
|
895
1048
|
return false;
|
|
896
1049
|
case WSP_GGML_OP_UPSCALE:
|
|
897
|
-
return op->src[0]->type == WSP_GGML_TYPE_F32 && op->op_params[0] == WSP_GGML_SCALE_MODE_NEAREST;
|
|
1050
|
+
return op->src[0]->type == WSP_GGML_TYPE_F32 && op->op_params[0] == WSP_GGML_SCALE_MODE_NEAREST && !(op->op_params[0] & WSP_GGML_SCALE_FLAG_ANTIALIAS);
|
|
898
1051
|
case WSP_GGML_OP_POOL_2D:
|
|
899
1052
|
return op->src[0]->type == WSP_GGML_TYPE_F32;
|
|
900
1053
|
case WSP_GGML_OP_PAD:
|
|
1054
|
+
// TODO: add circular padding support for metal, see https://github.com/ggml-org/llama.cpp/pull/16985
|
|
1055
|
+
if (wsp_ggml_get_op_params_i32(op, 8) != 0) {
|
|
1056
|
+
return false;
|
|
1057
|
+
}
|
|
1058
|
+
|
|
901
1059
|
return (wsp_ggml_get_op_params_i32(op, 0) == 0) && (wsp_ggml_get_op_params_i32(op, 2) == 0) &&
|
|
902
1060
|
(wsp_ggml_get_op_params_i32(op, 4) == 0) && (wsp_ggml_get_op_params_i32(op, 6) == 0);
|
|
903
1061
|
case WSP_GGML_OP_PAD_REFLECT_1D:
|
|
@@ -905,12 +1063,14 @@ bool wsp_ggml_metal_device_supports_op(wsp_ggml_metal_device_t dev, const struct
|
|
|
905
1063
|
case WSP_GGML_OP_LEAKY_RELU:
|
|
906
1064
|
return op->src[0]->type == WSP_GGML_TYPE_F32;
|
|
907
1065
|
case WSP_GGML_OP_ARGSORT:
|
|
1066
|
+
case WSP_GGML_OP_TOP_K:
|
|
908
1067
|
case WSP_GGML_OP_ARANGE:
|
|
909
1068
|
return true;
|
|
910
1069
|
case WSP_GGML_OP_FLASH_ATTN_EXT:
|
|
911
1070
|
// for new head sizes, add checks here
|
|
912
1071
|
if (op->src[0]->ne[0] != 32 &&
|
|
913
1072
|
op->src[0]->ne[0] != 40 &&
|
|
1073
|
+
op->src[0]->ne[0] != 48 &&
|
|
914
1074
|
op->src[0]->ne[0] != 64 &&
|
|
915
1075
|
op->src[0]->ne[0] != 72 &&
|
|
916
1076
|
op->src[0]->ne[0] != 80 &&
|
|
@@ -1061,9 +1221,8 @@ struct wsp_ggml_metal_buffer {
|
|
|
1061
1221
|
// note: cannot use explicity "id<MTLResidencySet>" here because it is not available on certain OSes
|
|
1062
1222
|
id rset;
|
|
1063
1223
|
|
|
1064
|
-
// pointers to global device
|
|
1065
|
-
|
|
1066
|
-
id<MTLCommandQueue> queue;
|
|
1224
|
+
// pointers to global device
|
|
1225
|
+
wsp_ggml_metal_device_t dev;
|
|
1067
1226
|
};
|
|
1068
1227
|
|
|
1069
1228
|
static void wsp_ggml_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
|
|
@@ -1106,7 +1265,7 @@ static bool wsp_ggml_metal_buffer_rset_init(wsp_ggml_metal_buffer_t buf) {
|
|
|
1106
1265
|
desc.initialCapacity = buf->n_buffers;
|
|
1107
1266
|
|
|
1108
1267
|
NSError * error;
|
|
1109
|
-
buf->rset = [buf->
|
|
1268
|
+
buf->rset = [buf->dev->mtl_device newResidencySetWithDescriptor:desc error:&error];
|
|
1110
1269
|
if (error) {
|
|
1111
1270
|
WSP_GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
|
1112
1271
|
[desc release];
|
|
@@ -1167,6 +1326,8 @@ static void * wsp_ggml_metal_host_malloc(size_t n) {
|
|
|
1167
1326
|
wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_init(wsp_ggml_metal_device_t dev, size_t size, bool shared) {
|
|
1168
1327
|
wsp_ggml_metal_buffer_t res = calloc(1, sizeof(struct wsp_ggml_metal_buffer));
|
|
1169
1328
|
|
|
1329
|
+
res->dev = dev;
|
|
1330
|
+
|
|
1170
1331
|
const size_t size_page = sysconf(_SC_PAGESIZE);
|
|
1171
1332
|
|
|
1172
1333
|
size_t size_aligned = size;
|
|
@@ -1191,9 +1352,6 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_init(wsp_ggml_metal_device_t dev,
|
|
|
1191
1352
|
|
|
1192
1353
|
res->owned = true;
|
|
1193
1354
|
|
|
1194
|
-
res->device = wsp_ggml_metal_device_get_obj(dev);
|
|
1195
|
-
res->queue = wsp_ggml_metal_device_get_queue(dev);
|
|
1196
|
-
|
|
1197
1355
|
res->n_buffers = 1;
|
|
1198
1356
|
|
|
1199
1357
|
if (res->all_data != NULL) {
|
|
@@ -1202,12 +1360,12 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_init(wsp_ggml_metal_device_t dev,
|
|
|
1202
1360
|
|
|
1203
1361
|
if (size_aligned > 0) {
|
|
1204
1362
|
if (props_dev->use_shared_buffers && shared) {
|
|
1205
|
-
res->buffers[0].metal = [res->
|
|
1363
|
+
res->buffers[0].metal = [res->dev->mtl_device newBufferWithBytesNoCopy:res->all_data
|
|
1206
1364
|
length:size_aligned
|
|
1207
1365
|
options:MTLResourceStorageModeShared
|
|
1208
1366
|
deallocator:nil];
|
|
1209
1367
|
} else {
|
|
1210
|
-
res->buffers[0].metal = [res->
|
|
1368
|
+
res->buffers[0].metal = [res->dev->mtl_device newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
|
|
1211
1369
|
}
|
|
1212
1370
|
}
|
|
1213
1371
|
|
|
@@ -1228,6 +1386,8 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_init(wsp_ggml_metal_device_t dev,
|
|
|
1228
1386
|
return NULL;
|
|
1229
1387
|
}
|
|
1230
1388
|
|
|
1389
|
+
wsp_ggml_metal_device_rsets_add(dev, res->rset);
|
|
1390
|
+
|
|
1231
1391
|
//wsp_ggml_metal_log_allocated_size(device, size_aligned);
|
|
1232
1392
|
|
|
1233
1393
|
return res;
|
|
@@ -1236,6 +1396,8 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_init(wsp_ggml_metal_device_t dev,
|
|
|
1236
1396
|
wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, void * ptr, size_t size, size_t max_tensor_size) {
|
|
1237
1397
|
wsp_ggml_metal_buffer_t res = calloc(1, sizeof(struct wsp_ggml_metal_buffer));
|
|
1238
1398
|
|
|
1399
|
+
res->dev = dev;
|
|
1400
|
+
|
|
1239
1401
|
res->all_data = ptr;
|
|
1240
1402
|
res->all_size = size;
|
|
1241
1403
|
|
|
@@ -1258,9 +1420,6 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1258
1420
|
size_aligned += (size_page - (size_aligned % size_page));
|
|
1259
1421
|
}
|
|
1260
1422
|
|
|
1261
|
-
res->device = wsp_ggml_metal_device_get_obj(dev);
|
|
1262
|
-
res->queue = wsp_ggml_metal_device_get_queue(dev);
|
|
1263
|
-
|
|
1264
1423
|
const struct wsp_ggml_metal_device_props * props_dev = wsp_ggml_metal_device_get_props(dev);
|
|
1265
1424
|
|
|
1266
1425
|
// the buffer fits into the max buffer size allowed by the device
|
|
@@ -1270,7 +1429,7 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1270
1429
|
res->buffers[res->n_buffers].metal = nil;
|
|
1271
1430
|
|
|
1272
1431
|
if (size_aligned > 0) {
|
|
1273
|
-
res->buffers[res->n_buffers].metal = [res->
|
|
1432
|
+
res->buffers[res->n_buffers].metal = [res->dev->mtl_device newBufferWithBytesNoCopy:ptr length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
|
|
1274
1433
|
|
|
1275
1434
|
if (res->buffers[res->n_buffers].metal == nil) {
|
|
1276
1435
|
WSP_GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
|
|
@@ -1279,7 +1438,7 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1279
1438
|
}
|
|
1280
1439
|
}
|
|
1281
1440
|
|
|
1282
|
-
wsp_ggml_metal_log_allocated_size(res->
|
|
1441
|
+
wsp_ggml_metal_log_allocated_size(res->dev->mtl_device, size_aligned);
|
|
1283
1442
|
|
|
1284
1443
|
++res->n_buffers;
|
|
1285
1444
|
} else {
|
|
@@ -1297,7 +1456,7 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1297
1456
|
res->buffers[res->n_buffers].metal = nil;
|
|
1298
1457
|
|
|
1299
1458
|
if (size_step_aligned > 0) {
|
|
1300
|
-
res->buffers[res->n_buffers].metal = [res->
|
|
1459
|
+
res->buffers[res->n_buffers].metal = [res->dev->mtl_device newBufferWithBytesNoCopy:(void *) ((uint8_t *) ptr + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
|
|
1301
1460
|
|
|
1302
1461
|
if (res->buffers[res->n_buffers].metal == nil) {
|
|
1303
1462
|
WSP_GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_step_aligned / 1024.0 / 1024.0);
|
|
@@ -1306,7 +1465,7 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1306
1465
|
}
|
|
1307
1466
|
}
|
|
1308
1467
|
|
|
1309
|
-
wsp_ggml_metal_log_allocated_size(res->
|
|
1468
|
+
wsp_ggml_metal_log_allocated_size(res->dev->mtl_device, size_step_aligned);
|
|
1310
1469
|
|
|
1311
1470
|
if (i + size_step < size) {
|
|
1312
1471
|
WSP_GGML_LOG_INFO("\n");
|
|
@@ -1324,10 +1483,14 @@ wsp_ggml_metal_buffer_t wsp_ggml_metal_buffer_map(wsp_ggml_metal_device_t dev, v
|
|
|
1324
1483
|
return NULL;
|
|
1325
1484
|
}
|
|
1326
1485
|
|
|
1486
|
+
wsp_ggml_metal_device_rsets_add(dev, res->rset);
|
|
1487
|
+
|
|
1327
1488
|
return res;
|
|
1328
1489
|
}
|
|
1329
1490
|
|
|
1330
1491
|
void wsp_ggml_metal_buffer_free(wsp_ggml_metal_buffer_t buf) {
|
|
1492
|
+
wsp_ggml_metal_device_rsets_rm(buf->dev, buf->rset);
|
|
1493
|
+
|
|
1331
1494
|
for (int i = 0; i < buf->n_buffers; i++) {
|
|
1332
1495
|
[buf->buffers[i].metal release];
|
|
1333
1496
|
}
|
|
@@ -1364,8 +1527,7 @@ void wsp_ggml_metal_buffer_memset_tensor(wsp_ggml_metal_buffer_t buf, struct wsp
|
|
|
1364
1527
|
struct wsp_ggml_metal_buffer_id bid_dst = wsp_ggml_metal_buffer_get_id(buf, tensor);
|
|
1365
1528
|
bid_dst.offs += offset;
|
|
1366
1529
|
|
|
1367
|
-
id<
|
|
1368
|
-
id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
|
|
1530
|
+
id<MTLCommandBuffer> cmd_buf = [buf->dev->mtl_queue commandBufferWithUnretainedReferences];
|
|
1369
1531
|
|
|
1370
1532
|
{
|
|
1371
1533
|
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
|
|
@@ -1391,7 +1553,7 @@ void wsp_ggml_metal_buffer_set_tensor(wsp_ggml_metal_buffer_t buf, struct wsp_gg
|
|
|
1391
1553
|
@autoreleasepool {
|
|
1392
1554
|
// src
|
|
1393
1555
|
void * data_ptr = (void *)(uintptr_t) data; // "const cast" the src data
|
|
1394
|
-
id<MTLBuffer> buf_src = [buf->
|
|
1556
|
+
id<MTLBuffer> buf_src = [buf->dev->mtl_device newBufferWithBytesNoCopy:data_ptr
|
|
1395
1557
|
length:size
|
|
1396
1558
|
options:MTLResourceStorageModeShared
|
|
1397
1559
|
deallocator:nil];
|
|
@@ -1406,8 +1568,7 @@ void wsp_ggml_metal_buffer_set_tensor(wsp_ggml_metal_buffer_t buf, struct wsp_gg
|
|
|
1406
1568
|
// this is alternative to waitUntilCompleted, which should be faster, but don't seem to make much difference
|
|
1407
1569
|
dispatch_semaphore_t completion_semaphore = dispatch_semaphore_create(0);
|
|
1408
1570
|
|
|
1409
|
-
id<
|
|
1410
|
-
id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
|
|
1571
|
+
id<MTLCommandBuffer> cmd_buf = [buf->dev->mtl_queue commandBufferWithUnretainedReferences];
|
|
1411
1572
|
|
|
1412
1573
|
{
|
|
1413
1574
|
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
|
|
@@ -1449,15 +1610,14 @@ void wsp_ggml_metal_buffer_get_tensor(wsp_ggml_metal_buffer_t buf, const struct
|
|
|
1449
1610
|
bid_src.offs += offset;
|
|
1450
1611
|
|
|
1451
1612
|
// dst
|
|
1452
|
-
id<MTLBuffer> buf_dst = [buf->
|
|
1613
|
+
id<MTLBuffer> buf_dst = [buf->dev->mtl_device newBufferWithBytesNoCopy:data
|
|
1453
1614
|
length:size
|
|
1454
1615
|
options:MTLResourceStorageModeShared
|
|
1455
1616
|
deallocator:nil];
|
|
1456
1617
|
|
|
1457
1618
|
WSP_GGML_ASSERT(buf_dst);
|
|
1458
1619
|
|
|
1459
|
-
id<
|
|
1460
|
-
id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
|
|
1620
|
+
id<MTLCommandBuffer> cmd_buf = [buf->dev->mtl_queue commandBufferWithUnretainedReferences];
|
|
1461
1621
|
|
|
1462
1622
|
{
|
|
1463
1623
|
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
|
|
@@ -1483,8 +1643,7 @@ void wsp_ggml_metal_buffer_clear(wsp_ggml_metal_buffer_t buf, uint8_t value) {
|
|
|
1483
1643
|
}
|
|
1484
1644
|
|
|
1485
1645
|
@autoreleasepool {
|
|
1486
|
-
id<
|
|
1487
|
-
id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
|
|
1646
|
+
id<MTLCommandBuffer> cmd_buf = [buf->dev->mtl_queue commandBufferWithUnretainedReferences];
|
|
1488
1647
|
|
|
1489
1648
|
{
|
|
1490
1649
|
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
|