whisper.rn 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
- package/android/src/main/jni.cpp +13 -0
- package/cpp/ggml-alloc.c +78 -26
- package/cpp/ggml-alloc.h +9 -0
- package/cpp/ggml-backend-impl.h +1 -1
- package/cpp/ggml-backend-reg.cpp +19 -3
- package/cpp/ggml-backend.cpp +72 -20
- package/cpp/ggml-backend.h +2 -1
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/cpp/ggml-cpu/arch-fallback.h +50 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/cpp/ggml-cpu/ggml-cpu.c +139 -58
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +170 -18
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/repack.cpp +531 -5
- package/cpp/ggml-cpu/repack.h +14 -0
- package/cpp/ggml-cpu/simd-mappings.h +16 -18
- package/cpp/ggml-cpu/vec.cpp +41 -1
- package/cpp/ggml-cpu/vec.h +241 -138
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +0 -4
- package/cpp/ggml-metal/ggml-metal-context.m +26 -16
- package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
- package/cpp/ggml-metal/ggml-metal-device.h +87 -65
- package/cpp/ggml-metal/ggml-metal-device.m +263 -104
- package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
- package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
- package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
- package/cpp/ggml-metal/ggml-metal.cpp +6 -5
- package/cpp/ggml-metal/ggml-metal.metal +404 -34
- package/cpp/ggml.c +110 -31
- package/cpp/ggml.h +51 -12
- package/cpp/jsi/RNWhisperJSI.cpp +1 -0
- package/cpp/whisper.cpp +17 -4
- package/ios/CMakeLists.txt +21 -1
- package/ios/RNWhisperContext.mm +5 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/jest-mock.js +2 -0
- package/lib/commonjs/jest-mock.js.map +1 -1
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/jest-mock.js +2 -0
- package/lib/module/jest-mock.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +7 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +1 -0
- package/src/jest-mock.ts +2 -0
- package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
- package/src/realtime-transcription/types.ts +9 -0
- package/src/version.json +1 -1
package/README.md
CHANGED

@@ -98,7 +98,7 @@ Voice Activity Detection allows you to detect speech segments in audio data usin
 import { initWhisperVad } from 'whisper.rn'
 
 const vadContext = await initWhisperVad({
-  filePath: require('./assets/ggml-silero-
+  filePath: require('./assets/ggml-silero-v6.2.0.bin'), // VAD model file
   useGpu: true, // Use GPU acceleration (iOS only)
   nThreads: 4, // Number of threads for processing
 })

package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

@@ -425,6 +425,10 @@ public class WhisperContext {
     }
     data.putString("result", builder.toString());
     data.putArray("segments", segments);
+    String language = getDetectedLanguage(context);
+    if (language != null) {
+      data.putString("language", language);
+    }
     return data;
   }
 
@@ -556,6 +560,7 @@ public class WhisperContext {
   protected static native int getTextSegmentT0(long context, int index);
   protected static native int getTextSegmentT1(long context, int index);
   protected static native boolean getTextSegmentSpeakerTurnNext(long context, int index);
+  protected static native String getDetectedLanguage(long context);
 
   protected static native void createRealtimeTranscribeJob(
       int job_id,
package/android/src/main/jni.cpp
CHANGED

@@ -632,6 +632,19 @@ Java_com_rnwhisper_WhisperContext_getTextSegmentSpeakerTurnNext(
     return whisper_full_get_segment_speaker_turn_next(context, index);
 }
 
+JNIEXPORT jstring JNICALL
+Java_com_rnwhisper_WhisperContext_getDetectedLanguage(
+    JNIEnv *env, jobject thiz, jlong context_ptr) {
+    UNUSED(thiz);
+    struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
+    int lang_id = whisper_full_lang_id(context);
+    const char *lang_str = whisper_lang_str(lang_id);
+    if (lang_str == nullptr) {
+        return nullptr;
+    }
+    return env->NewStringUTF(lang_str);
+}
+
 JNIEXPORT jstring JNICALL
 Java_com_rnwhisper_WhisperContext_bench(
     JNIEnv *env,
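
The new JNI hook is a thin wrapper over two existing whisper.cpp calls. As a standalone sketch (assuming only whisper.h; detected_language is a hypothetical helper name, not part of the package):

#include "whisper.h"

// whisper_full_lang_id() returns the id of the language detected during the
// most recent whisper_full() run; whisper_lang_str() maps that id to a short
// code such as "en", or NULL for an invalid id.
static const char * detected_language(struct whisper_context * ctx) {
    const int lang_id = whisper_full_lang_id(ctx);
    return whisper_lang_str(lang_id);
}
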
package/cpp/ggml-alloc.c
CHANGED

@@ -25,6 +25,7 @@ static bool wsp_ggml_is_view(const struct wsp_ggml_tensor * t) {
 // ops that return true for this function must not use restrict pointers for their backend implementations
 bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
     switch (op) {
+        case WSP_GGML_OP_FILL:
         case WSP_GGML_OP_SCALE:
         case WSP_GGML_OP_DIAG_MASK_ZERO:
         case WSP_GGML_OP_DIAG_MASK_INF:
@@ -311,16 +312,9 @@ static struct buffer_address wsp_ggml_dyn_tallocr_alloc(struct wsp_ggml_dyn_tall
 }
 
 // this is a very naive implementation, but for our case the number of free blocks should be very small
-static void
+static void wsp_ggml_dyn_tallocr_free_bytes(struct wsp_ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
     size = aligned_offset(NULL, size, alloc->alignment);
 
-    AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
-        __func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
-
-#ifdef WSP_GGML_ALLOCATOR_DEBUG
-    remove_allocated_tensor(alloc, addr, tensor);
-#endif
-
     struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
 
     // see if we can merge with an existing block
@@ -356,8 +350,6 @@ static void wsp_ggml_dyn_tallocr_free_tensor(struct wsp_ggml_dyn_tallocr * alloc
     }
     // otherwise, add a new block
     wsp_ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
-
-    WSP_GGML_UNUSED(tensor);
 }
 
 static void wsp_ggml_dyn_tallocr_reset(struct wsp_ggml_dyn_tallocr * alloc) {
@@ -602,7 +594,9 @@ static bool wsp_ggml_gallocr_is_own(wsp_ggml_gallocr_t galloc, struct wsp_ggml_t
 }
 
 static bool wsp_ggml_gallocr_is_allocated(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * t) {
-    return t->data != NULL
+    return t->data != NULL // tensor data already set externally
+        || t->buffer // tensor on external buffer (but not yet allocated)
+        || wsp_ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
 }
 
 // free the extra space at the end if the new tensor is smaller
@@ -615,13 +609,17 @@ static void wsp_ggml_gallocr_free_extra_space(wsp_ggml_gallocr_t galloc, struct
 
     WSP_GGML_ASSERT(parent_size >= node_size);
 
+    // note: we want after the freeing the chunks to continue to be aligned
+    struct wsp_ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
+    parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
+    node_size   = aligned_offset(NULL, node_size, p_alloc->alignment);
+
     if (parent_size > node_size) {
-        struct wsp_ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
         struct buffer_address p_addr = p_hn->addr;
         p_addr.offset += node_size;
         size_t extra_size = parent_size - node_size;
         AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
-
+        wsp_ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
     }
 }
 
@@ -705,7 +703,14 @@ static void wsp_ggml_gallocr_free_node(wsp_ggml_gallocr_t galloc, struct wsp_ggm
     struct wsp_ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
     wsp_ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
     size_t size = wsp_ggml_backend_buft_get_alloc_size(buft, node);
-
+
+    AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
+        __func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
+#ifdef WSP_GGML_ALLOCATOR_DEBUG
+    remove_allocated_tensor(alloc, hn->addr, node);
+#endif
+
+    wsp_ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
     hn->allocated = false;
 }
 
@@ -820,7 +825,8 @@ static void wsp_ggml_gallocr_alloc_graph_impl(wsp_ggml_gallocr_t galloc, struct
     }
 }
 
-bool
+static bool wsp_ggml_gallocr_reserve_n_impl(
+        wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -921,15 +927,23 @@ bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgrap
         }
         if (realloc) {
 #ifndef NDEBUG
-
-
+            {
+                size_t cur_size = galloc->buffers[i] ? wsp_ggml_vbuffer_size(galloc->buffers[i]) : 0;
+                if (cur_size > 0) {
+                    WSP_GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
+                        __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+                }
+            }
 #endif
-
             wsp_ggml_vbuffer_free(galloc->buffers[i]);
-
-
-
-
+            if (no_alloc) {
+                galloc->buffers[i] = NULL;
+            } else {
+                galloc->buffers[i] = wsp_ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], WSP_GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+                if (galloc->buffers[i] == NULL) {
+                    WSP_GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), new_size);
+                    return false;
+                }
             }
         }
     }
@@ -937,6 +951,21 @@ bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgrap
     return true;
 }
 
+void wsp_ggml_gallocr_reserve_n_size(
+        wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+    WSP_GGML_ASSERT(wsp_ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ true));
+    for (int i = 0; i < galloc->n_buffers; i++) {
+        sizes[i] = 0;
+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+            sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+        }
+    }
+}
+
+bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+    return wsp_ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ false);
+}
+
 bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph *graph) {
     return wsp_ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
@@ -1139,7 +1168,8 @@ static bool alloc_tensor_range(struct wsp_ggml_context * ctx,
     return true;
 }
 
-wsp_ggml_backend_buffer_t
+static wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(
+        struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
     WSP_GGML_ASSERT(wsp_ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = wsp_ggml_backend_buft_get_alignment(buft);
@@ -1147,6 +1177,7 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
     wsp_ggml_backend_buffer_t * buffers = NULL;
     size_t n_buffers = 0;
+    *nbytes_total = 0;
 
     size_t cur_buf_size = 0;
     struct wsp_ggml_tensor * first = wsp_ggml_get_first_tensor(ctx);
@@ -1158,10 +1189,11 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
         if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
-            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+            if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;
             }
             first = t;
+            *nbytes_total += cur_buf_size;
             cur_buf_size = this_size;
         } else {
             cur_buf_size += this_size;
@@ -1170,15 +1202,21 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
     // allocate remaining tensors
     if (cur_buf_size > 0) {
-
+        *nbytes_total += cur_buf_size;
+        if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
             return NULL;
         }
     }
 
+    if (no_alloc) {
+        return NULL;
+    }
+
     if (n_buffers == 0) {
 #ifndef NDEBUG
         WSP_GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
+        WSP_GGML_ASSERT(!buffers);
         return NULL;
     }
 
@@ -1188,10 +1226,24 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
     } else {
         buffer = wsp_ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
     }
-
+    if (buffers) {
+        free(buffers); // can be NULL if context is empty or no_alloc
+    }
     return buffer;
 }
 
+size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    wsp_ggml_backend_buffer_t buf = wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
+    WSP_GGML_ASSERT(!buf);
+    return nbytes_total;
+}
+
+wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    return wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
+}
+
 wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend) {
     return wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, wsp_ggml_backend_get_default_buffer_type(backend));
 }
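
The refactoring above threads a no_alloc flag through the reservation path, so buffer sizes can be measured without committing backend memory. A minimal usage sketch, assuming a single-buffer galloc and a caller-built worst-case graph (measure_compute_buffer is a hypothetical helper, not part of the package):

#include "ggml-alloc.h"

// Dry-run reservation: returns the size wsp_ggml_gallocr_reserve_n would
// allocate for this graph, without actually allocating it.
size_t measure_compute_buffer(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph) {
    size_t size = 0; // one slot per galloc buffer; a single-buffer galloc writes one value
    wsp_ggml_gallocr_reserve_n_size(galloc, graph, /*node_buffer_ids =*/ NULL, /*leaf_buffer_ids =*/ NULL, &size);
    return size;
}
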
package/cpp/ggml-alloc.h
CHANGED

@@ -53,7 +53,14 @@ WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
 // call with a worst-case graph to avoid buffer reallocations
 // not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
 // returns false if the buffer allocation failed
+// wsp_ggml_gallocr_resrve_n_size writes the buffer sizes per galloc buffer that would be allocated by wsp_ggml_gallocr_reserve_n to sizes
 WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+WSP_GGML_API void wsp_ggml_gallocr_reserve_n_size(
+    wsp_ggml_gallocr_t galloc,
+    struct wsp_ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids,
+    size_t * sizes);
 WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
     wsp_ggml_gallocr_t galloc,
     struct wsp_ggml_cgraph * graph,
@@ -68,6 +75,8 @@ WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc,
 
 // Utils
 // Create a buffer and allocate all the tensors in a wsp_ggml_context
+// wsp_ggml_backend_alloc_ctx_tensors_from_buft_size returns the size of the buffer that would be allocated by wsp_ggml_backend_alloc_ctx_tensors_from_buft
+WSP_GGML_API size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
 WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
 WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);
 
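
A sketch of how the new sizing helper can pair with the existing allocator: estimate a context's tensor footprint on a buffer type before allocating it (alloc_if_it_fits is a hypothetical caller; error handling omitted):

#include "ggml-alloc.h"

// Dry-run first: ask how many bytes the tensors in `ctx` would occupy on
// `buft`, then perform the real allocation only if it fits the budget.
wsp_ggml_backend_buffer_t alloc_if_it_fits(struct wsp_ggml_context * ctx,
                                           wsp_ggml_backend_buffer_type_t buft,
                                           size_t budget) {
    size_t needed = wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    if (needed > budget) {
        return NULL; // caller can fall back to another buffer type
    }
    return wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
}
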
package/cpp/ggml-backend-impl.h
CHANGED

@@ -144,7 +144,7 @@ extern "C" {
         // device description: short informative description of the device, could be the model name
         const char * (*get_description)(wsp_ggml_backend_dev_t dev);
 
-        // device memory in bytes
+        // device memory in bytes: 0 bytes to indicate no memory to report
        void (*get_memory)(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total);
 
         // device type
package/cpp/ggml-backend-reg.cpp
CHANGED

@@ -73,6 +73,10 @@
 #include "ggml-cann.h"
 #endif
 
+#ifdef WSP_GGML_USE_ZENDNN
+#include "ggml-zendnn.h"
+#endif
+
 // disable C++17 deprecation warning for std::codecvt_utf8
 #if defined(__clang__)
 #    pragma clang diagnostic push
@@ -203,6 +207,9 @@ struct wsp_ggml_backend_registry {
 #ifdef WSP_GGML_USE_OPENCL
         register_backend(wsp_ggml_backend_opencl_reg());
 #endif
+#ifdef WSP_GGML_USE_ZENDNN
+        register_backend(wsp_ggml_backend_zendnn_reg());
+#endif
 #ifdef WSP_GGML_USE_HEXAGON
         register_backend(wsp_ggml_backend_hexagon_reg());
 #endif
@@ -534,8 +541,12 @@ static wsp_ggml_backend_reg_t wsp_ggml_backend_load_best(const char * name, bool
     fs::path best_path;
 
     for (const auto & search_path : search_paths) {
-        if (!fs::exists(search_path)) {
-
+        if (std::error_code ec; !fs::exists(search_path, ec)) {
+            if (ec) {
+                WSP_GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
+            } else {
+                WSP_GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+            }
             continue;
         }
         fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
@@ -575,8 +586,12 @@ static wsp_ggml_backend_reg_t wsp_ggml_backend_load_best(const char * name, bool
     for (const auto & search_path : search_paths) {
         fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
         fs::path path = search_path / filename;
-        if (fs::exists(path)) {
+        if (std::error_code ec; fs::exists(path, ec)) {
             return get_reg().load_backend(path, silent);
+        } else {
+            if (ec) {
+                WSP_GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(path).c_str(), ec.message().c_str());
+            }
         }
     }
     return nullptr;
@@ -597,6 +612,7 @@ void wsp_ggml_backend_load_all_from_path(const char * dir_path) {
 #endif
 
     wsp_ggml_backend_load_best("blas", silent, dir_path);
+    wsp_ggml_backend_load_best("zendnn", silent, dir_path);
     wsp_ggml_backend_load_best("cann", silent, dir_path);
     wsp_ggml_backend_load_best("cuda", silent, dir_path);
     wsp_ggml_backend_load_best("hip", silent, dir_path);
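
The switch to the std::error_code overloads matters because the throwing overload of std::filesystem::exists raises std::filesystem::filesystem_error when the underlying stat fails (for example, permission denied on a search path). A standalone illustration of the non-throwing pattern used above (the path is a made-up example):

#include <filesystem>
#include <iostream>
#include <system_error>

int main() {
    std::error_code ec;
    // Non-throwing overload: a failed stat sets `ec` and returns false
    // instead of throwing.
    const bool found = std::filesystem::exists("/opt/ggml-backends", ec);
    if (ec) {
        std::cout << "stat failed: " << ec.message() << '\n';
    } else {
        std::cout << (found ? "exists" : "does not exist") << '\n';
    }
    return 0;
}
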
package/cpp/ggml-backend.cpp
CHANGED

@@ -36,12 +36,11 @@ const char * wsp_ggml_backend_buft_name(wsp_ggml_backend_buffer_type_t buft) {
 }
 
 wsp_ggml_backend_buffer_t wsp_ggml_backend_buft_alloc_buffer(wsp_ggml_backend_buffer_type_t buft, size_t size) {
+    WSP_GGML_ASSERT(buft);
     if (size == 0) {
         // return a dummy buffer for zero-sized allocations
         return wsp_ggml_backend_buffer_init(buft, {}, NULL, 0);
     }
-
-    WSP_GGML_ASSERT(buft);
     return buft->iface.alloc_buffer(buft, size);
 }
 
@@ -128,6 +127,12 @@ void * wsp_ggml_backend_buffer_get_base(wsp_ggml_backend_buffer_t buffer) {
         return NULL;
     }
 
+    // FIXME JG: a multi_buffer has a non-zero size, according to the above comment get_base is not optional,
+    // I don't know whether the above comment is correct
+    if (!buffer->iface.get_base) {
+        return NULL;
+    }
+
     void * base = buffer->iface.get_base(buffer);
 
     WSP_GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
@@ -723,6 +728,12 @@ struct wsp_ggml_backend_sched {
     bool op_offload;
 
     int debug;
+
+    // used for debugging graph reallocations [WSP_GGML_SCHED_DEBUG_REALLOC]
+    // ref: https://github.com/ggml-org/llama.cpp/pull/17617
+    int debug_realloc;
+    int debug_graph_size;
+    int debug_prev_graph_size;
 };
 
 #define hash_id(tensor) wsp_ggml_hash_find_or_insert(&sched->hash_set, tensor)

@@ -1234,10 +1245,8 @@ void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct w
                 tensor_copy = wsp_ggml_dup_tensor_layout(sched->ctx, src);
                 wsp_ggml_format_name(tensor_copy, "%s#%s#%d", wsp_ggml_backend_name(backend), src->name, c);
             }
-
-
-                wsp_ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
-            }
+            wsp_ggml_set_input(tensor_copy);
+            wsp_ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
             tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
             SET_CAUSE(tensor_copy, "4.cpy");
         }
@@ -1289,6 +1298,11 @@ void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct w
     }
 
     int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*WSP_GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
+
+    // remember the actual graph_size for performing reallocation checks later [WSP_GGML_SCHED_DEBUG_REALLOC]
+    sched->debug_prev_graph_size = sched->debug_graph_size;
+    sched->debug_graph_size      = graph_size;
+
     if (sched->graph.size < graph_size) {
         sched->graph.size = graph_size;
         sched->graph.nodes = (wsp_ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct wsp_ggml_tensor *));
@@ -1395,14 +1409,27 @@ static bool wsp_ggml_backend_sched_alloc_splits(wsp_ggml_backend_sched_t sched)
 
     // allocate graph
     if (backend_ids_changed || !wsp_ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
+#ifndef NDEBUG
+        WSP_GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
+#endif
+
+        if (sched->debug_realloc > 0) {
+            // we are interested only in situations where the graph was reallocated even though its size remained the same [WSP_GGML_SCHED_DEBUG_REALLOC]
+            // example: https://github.com/ggml-org/llama.cpp/pull/17143
+            const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
+
+            if (unexpected || sched->debug_realloc > 1) {
+                WSP_GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
+                    sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
+            }
+        }
+
         // the re-allocation may cause the split inputs to be moved to a different address
         // synchronize without wsp_ggml_backend_sched_synchronize to avoid changing cur_copy
         for (int i = 0; i < sched->n_backends; i++) {
             wsp_ggml_backend_synchronize(sched->backends[i]);
         }
-
-        WSP_GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
-#endif
+
         wsp_ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
         if (!wsp_ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
             WSP_GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__);
@@ -1614,6 +1641,14 @@ wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(
 
     const char * WSP_GGML_SCHED_DEBUG = getenv("WSP_GGML_SCHED_DEBUG");
     sched->debug = WSP_GGML_SCHED_DEBUG ? atoi(WSP_GGML_SCHED_DEBUG) : 0;
+
+    sched->debug_realloc = 0;
+#ifdef WSP_GGML_SCHED_NO_REALLOC
+    sched->debug_realloc = 1;
+#endif
+    const char * WSP_GGML_SCHED_DEBUG_REALLOC = getenv("WSP_GGML_SCHED_DEBUG_REALLOC");
+    sched->debug_realloc = WSP_GGML_SCHED_DEBUG_REALLOC ? atoi(WSP_GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
+
     sched->n_backends = n_backends;
     sched->n_copies = parallel ? WSP_GGML_SCHED_MAX_COPIES : 1;
 
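
The reallocation check is off by default; it can be baked in at build time with the WSP_GGML_SCHED_NO_REALLOC define or toggled per run through the environment variable. A sketch of the runtime toggle (POSIX setenv; the variable is read once, inside wsp_ggml_backend_sched_new(), so it must be set beforehand):

#include <stdlib.h>

// 1 aborts only on unexpected reallocations (same graph size, unchanged
// backend ids); per the hunk above, any value > 1 aborts on every one.
void enable_sched_realloc_check(void) {
    setenv("WSP_GGML_SCHED_DEBUG_REALLOC", "1", /*overwrite =*/ 1);
}
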

@@ -1630,6 +1665,9 @@ wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(
     sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
     sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
 
+    sched->debug_graph_size      = 0;
+    sched->debug_prev_graph_size = 0;
+
     sched->context_buffer_size = wsp_ggml_sched_max_splits*WSP_GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct wsp_ggml_tensor) + wsp_ggml_graph_overhead_custom(graph_size, false);
     sched->context_buffer = (char *) malloc(sched->context_buffer_size);
 
@@ -1694,6 +1732,20 @@ void wsp_ggml_backend_sched_reset(wsp_ggml_backend_sched_t sched) {
     sched->is_alloc = false;
 }
 
+void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes) {
+    WSP_GGML_ASSERT(sched);
+    WSP_GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
+    WSP_GGML_ASSERT(sizes);
+
+    wsp_ggml_backend_sched_reset(sched);
+
+    wsp_ggml_backend_sched_synchronize(sched);
+
+    wsp_ggml_backend_sched_split_graph(sched, measure_graph);
+
+    wsp_ggml_gallocr_reserve_n_size(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids, sizes);
+}
+
 bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph) {
     WSP_GGML_ASSERT(sched);
     WSP_GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
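
wsp_ggml_backend_sched_reserve_size mirrors wsp_ggml_backend_sched_reserve but only measures. A hedged sketch, assuming one size slot per scheduler backend (the scheduler's galloc is built with one buffer type per backend; measure_sched_buffers is hypothetical):

#include <vector>
#include "ggml-backend.h"

// Dry-run reservation: writes the compute-buffer size each backend would
// need for `measure_graph`, without allocating any of them.
std::vector<size_t> measure_sched_buffers(wsp_ggml_backend_sched_t sched,
                                          struct wsp_ggml_cgraph * measure_graph) {
    std::vector<size_t> sizes(wsp_ggml_backend_sched_get_n_backends(sched));
    wsp_ggml_backend_sched_reserve_size(sched, measure_graph, sizes.data());
    return sizes; // sizes[i] corresponds to the scheduler's backend i
}
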

@@ -2001,7 +2053,7 @@ void wsp_ggml_backend_graph_copy_free(struct wsp_ggml_backend_graph_copy copy) {
     wsp_ggml_free(copy.ctx_unallocated);
 }
 
-bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor *
+bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes) {
     struct wsp_ggml_backend_graph_copy copy = wsp_ggml_backend_graph_copy(backend2, graph);
     if (copy.buffer == NULL) {
         return false;
@@ -2012,22 +2064,22 @@ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggm
 
     assert(g1->n_nodes == g2->n_nodes);
 
-    if (
-
+    if (num_test_nodes != 0) {
+        WSP_GGML_ASSERT(test_nodes);
+        // Compute the whole graph and only test the output for specific tensors
         wsp_ggml_backend_graph_compute(backend1, g1);
         wsp_ggml_backend_graph_compute(backend2, g2);
 
-
+        bool verified = false;
         for (int i = 0; i < g1->n_nodes; i++) {
-
-
-
-
+            for (size_t j = 0; j < num_test_nodes; ++j) {
+                if (g1->nodes[i] == test_nodes[j]) {
+                    callback(i, g1->nodes[i], g2->nodes[i], user_data);
+                    verified = true;
+                }
             }
         }
-        WSP_GGML_ASSERT(
-
-        callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+        WSP_GGML_ASSERT(verified);
     } else {
         for (int i = 0; i < g1->n_nodes; i++) {
             struct wsp_ggml_tensor * t1 = g1->nodes[i];
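
The comparison helper now takes an array of test nodes instead of a single node. A hedged caller sketch (shapes_match_cb and compare_outputs are hypothetical; a real callback would compare tensor contents, e.g. read back via wsp_ggml_backend_tensor_get):

#include "ggml.h"
#include "ggml-backend.h"

// Minimal eval callback: accept a node pair if both backends produced
// tensors of the same shape.
static bool shapes_match_cb(int node_index, struct wsp_ggml_tensor * t1,
                            struct wsp_ggml_tensor * t2, void * user_data) {
    (void) node_index; (void) user_data;
    return wsp_ggml_are_same_shape(t1, t2);
}

static bool compare_outputs(wsp_ggml_backend_t b1, wsp_ggml_backend_t b2,
                            struct wsp_ggml_cgraph * graph,
                            struct wsp_ggml_tensor * out_a,
                            struct wsp_ggml_tensor * out_b) {
    const struct wsp_ggml_tensor * test_nodes[] = { out_a, out_b };
    // Both backends compute the whole graph; the callback fires only for the
    // listed nodes, and the implementation asserts that at least one of them
    // was found in the graph.
    return wsp_ggml_backend_compare_graph_backend(b1, b2, graph, shapes_match_cb,
                                                  /*user_data =*/ NULL,
                                                  test_nodes, /*num_test_nodes =*/ 2);
}
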
package/cpp/ggml-backend.h
CHANGED

@@ -307,6 +307,7 @@ extern "C" {
     WSP_GGML_API void wsp_ggml_backend_sched_free(wsp_ggml_backend_sched_t sched);
 
     // Initialize backend buffers from a measure graph
+    WSP_GGML_API void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes);
     WSP_GGML_API bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph); // returns success
 
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_backends(wsp_ggml_backend_sched_t sched);
@@ -357,7 +358,7 @@ extern "C" {
     typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
 
     // Compare the output of two backends
-    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor *
+    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes);
 
     // Tensor initialization
     WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
|