whisper.rn 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/README.md +1 -1
  2. package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
  3. package/android/src/main/jni.cpp +13 -0
  4. package/cpp/ggml-alloc.c +78 -26
  5. package/cpp/ggml-alloc.h +9 -0
  6. package/cpp/ggml-backend-impl.h +1 -1
  7. package/cpp/ggml-backend-reg.cpp +19 -3
  8. package/cpp/ggml-backend.cpp +72 -20
  9. package/cpp/ggml-backend.h +2 -1
  10. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  11. package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
  12. package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
  13. package/cpp/ggml-cpu/arch-fallback.h +50 -2
  14. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  15. package/cpp/ggml-cpu/ggml-cpu.c +139 -58
  16. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  17. package/cpp/ggml-cpu/ops.cpp +170 -18
  18. package/cpp/ggml-cpu/ops.h +1 -0
  19. package/cpp/ggml-cpu/repack.cpp +531 -5
  20. package/cpp/ggml-cpu/repack.h +14 -0
  21. package/cpp/ggml-cpu/simd-mappings.h +16 -18
  22. package/cpp/ggml-cpu/vec.cpp +41 -1
  23. package/cpp/ggml-cpu/vec.h +241 -138
  24. package/cpp/ggml-cpu.h +1 -0
  25. package/cpp/ggml-impl.h +0 -4
  26. package/cpp/ggml-metal/ggml-metal-context.m +26 -16
  27. package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
  28. package/cpp/ggml-metal/ggml-metal-device.h +87 -65
  29. package/cpp/ggml-metal/ggml-metal-device.m +263 -104
  30. package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
  31. package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
  32. package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
  33. package/cpp/ggml-metal/ggml-metal.cpp +6 -5
  34. package/cpp/ggml-metal/ggml-metal.metal +404 -34
  35. package/cpp/ggml.c +110 -31
  36. package/cpp/ggml.h +51 -12
  37. package/cpp/jsi/RNWhisperJSI.cpp +1 -0
  38. package/cpp/whisper.cpp +17 -4
  39. package/ios/CMakeLists.txt +21 -1
  40. package/ios/RNWhisperContext.mm +5 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  58. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  59. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  66. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  68. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  74. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  75. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  78. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  79. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  80. package/lib/commonjs/jest-mock.js +2 -0
  81. package/lib/commonjs/jest-mock.js.map +1 -1
  82. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
  83. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  84. package/lib/commonjs/version.json +1 -1
  85. package/lib/module/NativeRNWhisper.js.map +1 -1
  86. package/lib/module/jest-mock.js +2 -0
  87. package/lib/module/jest-mock.js.map +1 -1
  88. package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
  89. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  90. package/lib/module/version.json +1 -1
  91. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  92. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  93. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
  94. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  95. package/lib/typescript/realtime-transcription/types.d.ts +7 -0
  96. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  97. package/package.json +1 -1
  98. package/src/NativeRNWhisper.ts +1 -0
  99. package/src/jest-mock.ts +2 -0
  100. package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
  101. package/src/realtime-transcription/types.ts +9 -0
  102. package/src/version.json +1 -1
package/README.md CHANGED
@@ -98,7 +98,7 @@ Voice Activity Detection allows you to detect speech segments in audio data usin
  import { initWhisperVad } from 'whisper.rn'
 
  const vadContext = await initWhisperVad({
- filePath: require('./assets/ggml-silero-v5.1.2.bin'), // VAD model file
+ filePath: require('./assets/ggml-silero-v6.2.0.bin'), // VAD model file
  useGpu: true, // Use GPU acceleration (iOS only)
  nThreads: 4, // Number of threads for processing
  })
package/android/src/main/java/com/rnwhisper/WhisperContext.java CHANGED
@@ -425,6 +425,10 @@ public class WhisperContext {
  }
  data.putString("result", builder.toString());
  data.putArray("segments", segments);
+ String language = getDetectedLanguage(context);
+ if (language != null) {
+ data.putString("language", language);
+ }
  return data;
  }
 
@@ -556,6 +560,7 @@ public class WhisperContext {
  protected static native int getTextSegmentT0(long context, int index);
  protected static native int getTextSegmentT1(long context, int index);
  protected static native boolean getTextSegmentSpeakerTurnNext(long context, int index);
+ protected static native String getDetectedLanguage(long context);
 
  protected static native void createRealtimeTranscribeJob(
  int job_id,
package/android/src/main/jni.cpp CHANGED
@@ -632,6 +632,19 @@ Java_com_rnwhisper_WhisperContext_getTextSegmentSpeakerTurnNext(
  return whisper_full_get_segment_speaker_turn_next(context, index);
  }
 
+ JNIEXPORT jstring JNICALL
+ Java_com_rnwhisper_WhisperContext_getDetectedLanguage(
+ JNIEnv *env, jobject thiz, jlong context_ptr) {
+ UNUSED(thiz);
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
+ int lang_id = whisper_full_lang_id(context);
+ const char *lang_str = whisper_lang_str(lang_id);
+ if (lang_str == nullptr) {
+ return nullptr;
+ }
+ return env->NewStringUTF(lang_str);
+ }
+
  JNIEXPORT jstring JNICALL
  Java_com_rnwhisper_WhisperContext_bench(
  JNIEnv *env,
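The new JNI binding above is a thin wrapper around whisper.cpp's language-detection accessors, called after a completed transcription; the Java hunk earlier attaches the result as a "language" field on the transcription data. For context, a minimal sketch of the underlying pattern follows. The helper name, parameters and setup are illustrative assumptions; whisper_full_default_params, whisper_full, whisper_full_lang_id and whisper_lang_str are the standard whisper.cpp APIs the binding relies on.

// Sketch only (not package code): decode with language auto-detection, then
// read back the detected language code the same way getDetectedLanguage does.
#include "whisper.h"

static const char * detect_language_after_decode(struct whisper_context * ctx,
                                                 const float * pcm, int n_samples) {
    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    params.language = "auto";                 // let whisper pick the language

    if (whisper_full(ctx, params, pcm, n_samples) != 0) {
        return NULL;                          // decode failed
    }

    int lang_id = whisper_full_lang_id(ctx);  // id of the detected language
    return whisper_lang_str(lang_id);         // short code such as "en"; may be NULL
}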
package/cpp/ggml-alloc.c CHANGED
@@ -25,6 +25,7 @@ static bool wsp_ggml_is_view(const struct wsp_ggml_tensor * t) {
  // ops that return true for this function must not use restrict pointers for their backend implementations
  bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
  switch (op) {
+ case WSP_GGML_OP_FILL:
  case WSP_GGML_OP_SCALE:
  case WSP_GGML_OP_DIAG_MASK_ZERO:
  case WSP_GGML_OP_DIAG_MASK_INF:
@@ -311,16 +312,9 @@ static struct buffer_address wsp_ggml_dyn_tallocr_alloc(struct wsp_ggml_dyn_tall
  }
 
  // this is a very naive implementation, but for our case the number of free blocks should be very small
- static void wsp_ggml_dyn_tallocr_free_tensor(struct wsp_ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct wsp_ggml_tensor * tensor) {
+ static void wsp_ggml_dyn_tallocr_free_bytes(struct wsp_ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
  size = aligned_offset(NULL, size, alloc->alignment);
 
- AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
- __func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
-
- #ifdef WSP_GGML_ALLOCATOR_DEBUG
- remove_allocated_tensor(alloc, addr, tensor);
- #endif
-
  struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
 
  // see if we can merge with an existing block
@@ -356,8 +350,6 @@ static void wsp_ggml_dyn_tallocr_free_tensor(struct wsp_ggml_dyn_tallocr * alloc
  }
  // otherwise, add a new block
  wsp_ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
-
- WSP_GGML_UNUSED(tensor);
  }
 
  static void wsp_ggml_dyn_tallocr_reset(struct wsp_ggml_dyn_tallocr * alloc) {
@@ -602,7 +594,9 @@ static bool wsp_ggml_gallocr_is_own(wsp_ggml_gallocr_t galloc, struct wsp_ggml_t
  }
 
  static bool wsp_ggml_gallocr_is_allocated(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * t) {
- return t->data != NULL || wsp_ggml_gallocr_hash_get(galloc, t)->allocated;
+ return t->data != NULL // tensor data already set externally
+ || t->buffer // tensor on external buffer (but not yet allocated)
+ || wsp_ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
  }
 
  // free the extra space at the end if the new tensor is smaller
@@ -615,13 +609,17 @@ static void wsp_ggml_gallocr_free_extra_space(wsp_ggml_gallocr_t galloc, struct
 
  WSP_GGML_ASSERT(parent_size >= node_size);
 
+ // note: we want after the freeing the chunks to continue to be aligned
+ struct wsp_ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
+ parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
+ node_size = aligned_offset(NULL, node_size, p_alloc->alignment);
+
  if (parent_size > node_size) {
- struct wsp_ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
  struct buffer_address p_addr = p_hn->addr;
  p_addr.offset += node_size;
  size_t extra_size = parent_size - node_size;
  AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
- wsp_ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
+ wsp_ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
  }
  }
 
@@ -705,7 +703,14 @@ static void wsp_ggml_gallocr_free_node(wsp_ggml_gallocr_t galloc, struct wsp_ggm
  struct wsp_ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
  wsp_ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
  size_t size = wsp_ggml_backend_buft_get_alloc_size(buft, node);
- wsp_ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node);
+
+ AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
+ __func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
+ #ifdef WSP_GGML_ALLOCATOR_DEBUG
+ remove_allocated_tensor(alloc, hn->addr, node);
+ #endif
+
+ wsp_ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
  hn->allocated = false;
  }
 
@@ -820,7 +825,8 @@ static void wsp_ggml_gallocr_alloc_graph_impl(wsp_ggml_gallocr_t galloc, struct
  }
  }
 
- bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+ static bool wsp_ggml_gallocr_reserve_n_impl(
+ wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
  size_t min_hash_size = graph->n_nodes + graph->n_leafs;
  // add 25% margin to avoid hash collisions
  min_hash_size += min_hash_size / 4;
@@ -921,15 +927,23 @@ bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgrap
  }
  if (realloc) {
  #ifndef NDEBUG
- size_t cur_size = galloc->buffers[i] ? wsp_ggml_vbuffer_size(galloc->buffers[i]) : 0;
- WSP_GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+ {
+ size_t cur_size = galloc->buffers[i] ? wsp_ggml_vbuffer_size(galloc->buffers[i]) : 0;
+ if (cur_size > 0) {
+ WSP_GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
+ __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+ }
+ }
  #endif
-
  wsp_ggml_vbuffer_free(galloc->buffers[i]);
- galloc->buffers[i] = wsp_ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], WSP_GGML_BACKEND_BUFFER_USAGE_COMPUTE);
- if (galloc->buffers[i] == NULL) {
- WSP_GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), new_size);
- return false;
+ if (no_alloc) {
+ galloc->buffers[i] = NULL;
+ } else {
+ galloc->buffers[i] = wsp_ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], WSP_GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+ if (galloc->buffers[i] == NULL) {
+ WSP_GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, wsp_ggml_backend_buft_name(galloc->bufts[i]), new_size);
+ return false;
+ }
  }
  }
  }
@@ -937,6 +951,21 @@ bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgrap
  return true;
  }
 
+ void wsp_ggml_gallocr_reserve_n_size(
+ wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+ WSP_GGML_ASSERT(wsp_ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ true));
+ for (int i = 0; i < galloc->n_buffers; i++) {
+ sizes[i] = 0;
+ for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+ sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+ }
+ }
+ }
+
+ bool wsp_ggml_gallocr_reserve_n(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+ return wsp_ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ false);
+ }
+
  bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph *graph) {
  return wsp_ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
  }
@@ -1139,7 +1168,8 @@ static bool alloc_tensor_range(struct wsp_ggml_context * ctx,
  return true;
  }
 
- wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft) {
+ static wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(
+ struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
  WSP_GGML_ASSERT(wsp_ggml_get_no_alloc(ctx) == true);
 
  size_t alignment = wsp_ggml_backend_buft_get_alignment(buft);
@@ -1147,6 +1177,7 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
  wsp_ggml_backend_buffer_t * buffers = NULL;
  size_t n_buffers = 0;
+ *nbytes_total = 0;
 
  size_t cur_buf_size = 0;
  struct wsp_ggml_tensor * first = wsp_ggml_get_first_tensor(ctx);
@@ -1158,10 +1189,11 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
  if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
  // allocate tensors in the current buffer
- if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+ if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
  return NULL;
  }
  first = t;
+ *nbytes_total += cur_buf_size;
  cur_buf_size = this_size;
  } else {
  cur_buf_size += this_size;
@@ -1170,15 +1202,21 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
 
  // allocate remaining tensors
  if (cur_buf_size > 0) {
- if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+ *nbytes_total += cur_buf_size;
+ if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
  return NULL;
  }
  }
 
+ if (no_alloc) {
+ return NULL;
+ }
+
  if (n_buffers == 0) {
  #ifndef NDEBUG
  WSP_GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
  #endif
+ WSP_GGML_ASSERT(!buffers);
  return NULL;
  }
 
@@ -1188,10 +1226,24 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
  } else {
  buffer = wsp_ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
  }
- free(buffers);
+ if (buffers) {
+ free(buffers); // can be NULL if context is empty or no_alloc
+ }
  return buffer;
  }
 
+ size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft) {
+ size_t nbytes_total = 0;
+ wsp_ggml_backend_buffer_t buf = wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
+ WSP_GGML_ASSERT(!buf);
+ return nbytes_total;
+ }
+
+ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft) {
+ size_t nbytes_total = 0;
+ return wsp_ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
+ }
+
  wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend) {
  return wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, wsp_ggml_backend_get_default_buffer_type(backend));
  }
package/cpp/ggml-alloc.h CHANGED
@@ -53,7 +53,14 @@ WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
  // call with a worst-case graph to avoid buffer reallocations
  // not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
  // returns false if the buffer allocation failed
+ // wsp_ggml_gallocr_resrve_n_size writes the buffer sizes per galloc buffer that would be allocated by wsp_ggml_gallocr_reserve_n to sizes
  WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+ WSP_GGML_API void wsp_ggml_gallocr_reserve_n_size(
+ wsp_ggml_gallocr_t galloc,
+ struct wsp_ggml_cgraph * graph,
+ const int * node_buffer_ids,
+ const int * leaf_buffer_ids,
+ size_t * sizes);
  WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
  wsp_ggml_gallocr_t galloc,
  struct wsp_ggml_cgraph * graph,
@@ -68,6 +75,8 @@ WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc,
 
  // Utils
  // Create a buffer and allocate all the tensors in a wsp_ggml_context
+ // wsp_ggml_backend_alloc_ctx_tensors_from_buft_size returns the size of the buffer that would be allocated by wsp_ggml_backend_alloc_ctx_tensors_from_buft
+ WSP_GGML_API size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
  WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
  WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);
 
package/cpp/ggml-backend-impl.h CHANGED
@@ -144,7 +144,7 @@ extern "C" {
  // device description: short informative description of the device, could be the model name
  const char * (*get_description)(wsp_ggml_backend_dev_t dev);
 
- // device memory in bytes
+ // device memory in bytes: 0 bytes to indicate no memory to report
  void (*get_memory)(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total);
 
  // device type
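Combined with the new wsp_ggml_backend_alloc_ctx_tensors_from_buft_size declaration in ggml-alloc.h above, the clarified get_memory contract lets a caller estimate whether a context's tensors will fit before allocating anything. A minimal sketch follows; the helper name and decision rule are assumptions, not package code, while the wsp_ggml_* calls are the public APIs declared in these headers.

// Sketch: size the tensors of a context for a device's buffer type and compare
// against the device's reported free memory, which may legitimately be 0.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

static bool tensors_likely_fit(struct wsp_ggml_context * ctx, wsp_ggml_backend_dev_t dev) {
    wsp_ggml_backend_buffer_type_t buft = wsp_ggml_backend_dev_buffer_type(dev);

    // new in this release: total buffer size without performing the allocation
    size_t need = wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);

    size_t free_mem = 0, total_mem = 0;
    wsp_ggml_backend_dev_memory(dev, &free_mem, &total_mem);

    // per the updated comment, 0 bytes means the device has no memory to report
    return free_mem == 0 || need <= free_mem;
}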
package/cpp/ggml-backend-reg.cpp CHANGED
@@ -73,6 +73,10 @@
  #include "ggml-cann.h"
  #endif
 
+ #ifdef WSP_GGML_USE_ZENDNN
+ #include "ggml-zendnn.h"
+ #endif
+
  // disable C++17 deprecation warning for std::codecvt_utf8
  #if defined(__clang__)
  # pragma clang diagnostic push
@@ -203,6 +207,9 @@ struct wsp_ggml_backend_registry {
  #ifdef WSP_GGML_USE_OPENCL
  register_backend(wsp_ggml_backend_opencl_reg());
  #endif
+ #ifdef WSP_GGML_USE_ZENDNN
+ register_backend(wsp_ggml_backend_zendnn_reg());
+ #endif
  #ifdef WSP_GGML_USE_HEXAGON
  register_backend(wsp_ggml_backend_hexagon_reg());
  #endif
@@ -534,8 +541,12 @@ static wsp_ggml_backend_reg_t wsp_ggml_backend_load_best(const char * name, bool
  fs::path best_path;
 
  for (const auto & search_path : search_paths) {
- if (!fs::exists(search_path)) {
- WSP_GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+ if (std::error_code ec; !fs::exists(search_path, ec)) {
+ if (ec) {
+ WSP_GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
+ } else {
+ WSP_GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+ }
  continue;
  }
  fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
@@ -575,8 +586,12 @@ static wsp_ggml_backend_reg_t wsp_ggml_backend_load_best(const char * name, bool
  for (const auto & search_path : search_paths) {
  fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
  fs::path path = search_path / filename;
- if (fs::exists(path)) {
+ if (std::error_code ec; fs::exists(path, ec)) {
  return get_reg().load_backend(path, silent);
+ } else {
+ if (ec) {
+ WSP_GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(path).c_str(), ec.message().c_str());
+ }
  }
  }
  return nullptr;
@@ -597,6 +612,7 @@ void wsp_ggml_backend_load_all_from_path(const char * dir_path) {
  #endif
 
  wsp_ggml_backend_load_best("blas", silent, dir_path);
+ wsp_ggml_backend_load_best("zendnn", silent, dir_path);
  wsp_ggml_backend_load_best("cann", silent, dir_path);
  wsp_ggml_backend_load_best("cuda", silent, dir_path);
  wsp_ggml_backend_load_best("hip", silent, dir_path);
package/cpp/ggml-backend.cpp CHANGED
@@ -36,12 +36,11 @@ const char * wsp_ggml_backend_buft_name(wsp_ggml_backend_buffer_type_t buft) {
  }
 
  wsp_ggml_backend_buffer_t wsp_ggml_backend_buft_alloc_buffer(wsp_ggml_backend_buffer_type_t buft, size_t size) {
+ WSP_GGML_ASSERT(buft);
  if (size == 0) {
  // return a dummy buffer for zero-sized allocations
  return wsp_ggml_backend_buffer_init(buft, {}, NULL, 0);
  }
-
- WSP_GGML_ASSERT(buft);
  return buft->iface.alloc_buffer(buft, size);
  }
 
@@ -128,6 +127,12 @@ void * wsp_ggml_backend_buffer_get_base(wsp_ggml_backend_buffer_t buffer) {
  return NULL;
  }
 
+ // FIXME JG: a multi_buffer has a non-zero size, according to the above comment get_base is not optional,
+ // I don't know whether the above comment is correct
+ if (!buffer->iface.get_base) {
+ return NULL;
+ }
+
  void * base = buffer->iface.get_base(buffer);
 
  WSP_GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
@@ -723,6 +728,12 @@ struct wsp_ggml_backend_sched {
  bool op_offload;
 
  int debug;
+
+ // used for debugging graph reallocations [WSP_GGML_SCHED_DEBUG_REALLOC]
+ // ref: https://github.com/ggml-org/llama.cpp/pull/17617
+ int debug_realloc;
+ int debug_graph_size;
+ int debug_prev_graph_size;
  };
 
  #define hash_id(tensor) wsp_ggml_hash_find_or_insert(&sched->hash_set, tensor)
@@ -1234,10 +1245,8 @@ void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct w
  tensor_copy = wsp_ggml_dup_tensor_layout(sched->ctx, src);
  wsp_ggml_format_name(tensor_copy, "%s#%s#%d", wsp_ggml_backend_name(backend), src->name, c);
  }
- if (sched->n_copies > 1) {
- wsp_ggml_set_input(tensor_copy);
- wsp_ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
- }
+ wsp_ggml_set_input(tensor_copy);
+ wsp_ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
  tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
  SET_CAUSE(tensor_copy, "4.cpy");
  }
@@ -1289,6 +1298,11 @@ void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct w
  }
 
  int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*WSP_GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
+
+ // remember the actual graph_size for performing reallocation checks later [WSP_GGML_SCHED_DEBUG_REALLOC]
+ sched->debug_prev_graph_size = sched->debug_graph_size;
+ sched->debug_graph_size = graph_size;
+
  if (sched->graph.size < graph_size) {
  sched->graph.size = graph_size;
  sched->graph.nodes = (wsp_ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct wsp_ggml_tensor *));
@@ -1395,14 +1409,27 @@ static bool wsp_ggml_backend_sched_alloc_splits(wsp_ggml_backend_sched_t sched)
 
  // allocate graph
  if (backend_ids_changed || !wsp_ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
+ #ifndef NDEBUG
+ WSP_GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
+ #endif
+
+ if (sched->debug_realloc > 0) {
+ // we are interested only in situations where the graph was reallocated even though its size remained the same [WSP_GGML_SCHED_DEBUG_REALLOC]
+ // example: https://github.com/ggml-org/llama.cpp/pull/17143
+ const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
+
+ if (unexpected || sched->debug_realloc > 1) {
+ WSP_GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
+ sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
+ }
+ }
+
  // the re-allocation may cause the split inputs to be moved to a different address
  // synchronize without wsp_ggml_backend_sched_synchronize to avoid changing cur_copy
  for (int i = 0; i < sched->n_backends; i++) {
  wsp_ggml_backend_synchronize(sched->backends[i]);
  }
- #ifndef NDEBUG
- WSP_GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
- #endif
+
  wsp_ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
  if (!wsp_ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
  WSP_GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__);
@@ -1614,6 +1641,14 @@ wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(
 
  const char * WSP_GGML_SCHED_DEBUG = getenv("WSP_GGML_SCHED_DEBUG");
  sched->debug = WSP_GGML_SCHED_DEBUG ? atoi(WSP_GGML_SCHED_DEBUG) : 0;
+
+ sched->debug_realloc = 0;
+ #ifdef WSP_GGML_SCHED_NO_REALLOC
+ sched->debug_realloc = 1;
+ #endif
+ const char * WSP_GGML_SCHED_DEBUG_REALLOC = getenv("WSP_GGML_SCHED_DEBUG_REALLOC");
+ sched->debug_realloc = WSP_GGML_SCHED_DEBUG_REALLOC ? atoi(WSP_GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
+
  sched->n_backends = n_backends;
  sched->n_copies = parallel ? WSP_GGML_SCHED_MAX_COPIES : 1;
 
@@ -1630,6 +1665,9 @@ wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(
  sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
  sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
 
+ sched->debug_graph_size = 0;
+ sched->debug_prev_graph_size = 0;
+
  sched->context_buffer_size = wsp_ggml_sched_max_splits*WSP_GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct wsp_ggml_tensor) + wsp_ggml_graph_overhead_custom(graph_size, false);
  sched->context_buffer = (char *) malloc(sched->context_buffer_size);
 
@@ -1694,6 +1732,20 @@ void wsp_ggml_backend_sched_reset(wsp_ggml_backend_sched_t sched) {
  sched->is_alloc = false;
  }
 
+ void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes) {
+ WSP_GGML_ASSERT(sched);
+ WSP_GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
+ WSP_GGML_ASSERT(sizes);
+
+ wsp_ggml_backend_sched_reset(sched);
+
+ wsp_ggml_backend_sched_synchronize(sched);
+
+ wsp_ggml_backend_sched_split_graph(sched, measure_graph);
+
+ wsp_ggml_gallocr_reserve_n_size(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids, sizes);
+ }
+
  bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph) {
  WSP_GGML_ASSERT(sched);
  WSP_GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
@@ -2001,7 +2053,7 @@ void wsp_ggml_backend_graph_copy_free(struct wsp_ggml_backend_graph_copy copy) {
  wsp_ggml_free(copy.ctx_unallocated);
  }
 
- bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node) {
+ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes) {
  struct wsp_ggml_backend_graph_copy copy = wsp_ggml_backend_graph_copy(backend2, graph);
  if (copy.buffer == NULL) {
  return false;
@@ -2012,22 +2064,22 @@ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggm
 
  assert(g1->n_nodes == g2->n_nodes);
 
- if (test_node != nullptr) {
- // Compute the whole graph and only test the output for a specific tensor
+ if (num_test_nodes != 0) {
+ WSP_GGML_ASSERT(test_nodes);
+ // Compute the whole graph and only test the output for specific tensors
  wsp_ggml_backend_graph_compute(backend1, g1);
  wsp_ggml_backend_graph_compute(backend2, g2);
 
- int test_node_idx = -1;
+ bool verified = false;
  for (int i = 0; i < g1->n_nodes; i++) {
- struct wsp_ggml_tensor * t1 = g1->nodes[i];
- if (t1 == test_node) {
- test_node_idx = i;
- break;
+ for (size_t j = 0; j < num_test_nodes; ++j) {
+ if (g1->nodes[i] == test_nodes[j]) {
+ callback(i, g1->nodes[i], g2->nodes[i], user_data);
+ verified = true;
+ }
  }
  }
- WSP_GGML_ASSERT(test_node_idx != -1);
-
- callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+ WSP_GGML_ASSERT(verified);
  } else {
  for (int i = 0; i < g1->n_nodes; i++) {
  struct wsp_ggml_tensor * t1 = g1->nodes[i];
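wsp_ggml_backend_sched_reserve_size, defined above, gives the scheduler a measure-only counterpart to wsp_ggml_backend_sched_reserve. A rough sketch of the call pattern follows, assuming an already-constructed scheduler and worst-case graph; the reporting loop and helper name are illustrative, not package code.

// Sketch: fill one size per backend with the compute buffer a real reserve
// would allocate, without actually allocating it.
#include <cstdio>
#include <vector>
#include "ggml-backend.h"

static void report_compute_buffer_sizes(wsp_ggml_backend_sched_t sched,
                                        struct wsp_ggml_cgraph * measure_graph) {
    const int n_backends = wsp_ggml_backend_sched_get_n_backends(sched);
    std::vector<size_t> sizes(n_backends, 0);

    // new in this release: sizes[i] receives the buffer size for backend i
    wsp_ggml_backend_sched_reserve_size(sched, measure_graph, sizes.data());

    for (int i = 0; i < n_backends; i++) {
        wsp_ggml_backend_t backend = wsp_ggml_backend_sched_get_backend(sched, i);
        printf("%s: %.2f MiB\n", wsp_ggml_backend_name(backend), sizes[i] / 1024.0 / 1024.0);
    }
}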
package/cpp/ggml-backend.h CHANGED
@@ -307,6 +307,7 @@ extern "C" {
  WSP_GGML_API void wsp_ggml_backend_sched_free(wsp_ggml_backend_sched_t sched);
 
  // Initialize backend buffers from a measure graph
+ WSP_GGML_API void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes);
  WSP_GGML_API bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph); // returns success
 
  WSP_GGML_API int wsp_ggml_backend_sched_get_n_backends(wsp_ggml_backend_sched_t sched);
@@ -357,7 +358,7 @@ extern "C" {
  typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
 
  // Compare the output of two backends
- WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
+ WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes);
 
  // Tensor initialization
  WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
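The compare helper now takes an array of test nodes plus a count instead of a single node. A minimal sketch of a caller under the new signature follows; the callback body, backend choice and node selection are placeholders, not package code.

// Sketch: compare two backends on a graph, inspecting only two chosen nodes.
#include <cstdio>
#include "ggml-backend.h"

static bool on_compared_node(int node_index, struct wsp_ggml_tensor * t1,
                             struct wsp_ggml_tensor * t2, void * user_data) {
    (void) t2; (void) user_data;
    printf("compared node %d (%s)\n", node_index, t1->name);
    return true; // keep going
}

static void compare_two_nodes(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2,
                              struct wsp_ggml_cgraph * graph,
                              const struct wsp_ggml_tensor * node_a,
                              const struct wsp_ggml_tensor * node_b) {
    const struct wsp_ggml_tensor * test_nodes[2] = { node_a, node_b };
    // 0.5.3 accepted a single test_node; 0.5.5 takes an array and a count
    wsp_ggml_backend_compare_graph_backend(backend1, backend2, graph,
                                           on_compared_node, /*user_data=*/ NULL,
                                           test_nodes, 2);
}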
package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp CHANGED
@@ -8,6 +8,10 @@
  #include <sys/sysctl.h>
  #endif
 
+ #if !defined(HWCAP2_SVE2)
+ #define HWCAP2_SVE2 (1 << 1)
+ #endif
+
  #if !defined(HWCAP2_I8MM)
  #define HWCAP2_I8MM (1 << 13)
  #endif