whisper.rn 0.4.0-rc.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/README.md +74 -1
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +43 -13
  4. package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
  5. package/android/src/main/java/com/rnwhisper/WhisperContext.java +64 -36
  6. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
  7. package/android/src/main/jni.cpp +205 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  15. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  16. package/cpp/coreml/whisper-compat.h +10 -0
  17. package/cpp/coreml/whisper-compat.m +35 -0
  18. package/cpp/coreml/whisper-decoder-impl.h +27 -15
  19. package/cpp/coreml/whisper-decoder-impl.m +36 -10
  20. package/cpp/coreml/whisper-encoder-impl.h +21 -9
  21. package/cpp/coreml/whisper-encoder-impl.m +29 -3
  22. package/cpp/ggml-alloc.c +39 -37
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +55 -27
  25. package/cpp/ggml-backend-reg.cpp +591 -0
  26. package/cpp/ggml-backend.cpp +336 -955
  27. package/cpp/ggml-backend.h +70 -42
  28. package/cpp/ggml-common.h +57 -49
  29. package/cpp/ggml-cpp.h +39 -0
  30. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  31. package/cpp/ggml-cpu/amx/amx.h +8 -0
  32. package/cpp/ggml-cpu/amx/common.h +91 -0
  33. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  34. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  35. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  36. package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
  37. package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
  38. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  39. package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
  40. package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
  41. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  42. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  43. package/cpp/ggml-cpu/binary-ops.h +16 -0
  44. package/cpp/ggml-cpu/common.h +72 -0
  45. package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
  46. package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
  47. package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
  48. package/cpp/ggml-cpu/ops.cpp +9085 -0
  49. package/cpp/ggml-cpu/ops.h +111 -0
  50. package/cpp/ggml-cpu/quants.c +1157 -0
  51. package/cpp/ggml-cpu/quants.h +89 -0
  52. package/cpp/ggml-cpu/repack.cpp +1570 -0
  53. package/cpp/ggml-cpu/repack.h +98 -0
  54. package/cpp/ggml-cpu/simd-mappings.h +1006 -0
  55. package/cpp/ggml-cpu/traits.cpp +36 -0
  56. package/cpp/ggml-cpu/traits.h +38 -0
  57. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  58. package/cpp/ggml-cpu/unary-ops.h +28 -0
  59. package/cpp/ggml-cpu/vec.cpp +321 -0
  60. package/cpp/ggml-cpu/vec.h +973 -0
  61. package/cpp/ggml-cpu.h +143 -0
  62. package/cpp/ggml-impl.h +417 -23
  63. package/cpp/ggml-metal-impl.h +622 -0
  64. package/cpp/ggml-metal.h +9 -9
  65. package/cpp/ggml-metal.m +3451 -1344
  66. package/cpp/ggml-opt.cpp +1037 -0
  67. package/cpp/ggml-opt.h +237 -0
  68. package/cpp/ggml-quants.c +296 -10818
  69. package/cpp/ggml-quants.h +78 -125
  70. package/cpp/ggml-threading.cpp +12 -0
  71. package/cpp/ggml-threading.h +14 -0
  72. package/cpp/ggml-whisper-sim.metallib +0 -0
  73. package/cpp/ggml-whisper.metallib +0 -0
  74. package/cpp/ggml.c +4633 -21450
  75. package/cpp/ggml.h +320 -661
  76. package/cpp/gguf.cpp +1347 -0
  77. package/cpp/gguf.h +202 -0
  78. package/cpp/rn-whisper.cpp +4 -11
  79. package/cpp/whisper-arch.h +197 -0
  80. package/cpp/whisper.cpp +2022 -495
  81. package/cpp/whisper.h +75 -18
  82. package/ios/CMakeLists.txt +95 -0
  83. package/ios/RNWhisper.h +5 -0
  84. package/ios/RNWhisper.mm +147 -0
  85. package/ios/RNWhisperAudioUtils.m +4 -0
  86. package/ios/RNWhisperContext.h +5 -0
  87. package/ios/RNWhisperContext.mm +22 -26
  88. package/ios/RNWhisperVadContext.h +29 -0
  89. package/ios/RNWhisperVadContext.mm +152 -0
  90. package/ios/rnwhisper.xcframework/Info.plist +74 -0
  91. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  92. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  93. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  94. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  95. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  96. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  97. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  98. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  99. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  100. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  101. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  102. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  103. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  104. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  105. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  106. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  107. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  108. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  109. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  110. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  111. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  112. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  113. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  114. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  115. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  116. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  117. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  118. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  119. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  120. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  121. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  122. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  123. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  124. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  125. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  126. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  127. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  128. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  129. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  130. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  131. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  132. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  133. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  134. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  135. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  136. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  137. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  138. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  139. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  140. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  141. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  142. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  143. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  144. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  145. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  146. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  147. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  148. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  149. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  150. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  151. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  152. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  153. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  154. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  155. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  156. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  157. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  158. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  159. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  160. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  161. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  162. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  163. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  164. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  165. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  166. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  167. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  168. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  169. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  170. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  171. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  172. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  173. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  174. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  175. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  176. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  177. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  178. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  179. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  180. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  181. package/jest/mock.js +24 -0
  182. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  183. package/lib/commonjs/index.js +111 -1
  184. package/lib/commonjs/index.js.map +1 -1
  185. package/lib/commonjs/version.json +1 -1
  186. package/lib/module/NativeRNWhisper.js.map +1 -1
  187. package/lib/module/index.js +112 -0
  188. package/lib/module/index.js.map +1 -1
  189. package/lib/module/version.json +1 -1
  190. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  191. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  192. package/lib/typescript/index.d.ts +39 -3
  193. package/lib/typescript/index.d.ts.map +1 -1
  194. package/package.json +10 -6
  195. package/src/NativeRNWhisper.ts +48 -0
  196. package/src/index.ts +132 -1
  197. package/src/version.json +1 -1
  198. package/whisper-rn.podspec +11 -18
  199. package/cpp/README.md +0 -4
  200. package/cpp/ggml-aarch64.c +0 -3209
  201. package/cpp/ggml-aarch64.h +0 -39
  202. package/cpp/ggml-cpu-impl.h +0 -614
@@ -3,6 +3,20 @@
3
3
  #include "ggml.h"
4
4
  #include "ggml-alloc.h"
5
5
 
6
+ #ifdef WSP_GGML_BACKEND_SHARED
7
+ # if defined(_WIN32) && !defined(__MINGW32__)
8
+ # ifdef WSP_GGML_BACKEND_BUILD
9
+ # define WSP_GGML_BACKEND_API __declspec(dllexport) extern
10
+ # else
11
+ # define WSP_GGML_BACKEND_API __declspec(dllimport) extern
12
+ # endif
13
+ # else
14
+ # define WSP_GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
15
+ # endif
16
+ #else
17
+ # define WSP_GGML_BACKEND_API extern
18
+ #endif
19
+
6
20
  #ifdef __cplusplus
7
21
  extern "C" {
8
22
  #endif
@@ -24,7 +38,7 @@ extern "C" {
24
38
  WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_buft_alloc_buffer (wsp_ggml_backend_buffer_type_t buft, size_t size);
25
39
  WSP_GGML_API size_t wsp_ggml_backend_buft_get_alignment (wsp_ggml_backend_buffer_type_t buft);
26
40
  WSP_GGML_API size_t wsp_ggml_backend_buft_get_max_size (wsp_ggml_backend_buffer_type_t buft);
27
- WSP_GGML_API size_t wsp_ggml_backend_buft_get_alloc_size(wsp_ggml_backend_buffer_type_t buft, struct wsp_ggml_tensor * tensor);
41
+ WSP_GGML_API size_t wsp_ggml_backend_buft_get_alloc_size(wsp_ggml_backend_buffer_type_t buft, const struct wsp_ggml_tensor * tensor);
28
42
  WSP_GGML_API bool wsp_ggml_backend_buft_is_host (wsp_ggml_backend_buffer_type_t buft);
29
43
  WSP_GGML_API wsp_ggml_backend_dev_t wsp_ggml_backend_buft_get_device (wsp_ggml_backend_buffer_type_t buft);
30
44
 
@@ -42,10 +56,10 @@ extern "C" {
42
56
  WSP_GGML_API void wsp_ggml_backend_buffer_free (wsp_ggml_backend_buffer_t buffer);
43
57
  WSP_GGML_API void * wsp_ggml_backend_buffer_get_base (wsp_ggml_backend_buffer_t buffer);
44
58
  WSP_GGML_API size_t wsp_ggml_backend_buffer_get_size (wsp_ggml_backend_buffer_t buffer);
45
- WSP_GGML_API void wsp_ggml_backend_buffer_init_tensor (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
59
+ WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_buffer_init_tensor (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
46
60
  WSP_GGML_API size_t wsp_ggml_backend_buffer_get_alignment (wsp_ggml_backend_buffer_t buffer);
47
61
  WSP_GGML_API size_t wsp_ggml_backend_buffer_get_max_size (wsp_ggml_backend_buffer_t buffer);
48
- WSP_GGML_API size_t wsp_ggml_backend_buffer_get_alloc_size(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
62
+ WSP_GGML_API size_t wsp_ggml_backend_buffer_get_alloc_size(wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * tensor);
49
63
  WSP_GGML_API void wsp_ggml_backend_buffer_clear (wsp_ggml_backend_buffer_t buffer, uint8_t value);
50
64
  WSP_GGML_API bool wsp_ggml_backend_buffer_is_host (wsp_ggml_backend_buffer_t buffer);
51
65
  WSP_GGML_API void wsp_ggml_backend_buffer_set_usage (wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
@@ -72,7 +86,7 @@ extern "C" {
72
86
  WSP_GGML_API void wsp_ggml_backend_tensor_set_async(wsp_ggml_backend_t backend, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
73
87
  WSP_GGML_API void wsp_ggml_backend_tensor_get_async(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
74
88
 
75
- // "offset" refers to the offset of the tensor data for setting/getting data
89
+ // "offset" refers to the offset in tensor->data for setting/getting data
76
90
  WSP_GGML_API void wsp_ggml_backend_tensor_set( struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
77
91
  WSP_GGML_API void wsp_ggml_backend_tensor_get(const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
78
92
  WSP_GGML_API void wsp_ggml_backend_tensor_memset( struct wsp_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
@@ -114,11 +128,12 @@ extern "C" {
114
128
  //
115
129
 
116
130
  enum wsp_ggml_backend_dev_type {
131
+ // CPU device using system memory
117
132
  WSP_GGML_BACKEND_DEVICE_TYPE_CPU,
133
+ // GPU device using dedicated memory
118
134
  WSP_GGML_BACKEND_DEVICE_TYPE_GPU,
119
- // devices with full capabilities (excludes backends such as BLAS that only support matrix multiplication)
120
- WSP_GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
121
- WSP_GGML_BACKEND_DEVICE_TYPE_GPU_FULL
135
+ // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
136
+ WSP_GGML_BACKEND_DEVICE_TYPE_ACCEL
122
137
  };
123
138
 
124
139
  // functionality supported by the device
@@ -167,15 +182,29 @@ extern "C" {
167
182
  WSP_GGML_API wsp_ggml_backend_dev_t wsp_ggml_backend_reg_dev_get(wsp_ggml_backend_reg_t reg, size_t index);
168
183
  WSP_GGML_API void * wsp_ggml_backend_reg_get_proc_address(wsp_ggml_backend_reg_t reg, const char * name);
169
184
 
170
-
171
- // Functions that may be obtained using wsp_ggml_backend_reg_get_proc_address
172
- typedef wsp_ggml_backend_buffer_type_t (*wsp_ggml_backend_split_buffer_type_t)(const float *);
173
- typedef void (*wsp_ggml_backend_set_n_threads_t)(wsp_ggml_backend_t, int);
185
+ // Common functions that may be obtained using wsp_ggml_backend_reg_get_proc_address
186
+
187
+ // Split buffer type for tensor parallelism
188
+ typedef wsp_ggml_backend_buffer_type_t (*wsp_ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
189
+ // Set the number of threads for the backend
190
+ typedef void (*wsp_ggml_backend_set_n_threads_t)(wsp_ggml_backend_t backend, int n_threads);
191
+ // Get additional buffer types provided by the device (returns a NULL-terminated array)
192
+ typedef wsp_ggml_backend_buffer_type_t * (*wsp_ggml_backend_dev_get_extra_bufts_t)(wsp_ggml_backend_dev_t device);
193
+ // Set the abort callback for the backend
194
+ typedef void (*wsp_ggml_backend_set_abort_callback_t)(wsp_ggml_backend_t backend, wsp_ggml_abort_callback abort_callback, void * abort_callback_data);
195
+ // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
196
+ struct wsp_ggml_backend_feature {
197
+ const char * name;
198
+ const char * value;
199
+ };
200
+ typedef struct wsp_ggml_backend_feature * (*wsp_ggml_backend_get_features_t)(wsp_ggml_backend_reg_t reg);
174
201
 
175
202
  //
176
203
  // Backend registry
177
204
  //
178
205
 
206
+ WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
207
+
179
208
  // Backend (reg) enumeration
180
209
  WSP_GGML_API size_t wsp_ggml_backend_reg_count(void);
181
210
  WSP_GGML_API wsp_ggml_backend_reg_t wsp_ggml_backend_reg_get(size_t index);
@@ -192,9 +221,17 @@ extern "C" {
192
221
  WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_init_by_name(const char * name, const char * params);
193
222
  // = wsp_ggml_backend_dev_init(wsp_ggml_backend_dev_by_type(type), params)
194
223
  WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_init_by_type(enum wsp_ggml_backend_dev_type type, const char * params);
195
- // = wsp_ggml_backend_dev_init(wsp_ggml_backend_dev_by_type(GPU_FULL) OR wsp_ggml_backend_dev_by_type(CPU_FULL), NULL)
224
+ // = wsp_ggml_backend_dev_init(wsp_ggml_backend_dev_by_type(GPU) OR wsp_ggml_backend_dev_by_type(CPU), NULL)
196
225
  WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_init_best(void);
197
226
 
227
+ // Load a backend from a dynamic library and register it
228
+ WSP_GGML_API wsp_ggml_backend_reg_t wsp_ggml_backend_load(const char * path);
229
+ // Unload a backend if loaded dynamically and unregister it
230
+ WSP_GGML_API void wsp_ggml_backend_unload(wsp_ggml_backend_reg_t reg);
231
+ // Load all known backends from dynamic libraries
232
+ WSP_GGML_API void wsp_ggml_backend_load_all(void);
233
+ WSP_GGML_API void wsp_ggml_backend_load_all_from_path(const char * dir_path);
234
+
198
235
  //
199
236
  // Backend scheduler
200
237
  //
@@ -211,7 +248,7 @@ extern "C" {
211
248
  // preferably to run on the same backend as the buffer
212
249
  wsp_ggml_backend_buffer_set_usage(buf_weights, WSP_GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
213
250
 
214
- sched = wsp_ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, WSP_GGML_DEFAULT_GRAPH_SIZE, false);
251
+ sched = wsp_ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, WSP_GGML_DEFAULT_GRAPH_SIZE, false, true);
215
252
 
216
253
  // initialize buffers from a max size graph (optional)
217
254
  reserve_graph = build_graph(sched, max_batch_size);
@@ -223,14 +260,20 @@ extern "C" {
223
260
  wsp_ggml_backend_sched_reserve(sched, reserve_graph);
224
261
 
225
262
  // compute
226
- graph = build_graph(sched);
227
- wsp_ggml_backend_sched_graph_compute(sched, graph);
263
+ graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
264
+ for (int i = 0; i < 10; ++i) {
265
+ wsp_ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
266
+ }
228
267
 
229
268
  // if there are graph inputs:
230
- wsp_ggml_backend_sched_reset(sched);
231
- wsp_ggml_backend_sched_alloc_graph(sched, graph);
232
- wsp_ggml_backend_tensor_set(input_tensor, ...);
233
- wsp_ggml_backend_sched_graph_compute(sched, graph);
269
+ graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once wsp_ggml_free is called)
270
+ wsp_ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
271
+ wsp_ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
272
+ wsp_ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
273
+ wsp_ggml_backend_sched_graph_compute(sched, graph); // execute the graph
274
+
275
+ // as an alternative to the above it is also possible to assign the inputs to a dedicated context and
276
+ // allocate them statically via wsp_ggml_backend_alloc_ctx_tensors
234
277
  }
235
278
  */
236
279
 
@@ -245,8 +288,8 @@ extern "C" {
245
288
  //
246
289
  typedef bool (*wsp_ggml_backend_sched_eval_callback)(struct wsp_ggml_tensor * t, bool ask, void * user_data);
247
290
 
248
- // Initialize a backend scheduler
249
- WSP_GGML_API wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(wsp_ggml_backend_t * backends, wsp_ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
291
+ // Initialize a backend scheduler, backends with low index are given priority over backends with high index
292
+ WSP_GGML_API wsp_ggml_backend_sched_t wsp_ggml_backend_sched_new(wsp_ggml_backend_t * backends, wsp_ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload);
250
293
  WSP_GGML_API void wsp_ggml_backend_sched_free(wsp_ggml_backend_sched_t sched);
251
294
 
252
295
  // Initialize backend buffers from a measure graph
@@ -270,7 +313,9 @@ extern "C" {
270
313
  WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_sched_graph_compute_async(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph);
271
314
  WSP_GGML_API void wsp_ggml_backend_sched_synchronize(wsp_ggml_backend_sched_t sched);
272
315
 
273
- // Reset all assignments and allocators - must be called before changing the node backends
316
+ // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
317
+ // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
318
+ // The correct way to use this API is to discard the deallocated tensors and create new ones.
274
319
  WSP_GGML_API void wsp_ggml_backend_sched_reset(wsp_ggml_backend_sched_t sched);
275
320
 
276
321
  // Set a callback to be called for each resulting node during graph compute
@@ -297,30 +342,13 @@ extern "C" {
297
342
  WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data);
298
343
 
299
344
  // Tensor initialization
300
- WSP_GGML_API void wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
301
- WSP_GGML_API void wsp_ggml_backend_view_init(struct wsp_ggml_tensor * tensor);
345
+ WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
346
+ WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_view_init(struct wsp_ggml_tensor * tensor);
302
347
 
303
- //
304
- // CPU backend
305
- //
306
-
307
- WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_cpu_init(void);
308
-
309
- WSP_GGML_API bool wsp_ggml_backend_is_cpu (wsp_ggml_backend_t backend);
310
- WSP_GGML_API void wsp_ggml_backend_cpu_set_n_threads (wsp_ggml_backend_t backend_cpu, int n_threads);
311
- WSP_GGML_API void wsp_ggml_backend_cpu_set_threadpool (wsp_ggml_backend_t backend_cpu, wsp_ggml_threadpool_t threadpool);
312
- WSP_GGML_API void wsp_ggml_backend_cpu_set_abort_callback(wsp_ggml_backend_t backend_cpu, wsp_ggml_abort_callback abort_callback, void * abort_callback_data);
313
-
314
- // Create a backend buffer from an existing pointer
348
+ // CPU buffer types are always available
315
349
  WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
316
350
  WSP_GGML_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_buffer_type(void);
317
351
 
318
- WSP_GGML_API wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void);
319
-
320
- #ifdef WSP_GGML_USE_CPU_HBM
321
- WSP_GGML_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_hbm_buffer_type(void);
322
- #endif
323
-
324
352
  #ifdef __cplusplus
325
353
  }
326
354
  #endif
package/cpp/ggml-common.h CHANGED
@@ -6,7 +6,20 @@
6
6
  typedef uint16_t wsp_ggml_half;
7
7
  typedef uint32_t wsp_ggml_half2;
8
8
 
9
- #define WSP_GGML_COMMON_AGGR
9
+ #define WSP_GGML_COMMON_AGGR_U
10
+ #define WSP_GGML_COMMON_AGGR_S
11
+
12
+ #define WSP_GGML_COMMON_DECL
13
+ #elif defined(WSP_GGML_COMMON_DECL_CPP)
14
+ #include <cstdint>
15
+
16
+ typedef uint16_t wsp_ggml_half;
17
+ typedef uint32_t wsp_ggml_half2;
18
+
19
+ // std-c++ allows anonymous unions but some compilers warn on it
20
+ #define WSP_GGML_COMMON_AGGR_U data
21
+ // std-c++ does not allow anonymous structs.
22
+ #define WSP_GGML_COMMON_AGGR_S data
10
23
 
11
24
  #define WSP_GGML_COMMON_DECL
12
25
  #elif defined(WSP_GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t wsp_ggml_half2;
15
28
  typedef half wsp_ggml_half;
16
29
  typedef half2 wsp_ggml_half2;
17
30
 
18
- #define WSP_GGML_COMMON_AGGR
31
+ #define WSP_GGML_COMMON_AGGR_U
32
+ #define WSP_GGML_COMMON_AGGR_S
19
33
 
20
34
  #define WSP_GGML_COMMON_DECL
21
35
  #elif defined(WSP_GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 wsp_ggml_half2;
29
43
  typedef half wsp_ggml_half;
30
44
  typedef half2 wsp_ggml_half2;
31
45
 
32
- #define WSP_GGML_COMMON_AGGR data
46
+ #define WSP_GGML_COMMON_AGGR_U
47
+ #define WSP_GGML_COMMON_AGGR_S data
33
48
 
34
49
  #define WSP_GGML_COMMON_DECL
35
50
  #elif defined(WSP_GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 wsp_ggml_half2;
39
54
  typedef half wsp_ggml_half;
40
55
  typedef half2 wsp_ggml_half2;
41
56
 
42
- #define WSP_GGML_COMMON_AGGR data
57
+ #define WSP_GGML_COMMON_AGGR_U
58
+ #define WSP_GGML_COMMON_AGGR_S data
43
59
 
44
60
  #define WSP_GGML_COMMON_DECL
45
61
  #elif defined(WSP_GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 wsp_ggml_half2;
49
65
  typedef sycl::half wsp_ggml_half;
50
66
  typedef sycl::half2 wsp_ggml_half2;
51
67
 
52
- #define WSP_GGML_COMMON_AGGR data
68
+ #define WSP_GGML_COMMON_AGGR_U
69
+ #define WSP_GGML_COMMON_AGGR_S data
53
70
 
54
71
  #define WSP_GGML_COMMON_DECL
55
72
  #endif
@@ -141,6 +158,12 @@ typedef sycl::half2 wsp_ggml_half2;
141
158
 
142
159
  #endif // WSP_GGML_COMMON_DECL_CUDA || WSP_GGML_COMMON_DECL_HIP
143
160
 
161
+ #ifdef _MSC_VER
162
+ #define WSP_GGML_EXTENSION
163
+ #else // _MSC_VER
164
+ #define WSP_GGML_EXTENSION __extension__
165
+ #endif // _MSC_VER
166
+
144
167
  #define QK4_0 32
145
168
  typedef struct {
146
169
  wsp_ggml_half d; // delta
@@ -150,13 +173,13 @@ static_assert(sizeof(block_q4_0) == sizeof(wsp_ggml_half) + QK4_0 / 2, "wrong q4
150
173
 
151
174
  #define QK4_1 32
152
175
  typedef struct {
153
- union {
176
+ WSP_GGML_EXTENSION union {
154
177
  struct {
155
178
  wsp_ggml_half d; // delta
156
179
  wsp_ggml_half m; // min
157
- } WSP_GGML_COMMON_AGGR;
180
+ } WSP_GGML_COMMON_AGGR_S;
158
181
  wsp_ggml_half2 dm;
159
- };
182
+ } WSP_GGML_COMMON_AGGR_U;
160
183
  uint8_t qs[QK4_1 / 2]; // nibbles / quants
161
184
  } block_q4_1;
162
185
  static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -171,13 +194,13 @@ static_assert(sizeof(block_q5_0) == sizeof(wsp_ggml_half) + sizeof(uint32_t) + Q
171
194
 
172
195
  #define QK5_1 32
173
196
  typedef struct {
174
- union {
197
+ WSP_GGML_EXTENSION union {
175
198
  struct {
176
199
  wsp_ggml_half d; // delta
177
200
  wsp_ggml_half m; // min
178
- } WSP_GGML_COMMON_AGGR;
201
+ } WSP_GGML_COMMON_AGGR_S;
179
202
  wsp_ggml_half2 dm;
180
- };
203
+ } WSP_GGML_COMMON_AGGR_U;
181
204
  uint8_t qh[4]; // 5-th bit of quants
182
205
  uint8_t qs[QK5_1 / 2]; // nibbles / quants
183
206
  } block_q5_1;
@@ -192,41 +215,17 @@ static_assert(sizeof(block_q8_0) == sizeof(wsp_ggml_half) + QK8_0, "wrong q8_0 b
192
215
 
193
216
  #define QK8_1 32
194
217
  typedef struct {
195
- union {
218
+ WSP_GGML_EXTENSION union {
196
219
  struct {
197
220
  wsp_ggml_half d; // delta
198
221
  wsp_ggml_half s; // d * sum(qs[i])
199
- } WSP_GGML_COMMON_AGGR;
222
+ } WSP_GGML_COMMON_AGGR_S;
200
223
  wsp_ggml_half2 ds;
201
- };
224
+ } WSP_GGML_COMMON_AGGR_U;
202
225
  int8_t qs[QK8_1]; // quants
203
226
  } block_q8_1;
204
227
  static_assert(sizeof(block_q8_1) == 2*sizeof(wsp_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205
228
 
206
- typedef struct {
207
- wsp_ggml_half d[4]; // deltas for 4 q4_0 blocks
208
- uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209
- } block_q4_0x4;
210
- static_assert(sizeof(block_q4_0x4) == 4 * sizeof(wsp_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211
-
212
- typedef struct {
213
- wsp_ggml_half d[8]; // deltas for 8 q4_0 blocks
214
- uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215
- } block_q4_0x8;
216
- static_assert(sizeof(block_q4_0x8) == 8 * sizeof(wsp_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217
-
218
- typedef struct {
219
- wsp_ggml_half d[4]; // deltas for 4 q8_0 blocks
220
- int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221
- } block_q8_0x4;
222
- static_assert(sizeof(block_q8_0x4) == 4 * sizeof(wsp_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223
-
224
- typedef struct {
225
- wsp_ggml_half d[8]; // deltas for 8 q8_0 blocks
226
- int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227
- } block_q8_0x8;
228
- static_assert(sizeof(block_q8_0x8) == 8 * sizeof(wsp_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229
-
230
229
  //
231
230
  // Ternary quantization
232
231
  //
@@ -257,13 +256,13 @@ static_assert(sizeof(block_tq2_0) == sizeof(wsp_ggml_half) + QK_K / 4, "wrong tq
257
256
  typedef struct {
258
257
  uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
259
258
  uint8_t qs[QK_K/4]; // quants
260
- union {
259
+ WSP_GGML_EXTENSION union {
261
260
  struct {
262
261
  wsp_ggml_half d; // super-block scale for quantized scales
263
262
  wsp_ggml_half dmin; // super-block scale for quantized mins
264
- } WSP_GGML_COMMON_AGGR;
263
+ } WSP_GGML_COMMON_AGGR_S;
265
264
  wsp_ggml_half2 dm;
266
- };
265
+ } WSP_GGML_COMMON_AGGR_U;
267
266
  } block_q2_K;
268
267
  static_assert(sizeof(block_q2_K) == 2*sizeof(wsp_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
268
 
@@ -284,13 +283,13 @@ static_assert(sizeof(block_q3_K) == sizeof(wsp_ggml_half) + QK_K / 4 + QK_K / 8
284
283
  // weight is represented as x = a * q + b
285
284
  // Effectively 4.5 bits per weight
286
285
  typedef struct {
287
- union {
286
+ WSP_GGML_EXTENSION union {
288
287
  struct {
289
288
  wsp_ggml_half d; // super-block scale for quantized scales
290
289
  wsp_ggml_half dmin; // super-block scale for quantized mins
291
- } WSP_GGML_COMMON_AGGR;
290
+ } WSP_GGML_COMMON_AGGR_S;
292
291
  wsp_ggml_half2 dm;
293
- };
292
+ } WSP_GGML_COMMON_AGGR_U;
294
293
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
294
  uint8_t qs[QK_K/2]; // 4--bit quants
296
295
  } block_q4_K;
@@ -301,13 +300,13 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(wsp_ggml_half) + K_SCALE_SIZE + QK_
301
300
  // weight is represented as x = a * q + b
302
301
  // Effectively 5.5 bits per weight
303
302
  typedef struct {
304
- union {
303
+ WSP_GGML_EXTENSION union {
305
304
  struct {
306
305
  wsp_ggml_half d; // super-block scale for quantized scales
307
306
  wsp_ggml_half dmin; // super-block scale for quantized mins
308
- } WSP_GGML_COMMON_AGGR;
307
+ } WSP_GGML_COMMON_AGGR_S;
309
308
  wsp_ggml_half2 dm;
310
- };
309
+ } WSP_GGML_COMMON_AGGR_U;
311
310
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
311
  uint8_t qh[QK_K/8]; // quants, high bit
313
312
  uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -431,6 +430,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(wsp_ggml_half) + sizeof(uint16_t) +
431
430
  #define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
431
  #define WSP_GGML_TABLE_END() };
433
432
 
433
+ #define WSP_GGML_COMMON_IMPL
434
+ #elif defined(WSP_GGML_COMMON_IMPL_CPP)
435
+ #include <cstdint>
436
+
437
+ #define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
438
+ #define WSP_GGML_TABLE_END() };
439
+
434
440
  #define WSP_GGML_COMMON_IMPL
435
441
  #elif defined(WSP_GGML_COMMON_IMPL_METAL)
436
442
  #include <metal_stdlib>
@@ -473,7 +479,6 @@ WSP_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
473
479
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
474
480
  WSP_GGML_TABLE_END()
475
481
 
476
- //#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
477
482
  WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
478
483
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
479
484
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
@@ -508,7 +513,6 @@ WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
508
513
  0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
509
514
  0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
510
515
  WSP_GGML_TABLE_END()
511
- //#endif
512
516
 
513
517
 
514
518
  WSP_GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
@@ -1070,6 +1074,10 @@ WSP_GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
1070
1074
  0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
1071
1075
  WSP_GGML_TABLE_END()
1072
1076
 
1077
+ WSP_GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078
+ -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079
+ WSP_GGML_TABLE_END()
1080
+
1073
1081
  #define NGRID_IQ1S 2048
1074
1082
  #define IQ1S_DELTA 0.125f
1075
1083
  #define IQ1M_DELTA 0.125f
package/cpp/ggml-cpp.h ADDED
@@ -0,0 +1,39 @@
1
+ #pragma once
2
+
3
+ #ifndef __cplusplus
4
+ #error "This header is for C++ only"
5
+ #endif
6
+
7
+ #include "ggml.h"
8
+ #include "ggml-alloc.h"
9
+ #include "ggml-backend.h"
10
+ #include "gguf.h"
11
+ #include <memory>
12
+
13
+ // Smart pointers for ggml types
14
+
15
+ // ggml
16
+
17
+ struct wsp_ggml_context_deleter { void operator()(wsp_ggml_context * ctx) { wsp_ggml_free(ctx); } };
18
+ struct wsp_gguf_context_deleter { void operator()(wsp_gguf_context * ctx) { wsp_gguf_free(ctx); } };
19
+
20
+ typedef std::unique_ptr<wsp_ggml_context, wsp_ggml_context_deleter> wsp_ggml_context_ptr;
21
+ typedef std::unique_ptr<wsp_gguf_context, wsp_gguf_context_deleter> wsp_gguf_context_ptr;
22
+
23
+ // ggml-alloc
24
+
25
+ struct wsp_ggml_gallocr_deleter { void operator()(wsp_ggml_gallocr_t galloc) { wsp_ggml_gallocr_free(galloc); } };
26
+
27
+ typedef std::unique_ptr<wsp_ggml_gallocr, wsp_ggml_gallocr_deleter> wsp_ggml_gallocr_ptr;
28
+
29
+ // ggml-backend
30
+
31
+ struct wsp_ggml_backend_deleter { void operator()(wsp_ggml_backend_t backend) { wsp_ggml_backend_free(backend); } };
32
+ struct wsp_ggml_backend_buffer_deleter { void operator()(wsp_ggml_backend_buffer_t buffer) { wsp_ggml_backend_buffer_free(buffer); } };
33
+ struct wsp_ggml_backend_event_deleter { void operator()(wsp_ggml_backend_event_t event) { wsp_ggml_backend_event_free(event); } };
34
+ struct wsp_ggml_backend_sched_deleter { void operator()(wsp_ggml_backend_sched_t sched) { wsp_ggml_backend_sched_free(sched); } };
35
+
36
+ typedef std::unique_ptr<wsp_ggml_backend, wsp_ggml_backend_deleter> wsp_ggml_backend_ptr;
37
+ typedef std::unique_ptr<wsp_ggml_backend_buffer, wsp_ggml_backend_buffer_deleter> wsp_ggml_backend_buffer_ptr;
38
+ typedef std::unique_ptr<wsp_ggml_backend_event, wsp_ggml_backend_event_deleter> wsp_ggml_backend_event_ptr;
39
+ typedef std::unique_ptr<wsp_ggml_backend_sched, wsp_ggml_backend_sched_deleter> wsp_ggml_backend_sched_ptr;