whisper.rn 0.4.0-rc.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +44 -13
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -12
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +75 -34
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +53 -38
- package/android/src/main/jni.cpp +38 -1
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/cpp/coreml/whisper-compat.h +10 -0
- package/cpp/coreml/whisper-compat.m +35 -0
- package/cpp/coreml/whisper-decoder-impl.h +27 -15
- package/cpp/coreml/whisper-decoder-impl.m +36 -10
- package/cpp/coreml/whisper-encoder-impl.h +21 -9
- package/cpp/coreml/whisper-encoder-impl.m +29 -3
- package/cpp/ggml-alloc.c +727 -517
- package/cpp/ggml-alloc.h +47 -65
- package/cpp/ggml-backend-impl.h +196 -57
- package/cpp/ggml-backend-reg.cpp +591 -0
- package/cpp/ggml-backend.cpp +2016 -0
- package/cpp/ggml-backend.h +234 -89
- package/cpp/ggml-common.h +1861 -0
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
- package/cpp/ggml-cpu/ops.cpp +9085 -0
- package/cpp/ggml-cpu/ops.h +111 -0
- package/cpp/ggml-cpu/quants.c +1157 -0
- package/cpp/ggml-cpu/quants.h +89 -0
- package/cpp/ggml-cpu/repack.cpp +1570 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +1006 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +321 -0
- package/cpp/ggml-cpu/vec.h +973 -0
- package/cpp/ggml-cpu.h +143 -0
- package/cpp/ggml-impl.h +525 -168
- package/cpp/ggml-metal-impl.h +622 -0
- package/cpp/ggml-metal.h +16 -14
- package/cpp/ggml-metal.m +5289 -1859
- package/cpp/ggml-opt.cpp +1037 -0
- package/cpp/ggml-opt.h +237 -0
- package/cpp/ggml-quants.c +2916 -6877
- package/cpp/ggml-quants.h +87 -249
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +3293 -16770
- package/cpp/ggml.h +778 -835
- package/cpp/gguf.cpp +1347 -0
- package/cpp/gguf.h +202 -0
- package/cpp/rn-whisper.cpp +84 -0
- package/cpp/rn-whisper.h +2 -0
- package/cpp/whisper-arch.h +197 -0
- package/cpp/whisper.cpp +3240 -944
- package/cpp/whisper.h +144 -31
- package/ios/CMakeLists.txt +95 -0
- package/ios/RNWhisper.h +5 -0
- package/ios/RNWhisper.mm +124 -37
- package/ios/RNWhisperAudioUtils.h +1 -0
- package/ios/RNWhisperAudioUtils.m +24 -13
- package/ios/RNWhisperContext.h +8 -2
- package/ios/RNWhisperContext.mm +42 -8
- package/ios/rnwhisper.xcframework/Info.plist +74 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/jest/mock.js +14 -1
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +48 -19
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +48 -19
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +6 -3
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +25 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +15 -10
- package/src/NativeRNWhisper.ts +12 -3
- package/src/index.ts +63 -24
- package/src/version.json +1 -1
- package/whisper-rn.podspec +18 -18
- package/cpp/README.md +0 -4
- package/cpp/ggml-backend.c +0 -1718
- package/cpp/ggml-metal-whisper.metal +0 -5820
package/cpp/ggml-quants.h
CHANGED
|
@@ -1,262 +1,100 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
#define WSP_GGML_COMMON_DECL_C
|
|
4
|
+
#include "ggml-common.h"
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
#include <stdint.h>
|
|
8
|
-
#include <stddef.h>
|
|
9
|
-
|
|
10
|
-
#define QK4_0 32
|
|
11
|
-
typedef struct {
|
|
12
|
-
wsp_ggml_fp16_t d; // delta
|
|
13
|
-
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
|
14
|
-
} block_q4_0;
|
|
15
|
-
static_assert(sizeof(block_q4_0) == sizeof(wsp_ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
|
16
|
-
|
|
17
|
-
#define QK4_1 32
|
|
18
|
-
typedef struct {
|
|
19
|
-
wsp_ggml_fp16_t d; // delta
|
|
20
|
-
wsp_ggml_fp16_t m; // min
|
|
21
|
-
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
|
22
|
-
} block_q4_1;
|
|
23
|
-
static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
|
24
|
-
|
|
25
|
-
#define QK5_0 32
|
|
26
|
-
typedef struct {
|
|
27
|
-
wsp_ggml_fp16_t d; // delta
|
|
28
|
-
uint8_t qh[4]; // 5-th bit of quants
|
|
29
|
-
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
|
30
|
-
} block_q5_0;
|
|
31
|
-
static_assert(sizeof(block_q5_0) == sizeof(wsp_ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
|
32
|
-
|
|
33
|
-
#define QK5_1 32
|
|
34
|
-
typedef struct {
|
|
35
|
-
wsp_ggml_fp16_t d; // delta
|
|
36
|
-
wsp_ggml_fp16_t m; // min
|
|
37
|
-
uint8_t qh[4]; // 5-th bit of quants
|
|
38
|
-
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
|
39
|
-
} block_q5_1;
|
|
40
|
-
static_assert(sizeof(block_q5_1) == 2 * sizeof(wsp_ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
|
41
|
-
|
|
42
|
-
#define QK8_0 32
|
|
43
|
-
typedef struct {
|
|
44
|
-
wsp_ggml_fp16_t d; // delta
|
|
45
|
-
int8_t qs[QK8_0]; // quants
|
|
46
|
-
} block_q8_0;
|
|
47
|
-
static_assert(sizeof(block_q8_0) == sizeof(wsp_ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
|
48
|
-
|
|
49
|
-
#define QK8_1 32
|
|
50
|
-
typedef struct {
|
|
51
|
-
float d; // delta
|
|
52
|
-
float s; // d * sum(qs[i])
|
|
53
|
-
int8_t qs[QK8_1]; // quants
|
|
54
|
-
} block_q8_1;
|
|
55
|
-
static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
|
|
56
|
-
|
|
57
|
-
//
|
|
58
|
-
// Super-block quantization structures
|
|
59
|
-
//
|
|
60
|
-
|
|
61
|
-
// Super-block size
|
|
62
|
-
#ifdef WSP_GGML_QKK_64
|
|
63
|
-
#define QK_K 64
|
|
64
|
-
#define K_SCALE_SIZE 4
|
|
65
|
-
#else
|
|
66
|
-
#define QK_K 256
|
|
67
|
-
#define K_SCALE_SIZE 12
|
|
68
|
-
#endif
|
|
69
|
-
|
|
70
|
-
// 2-bit quantization
|
|
71
|
-
// weight is represented as x = a * q + b
|
|
72
|
-
// 16 blocks of 16 elements each
|
|
73
|
-
// Effectively 2.625 bits per weight
|
|
74
|
-
typedef struct {
|
|
75
|
-
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
|
76
|
-
uint8_t qs[QK_K/4]; // quants
|
|
77
|
-
wsp_ggml_fp16_t d; // super-block scale for quantized scales
|
|
78
|
-
wsp_ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
79
|
-
} block_q2_K;
|
|
80
|
-
static_assert(sizeof(block_q2_K) == 2*sizeof(wsp_ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
|
81
|
-
|
|
82
|
-
// 3-bit quantization
|
|
83
|
-
// weight is represented as x = a * q
|
|
84
|
-
// 16 blocks of 16 elements each
|
|
85
|
-
// Effectively 3.4375 bits per weight
|
|
86
|
-
#ifdef WSP_GGML_QKK_64
|
|
87
|
-
typedef struct {
|
|
88
|
-
uint8_t hmask[QK_K/8]; // quants - high bit
|
|
89
|
-
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
|
90
|
-
uint8_t scales[2];
|
|
91
|
-
wsp_ggml_fp16_t d; // super-block scale
|
|
92
|
-
} block_q3_K;
|
|
93
|
-
static_assert(sizeof(block_q3_K) == sizeof(wsp_ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
|
|
94
|
-
#else
|
|
95
|
-
typedef struct {
|
|
96
|
-
uint8_t hmask[QK_K/8]; // quants - high bit
|
|
97
|
-
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
|
98
|
-
uint8_t scales[12]; // scales, quantized with 6 bits
|
|
99
|
-
wsp_ggml_fp16_t d; // super-block scale
|
|
100
|
-
} block_q3_K;
|
|
101
|
-
static_assert(sizeof(block_q3_K) == sizeof(wsp_ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
|
|
102
|
-
#endif
|
|
6
|
+
#include "ggml.h"
|
|
103
7
|
|
|
104
|
-
//
|
|
105
|
-
// 8 blocks of 32 elements each
|
|
106
|
-
// weight is represented as x = a * q + b
|
|
107
|
-
// Effectively 4.5 bits per weight
|
|
108
|
-
#ifdef WSP_GGML_QKK_64
|
|
109
|
-
typedef struct {
|
|
110
|
-
wsp_ggml_fp16_t d[2]; // super-block scales/mins
|
|
111
|
-
uint8_t scales[2]; // 4-bit block scales/mins
|
|
112
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
|
113
|
-
} block_q4_K;
|
|
114
|
-
static_assert(sizeof(block_q4_K) == 2*sizeof(wsp_ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
|
115
|
-
#else
|
|
116
|
-
typedef struct {
|
|
117
|
-
wsp_ggml_fp16_t d; // super-block scale for quantized scales
|
|
118
|
-
wsp_ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
119
|
-
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
120
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
|
121
|
-
} block_q4_K;
|
|
122
|
-
static_assert(sizeof(block_q4_K) == 2*sizeof(wsp_ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
|
|
123
|
-
#endif
|
|
8
|
+
// GGML internal header
|
|
124
9
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
// weight is represented as x = a * q + b
|
|
128
|
-
// Effectively 5.5 bits per weight
|
|
129
|
-
#ifdef WSP_GGML_QKK_64
|
|
130
|
-
typedef struct {
|
|
131
|
-
wsp_ggml_fp16_t d; // super-block scale
|
|
132
|
-
int8_t scales[QK_K/16]; // 8-bit block scales
|
|
133
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
|
134
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
|
135
|
-
} block_q5_K;
|
|
136
|
-
static_assert(sizeof(block_q5_K) == sizeof(wsp_ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
|
137
|
-
#else
|
|
138
|
-
typedef struct {
|
|
139
|
-
wsp_ggml_fp16_t d; // super-block scale for quantized scales
|
|
140
|
-
wsp_ggml_fp16_t dmin; // super-block scale for quantized mins
|
|
141
|
-
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
|
142
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
|
143
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
|
144
|
-
} block_q5_K;
|
|
145
|
-
static_assert(sizeof(block_q5_K) == 2*sizeof(wsp_ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
|
10
|
+
#ifdef __cplusplus
|
|
11
|
+
extern "C" {
|
|
146
12
|
#endif
|
|
147
13
|
|
|
148
|
-
//
|
|
149
|
-
// weight is represented as x = a * q
|
|
150
|
-
// 16 blocks of 16 elements each
|
|
151
|
-
// Effectively 6.5625 bits per weight
|
|
152
|
-
typedef struct {
|
|
153
|
-
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
|
154
|
-
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
|
155
|
-
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
|
156
|
-
wsp_ggml_fp16_t d; // super-block scale
|
|
157
|
-
} block_q6_K;
|
|
158
|
-
static_assert(sizeof(block_q6_K) == sizeof(wsp_ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
|
|
159
|
-
|
|
160
|
-
// This is only used for intermediate quantization and dot products
|
|
161
|
-
typedef struct {
|
|
162
|
-
float d; // delta
|
|
163
|
-
int8_t qs[QK_K]; // quants
|
|
164
|
-
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
|
|
165
|
-
} block_q8_K;
|
|
166
|
-
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
|
|
167
|
-
|
|
168
|
-
// (Almost) "true" 2-bit quantization.
|
|
169
|
-
// Due to the need to use blocks as per ggml design, it ends up using
|
|
170
|
-
// 2.0625 bpw because of the 16-bit scale for each block of 256.
|
|
171
|
-
typedef struct {
|
|
172
|
-
wsp_ggml_fp16_t d;
|
|
173
|
-
uint16_t qs[QK_K/8];
|
|
174
|
-
} block_iq2_xxs;
|
|
175
|
-
static_assert(sizeof(block_iq2_xxs) == sizeof(wsp_ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
|
176
|
-
|
|
177
|
-
// 2.3125 bpw quants
|
|
178
|
-
typedef struct {
|
|
179
|
-
wsp_ggml_fp16_t d;
|
|
180
|
-
uint16_t qs[QK_K/8];
|
|
181
|
-
uint8_t scales[QK_K/32];
|
|
182
|
-
} block_iq2_xs;
|
|
183
|
-
static_assert(sizeof(block_iq2_xs) == sizeof(wsp_ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
|
14
|
+
// NOTE: these functions are defined as WSP_GGML_API because they are used by the CPU backend
|
|
184
15
|
|
|
185
16
|
// Quantization
|
|
186
|
-
void
|
|
187
|
-
void
|
|
188
|
-
void
|
|
189
|
-
void
|
|
190
|
-
void
|
|
191
|
-
void
|
|
192
|
-
|
|
193
|
-
void
|
|
194
|
-
void
|
|
195
|
-
void
|
|
196
|
-
void
|
|
197
|
-
void
|
|
198
|
-
void
|
|
199
|
-
|
|
200
|
-
void
|
|
201
|
-
void
|
|
202
|
-
|
|
203
|
-
void
|
|
204
|
-
void
|
|
205
|
-
void
|
|
206
|
-
|
|
207
|
-
void
|
|
208
|
-
void wsp_quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
|
|
209
|
-
void wsp_quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
|
|
210
|
-
void wsp_quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
|
|
211
|
-
void wsp_quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
|
|
212
|
-
void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
|
|
17
|
+
WSP_GGML_API void wsp_quantize_row_q4_0_ref(const float * WSP_GGML_RESTRICT x, block_q4_0 * WSP_GGML_RESTRICT y, int64_t k);
|
|
18
|
+
WSP_GGML_API void wsp_quantize_row_q4_1_ref(const float * WSP_GGML_RESTRICT x, block_q4_1 * WSP_GGML_RESTRICT y, int64_t k);
|
|
19
|
+
WSP_GGML_API void wsp_quantize_row_q5_0_ref(const float * WSP_GGML_RESTRICT x, block_q5_0 * WSP_GGML_RESTRICT y, int64_t k);
|
|
20
|
+
WSP_GGML_API void wsp_quantize_row_q5_1_ref(const float * WSP_GGML_RESTRICT x, block_q5_1 * WSP_GGML_RESTRICT y, int64_t k);
|
|
21
|
+
WSP_GGML_API void wsp_quantize_row_q8_0_ref(const float * WSP_GGML_RESTRICT x, block_q8_0 * WSP_GGML_RESTRICT y, int64_t k);
|
|
22
|
+
WSP_GGML_API void wsp_quantize_row_q8_1_ref(const float * WSP_GGML_RESTRICT x, block_q8_1 * WSP_GGML_RESTRICT y, int64_t k);
|
|
23
|
+
|
|
24
|
+
WSP_GGML_API void wsp_quantize_row_q2_K_ref(const float * WSP_GGML_RESTRICT x, block_q2_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
25
|
+
WSP_GGML_API void wsp_quantize_row_q3_K_ref(const float * WSP_GGML_RESTRICT x, block_q3_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
26
|
+
WSP_GGML_API void wsp_quantize_row_q4_K_ref(const float * WSP_GGML_RESTRICT x, block_q4_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
27
|
+
WSP_GGML_API void wsp_quantize_row_q5_K_ref(const float * WSP_GGML_RESTRICT x, block_q5_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
28
|
+
WSP_GGML_API void wsp_quantize_row_q6_K_ref(const float * WSP_GGML_RESTRICT x, block_q6_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
29
|
+
WSP_GGML_API void wsp_quantize_row_q8_K_ref(const float * WSP_GGML_RESTRICT x, block_q8_K * WSP_GGML_RESTRICT y, int64_t k);
|
|
30
|
+
|
|
31
|
+
WSP_GGML_API void wsp_quantize_row_tq1_0_ref(const float * WSP_GGML_RESTRICT x, block_tq1_0 * WSP_GGML_RESTRICT y, int64_t k);
|
|
32
|
+
WSP_GGML_API void wsp_quantize_row_tq2_0_ref(const float * WSP_GGML_RESTRICT x, block_tq2_0 * WSP_GGML_RESTRICT y, int64_t k);
|
|
33
|
+
|
|
34
|
+
WSP_GGML_API void wsp_quantize_row_iq3_xxs_ref(const float * WSP_GGML_RESTRICT x, block_iq3_xxs * WSP_GGML_RESTRICT y, int64_t k);
|
|
35
|
+
WSP_GGML_API void wsp_quantize_row_iq4_nl_ref (const float * WSP_GGML_RESTRICT x, block_iq4_nl * WSP_GGML_RESTRICT y, int64_t k);
|
|
36
|
+
WSP_GGML_API void wsp_quantize_row_iq4_xs_ref (const float * WSP_GGML_RESTRICT x, block_iq4_xs * WSP_GGML_RESTRICT y, int64_t k);
|
|
37
|
+
WSP_GGML_API void wsp_quantize_row_iq3_s_ref (const float * WSP_GGML_RESTRICT x, block_iq3_s * WSP_GGML_RESTRICT y, int64_t k);
|
|
38
|
+
WSP_GGML_API void wsp_quantize_row_iq2_s_ref (const float * WSP_GGML_RESTRICT x, block_iq2_s * WSP_GGML_RESTRICT y, int64_t k);
|
|
213
39
|
|
|
214
40
|
// Dequantization
|
|
215
|
-
void wsp_dewsp_quantize_row_q4_0(const block_q4_0 *
|
|
216
|
-
void wsp_dewsp_quantize_row_q4_1(const block_q4_1 *
|
|
217
|
-
void wsp_dewsp_quantize_row_q5_0(const block_q5_0 *
|
|
218
|
-
void wsp_dewsp_quantize_row_q5_1(const block_q5_1 *
|
|
219
|
-
void wsp_dewsp_quantize_row_q8_0(const block_q8_0 *
|
|
220
|
-
//void wsp_dewsp_quantize_row_q8_1(const block_q8_1 *
|
|
221
|
-
|
|
222
|
-
void wsp_dewsp_quantize_row_q2_K(const block_q2_K *
|
|
223
|
-
void wsp_dewsp_quantize_row_q3_K(const block_q3_K *
|
|
224
|
-
void wsp_dewsp_quantize_row_q4_K(const block_q4_K *
|
|
225
|
-
void wsp_dewsp_quantize_row_q5_K(const block_q5_K *
|
|
226
|
-
void wsp_dewsp_quantize_row_q6_K(const block_q6_K *
|
|
227
|
-
void wsp_dewsp_quantize_row_q8_K(const block_q8_K *
|
|
228
|
-
|
|
229
|
-
void
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
void
|
|
233
|
-
void
|
|
234
|
-
void
|
|
235
|
-
void
|
|
236
|
-
void
|
|
41
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
42
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
43
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
44
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
45
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
46
|
+
//WSP_GGML_API void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
47
|
+
|
|
48
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q2_K(const block_q2_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
49
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q3_K(const block_q3_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
50
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q4_K(const block_q4_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
51
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q5_K(const block_q5_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
52
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q6_K(const block_q6_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
53
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_q8_K(const block_q8_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
54
|
+
|
|
55
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_tq1_0(const block_tq1_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
56
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_tq2_0(const block_tq2_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
57
|
+
|
|
58
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq2_xxs(const block_iq2_xxs * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
59
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq2_xs (const block_iq2_xs * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
60
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq2_s (const block_iq2_s * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
61
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq3_xxs(const block_iq3_xxs * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
62
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq1_s (const block_iq1_s * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
63
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq1_m (const block_iq1_m * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
64
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq4_nl (const block_iq4_nl * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
65
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq4_xs (const block_iq4_xs * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
66
|
+
WSP_GGML_API void wsp_dewsp_quantize_row_iq3_s (const block_iq3_s * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
|
|
237
67
|
|
|
238
|
-
void wsp_ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
239
|
-
void wsp_ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
240
|
-
void wsp_ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
241
|
-
void wsp_ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
242
|
-
void wsp_ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
243
|
-
void wsp_ggml_vec_dot_iq2_xxs_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
244
|
-
void wsp_ggml_vec_dot_iq2_xs_q8_K (int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
|
245
|
-
|
|
246
|
-
//
|
|
247
68
|
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
|
248
|
-
|
|
249
|
-
size_t
|
|
250
|
-
size_t
|
|
251
|
-
size_t
|
|
252
|
-
size_t
|
|
253
|
-
size_t
|
|
254
|
-
size_t
|
|
255
|
-
size_t
|
|
256
|
-
size_t
|
|
257
|
-
|
|
258
|
-
size_t
|
|
259
|
-
size_t
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
69
|
+
// Matrix quantization entry points for the iq2_xxs, iq2_xs, iq2_s, iq3_xxs, iq1_s, iq1_m, iq4_nl,
// iq4_xs and iq3_s formats. All share one signature:
//   src       - read-only float input
//   dst       - untyped output buffer
//   nrows     - int64_t row count
//   n_per_row - int64_t per-row count
//   imatrix   - read-only float pointer (not restrict-qualified, unlike src and dst)
// and return a size_t.
// NOTE(review): presumably the return value is the number of bytes written to dst and imatrix holds
// per-column importance weights; whether imatrix may be NULL for a given format is not visible
// here - confirm against the implementation before relying on either.
WSP_GGML_API size_t wsp_quantize_iq2_xxs(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
70
|
+
WSP_GGML_API size_t wsp_quantize_iq2_xs (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
71
|
+
WSP_GGML_API size_t wsp_quantize_iq2_s (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
72
|
+
WSP_GGML_API size_t wsp_quantize_iq3_xxs(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
73
|
+
WSP_GGML_API size_t wsp_quantize_iq1_s (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
74
|
+
WSP_GGML_API size_t wsp_quantize_iq1_m (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
75
|
+
WSP_GGML_API size_t wsp_quantize_iq4_nl (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
76
|
+
WSP_GGML_API size_t wsp_quantize_iq4_xs (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
77
|
+
WSP_GGML_API size_t wsp_quantize_iq3_s (const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
78
|
+
|
|
79
|
+
// Matrix quantization entry points for the tq1_0 and tq2_0 formats.
// Both take a read-only float input (src), an untyped output buffer (dst), int64_t nrows and
// n_per_row counts, and a read-only float imatrix pointer, and return a size_t.
// NOTE(review): presumably the return value is the byte size of the quantized output; whether
// imatrix is actually consulted for these formats is not visible here - confirm.
WSP_GGML_API size_t wsp_quantize_tq1_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
80
|
+
WSP_GGML_API size_t wsp_quantize_tq2_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
81
|
+
|
|
82
|
+
// Matrix quantization entry points for the q2_K, q3_K, q4_K, q5_K, q6_K, q4_0, q4_1, q5_0, q5_1
// and q8_0 formats. All share one signature: read-only float input (src), untyped output buffer
// (dst), int64_t nrows and n_per_row counts, and a read-only float imatrix pointer; each returns
// a size_t.
// NOTE(review): presumably the return value is the number of bytes written to dst, and imatrix is
// optional (NULL allowed) for these formats - neither is established by this header; confirm
// against the implementation.
WSP_GGML_API size_t wsp_quantize_q2_K(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
83
|
+
WSP_GGML_API size_t wsp_quantize_q3_K(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
84
|
+
WSP_GGML_API size_t wsp_quantize_q4_K(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
85
|
+
WSP_GGML_API size_t wsp_quantize_q5_K(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
86
|
+
WSP_GGML_API size_t wsp_quantize_q6_K(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
87
|
+
WSP_GGML_API size_t wsp_quantize_q4_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
88
|
+
WSP_GGML_API size_t wsp_quantize_q4_1(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
89
|
+
WSP_GGML_API size_t wsp_quantize_q5_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
90
|
+
WSP_GGML_API size_t wsp_quantize_q5_1(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
91
|
+
WSP_GGML_API size_t wsp_quantize_q8_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
92
|
+
|
|
93
|
+
// Paired init/free entry points. The iq2xs pair is keyed by an enum wsp_ggml_type value; the
// iq3xs pair is keyed by an integer grid_size. None of them returns a value.
// NOTE(review): presumably init builds, and free releases, per-type lookup data used by the
// iq2/iq3 quantizers. Whether init must be called before the wsp_quantize_iq* functions, and
// whether these calls are thread-safe or idempotent, is not visible here - confirm against
// the implementation.
WSP_GGML_API void wsp_iq2xs_init_impl(enum wsp_ggml_type type);
|
|
94
|
+
WSP_GGML_API void wsp_iq2xs_free_impl(enum wsp_ggml_type type);
|
|
95
|
+
WSP_GGML_API void wsp_iq3xs_init_impl(int grid_size);
|
|
96
|
+
WSP_GGML_API void wsp_iq3xs_free_impl(int grid_size);
|
|
97
|
+
|
|
98
|
+
#ifdef __cplusplus
|
|
99
|
+
}
|
|
100
|
+
#endif
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#include "ggml-threading.h"
|
|
2
|
+
#include <mutex>
|
|
3
|
+
|
|
4
|
+
// The one mutex behind the critical-section API in this file: locked by
// wsp_ggml_critical_section_start() and unlocked by wsp_ggml_critical_section_end().
// It is defined at namespace scope without `static`, so it has external linkage.
// NOTE(review): confirm no other translation unit references it before narrowing the linkage.
std::mutex wsp_ggml_critical_section_mutex;
|
|
5
|
+
|
|
6
|
+
// Enters the critical section by locking wsp_ggml_critical_section_mutex; blocks until the
// mutex can be acquired. The lock stays held after this function returns and is only released
// by a later call to wsp_ggml_critical_section_end().
// std::mutex is non-recursive: calling this again on the same thread before the matching end
// call is undefined behavior (in practice, usually a deadlock).
void wsp_ggml_critical_section_start() {
|
|
7
|
+
wsp_ggml_critical_section_mutex.lock();
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
// Leaves the critical section by unlocking wsp_ggml_critical_section_mutex.
// Must be called on the thread that currently holds the lock (i.e. after a matching
// wsp_ggml_critical_section_start()); unlocking a std::mutex that the calling thread does not
// own is undefined behavior.
void wsp_ggml_critical_section_end(void) {
|
|
11
|
+
wsp_ggml_critical_section_mutex.unlock();
|
|
12
|
+
}
|
|
Binary file
|
|
Binary file
|