whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,197 @@
1
+ //
2
+ // whisper-encoder-impl.m
3
+ //
4
+ // This file was automatically generated and should not be edited.
5
+ //
6
+
7
+ #if !__has_feature(objc_arc)
8
+ #error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
9
+ #endif
10
+
11
+ #import "whisper-encoder-impl.h"
12
+
13
+ @implementation whisper_encoder_implInput
14
+
15
+ - (instancetype)initWithLogmel_data:(MLMultiArray *)logmel_data {
16
+ self = [super init];
17
+ if (self) {
18
+ _logmel_data = logmel_data;
19
+ }
20
+ return self;
21
+ }
22
+
23
+ - (NSSet<NSString *> *)featureNames {
24
+ return [NSSet setWithArray:@[@"logmel_data"]];
25
+ }
26
+
27
+ - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
28
+ if ([featureName isEqualToString:@"logmel_data"]) {
29
+ return [MLFeatureValue featureValueWithMultiArray:self.logmel_data];
30
+ }
31
+ return nil;
32
+ }
33
+
34
+ @end
35
+
36
+ @implementation whisper_encoder_implOutput
37
+
38
+ - (instancetype)initWithOutput:(MLMultiArray *)output {
39
+ self = [super init];
40
+ if (self) {
41
+ _output = output;
42
+ }
43
+ return self;
44
+ }
45
+
46
+ - (NSSet<NSString *> *)featureNames {
47
+ return [NSSet setWithArray:@[@"output"]];
48
+ }
49
+
50
+ - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
51
+ if ([featureName isEqualToString:@"output"]) {
52
+ return [MLFeatureValue featureValueWithMultiArray:self.output];
53
+ }
54
+ return nil;
55
+ }
56
+
57
+ @end
58
+
59
+ @implementation whisper_encoder_impl
60
+
61
+
62
+ /**
63
+ URL of the underlying .mlmodelc directory.
64
+ */
65
+ + (nullable NSURL *)URLOfModelInThisBundle {
66
+ NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"whisper_encoder_impl" ofType:@"mlmodelc"];
67
+ if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load whisper-encoder-impl.mlmodelc in the bundle resource"); return nil; }
68
+ return [NSURL fileURLWithPath:assetPath];
69
+ }
70
+
71
+
72
+ /**
73
+ Initialize whisper_encoder_impl instance from an existing MLModel object.
74
+
75
+ Usually the application does not use this initializer unless it makes a subclass of whisper_encoder_impl.
76
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
77
+ */
78
+ - (instancetype)initWithMLModel:(MLModel *)model {
79
+ self = [super init];
80
+ if (!self) { return nil; }
81
+ _model = model;
82
+ if (_model == nil) { return nil; }
83
+ return self;
84
+ }
85
+
86
+
87
+ /**
88
+ Initialize whisper_encoder_impl instance with the model in this bundle.
89
+ */
90
+ - (nullable instancetype)init {
91
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
92
+ }
93
+
94
+
95
+ /**
96
+ Initialize whisper_encoder_impl instance with the model in this bundle.
97
+
98
+ @param configuration The model configuration object
99
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
100
+ */
101
+ - (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
102
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
103
+ }
104
+
105
+
106
+ /**
107
+ Initialize whisper_encoder_impl instance from the model URL.
108
+
109
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
110
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
111
+ */
112
+ - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
113
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
114
+ if (model == nil) { return nil; }
115
+ return [self initWithMLModel:model];
116
+ }
117
+
118
+
119
+ /**
120
+ Initialize whisper_encoder_impl instance from the model URL.
121
+
122
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
123
+ @param configuration The model configuration object
124
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
125
+ */
126
+ - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
127
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
128
+ if (model == nil) { return nil; }
129
+ return [self initWithMLModel:model];
130
+ }
131
+
132
+
133
+ /**
134
+ Construct whisper_encoder_impl instance asynchronously with configuration.
135
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
136
+
137
+ @param configuration The model configuration
138
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
139
+ */
140
+ + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
141
+ [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
142
+ configuration:configuration
143
+ completionHandler:handler];
144
+ }
145
+
146
+
147
+ /**
148
+ Construct whisper_encoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
149
+
150
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
151
+
152
+ @param modelURL The model URL.
153
+ @param configuration The model configuration
154
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
155
+ */
156
+ + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
157
+ [MLModel loadContentsOfURL:modelURL
158
+ configuration:configuration
159
+ completionHandler:^(MLModel *model, NSError *error) {
160
+ if (model != nil) {
161
+ whisper_encoder_impl *typedModel = [[whisper_encoder_impl alloc] initWithMLModel:model];
162
+ handler(typedModel, nil);
163
+ } else {
164
+ handler(nil, error);
165
+ }
166
+ }];
167
+ }
168
+
169
+ - (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
170
+ return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
171
+ }
172
+
173
+ - (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
174
+ id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
175
+ if (!outFeatures) { return nil; }
176
+ return [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
177
+ }
178
+
179
+ - (nullable whisper_encoder_implOutput *)predictionFromLogmel_data:(MLMultiArray *)logmel_data error:(NSError * _Nullable __autoreleasing * _Nullable)error {
180
+ whisper_encoder_implInput *input_ = [[whisper_encoder_implInput alloc] initWithLogmel_data:logmel_data];
181
+ return [self predictionFromFeatures:input_ error:error];
182
+ }
183
+
184
+ - (nullable NSArray<whisper_encoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_encoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
185
+ id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
186
+ id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
187
+ if (!outBatch) { return nil; }
188
+ NSMutableArray<whisper_encoder_implOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
189
+ for (NSInteger i = 0; i < outBatch.count; i++) {
190
+ id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
191
+ whisper_encoder_implOutput * result = [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
192
+ [results addObject:result];
193
+ }
194
+ return results;
195
+ }
196
+
197
+ @end
@@ -0,0 +1,26 @@
1
+ // Wrapper of the Core ML Whisper Encoder model
2
+ //
3
+ // Code is derived from the work of Github user @wangchou
4
+ // ref: https://github.com/wangchou/callCoreMLFromCpp
5
+
6
+ #include <stdint.h>
7
+
8
+ #if __cplusplus
9
+ extern "C" {
10
+ #endif
11
+
12
+ struct whisper_coreml_context;
13
+
14
+ struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
15
+ void whisper_coreml_free(struct whisper_coreml_context * ctx);
16
+
17
+ void whisper_coreml_encode(
18
+ const whisper_coreml_context * ctx,
19
+ int64_t n_ctx,
20
+ int64_t n_mel,
21
+ float * mel,
22
+ float * out);
23
+
24
+ #if __cplusplus
25
+ }
26
+ #endif
@@ -0,0 +1,108 @@
1
+ #include "openvino/whisper-openvino-encoder.h"
2
+ #include "ggml.h"
3
+ #include <openvino/openvino.hpp>
4
+ #include <iostream>
5
+
6
+ struct whisper_openvino_context {
7
+ ov::InferRequest inferRequest;
8
+ };
9
+
10
+ struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
11
+ const char* device,
12
+ const char* cache_dir)
13
+ {
14
+ if (!path_model || !device) {
15
+ fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
16
+ return nullptr;
17
+ }
18
+
19
+ fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
20
+ __func__, path_model, device, cache_dir ? cache_dir : "(not set)");
21
+
22
+ whisper_openvino_context *context = new whisper_openvino_context;
23
+ try {
24
+ ov::Core core;
25
+
26
+ if (cache_dir) {
27
+ // enables caching of device-specific 'blobs' during core.compile_model
28
+ // routine. This speeds up calls to compile_model for successive runs.
29
+ core.set_property(ov::cache_dir(cache_dir));
30
+ }
31
+
32
+ //Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
33
+ std::shared_ptr<ov::Model> model = core.read_model(path_model);
34
+
35
+ // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
36
+ auto compiledModel = core.compile_model(model, device);
37
+
38
+ // From the compiled model object, create an infer request. This is the thing that we
39
+ // we will use later on to trigger inference execution.
40
+ context->inferRequest = compiledModel.create_infer_request();
41
+ }
42
+ catch (const std::exception& error) {
43
+ std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
44
+ delete context;
45
+ context = nullptr;
46
+ }
47
+
48
+ return context;
49
+ }
50
+
51
+ void whisper_openvino_free(struct whisper_openvino_context * ctx) {
52
+ if( ctx ) {
53
+ delete ctx;
54
+ }
55
+ }
56
+
57
+ int whisper_openvino_encode(
58
+ whisper_openvino_context* ctx,
59
+ ggml_tensor* mel,
60
+ ggml_tensor* out) {
61
+
62
+ if (!ctx || !mel || !out) {
63
+ fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
64
+ return 0;
65
+ }
66
+
67
+ if (ggml_n_dims(mel) != 2) {
68
+ fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
69
+ __func__, ggml_n_dims(mel));
70
+ return 0;
71
+ }
72
+
73
+ if (ggml_n_dims(out) != 2) {
74
+ fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
75
+ __func__, ggml_n_dims(out));
76
+ return 0;
77
+ }
78
+
79
+ try {
80
+
81
+ //wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
82
+ {
83
+ // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
84
+ ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
85
+ ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
86
+ ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
87
+ ctx->inferRequest.set_input_tensor(input_tensor);
88
+ }
89
+
90
+ //wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
91
+ {
92
+ // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
93
+ ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
94
+ ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
95
+ ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
96
+ ctx->inferRequest.set_output_tensor(out_tensor);
97
+ }
98
+
99
+ //run inference
100
+ ctx->inferRequest.infer();
101
+ }
102
+ catch (const std::exception& error) {
103
+ std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
104
+ return 0;
105
+ }
106
+
107
+ return 1;
108
+ }
@@ -0,0 +1,31 @@
1
+ // Wrapper of the OpenVINO Whisper Encoder model
2
+ //
3
+
4
+ #if __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ struct whisper_openvino_context;
9
+
10
+ // initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
11
+ // path to cache_dir. Returns null upon failure.
12
+ struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
13
+ const char * device,
14
+ const char * cache_dir);
15
+
16
+ // clean up a ctx previously returned from whisper_openvino_init()
17
+ void whisper_openvino_free(struct whisper_openvino_context * ctx);
18
+
19
+ struct ggml_tensor;
20
+
21
+ // Perform encode using OpenVINO.
22
+ // Returns 1 on success
23
+ // Returns 0 on failure
24
+ int whisper_openvino_encode(
25
+ whisper_openvino_context* ctx,
26
+ ggml_tensor* mel,
27
+ ggml_tensor* out);
28
+
29
+ #if __cplusplus
30
+ }
31
+ #endif