whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,197 @@
1
+ //
2
+ // whisper-encoder-impl.m
3
+ //
4
+ // This file was automatically generated and should not be edited.
5
+ //
6
+
7
+ #if !__has_feature(objc_arc)
8
+ #error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
9
+ #endif
10
+
11
+ #import "whisper-encoder-impl.h"
12
+
13
+ @implementation whisper_encoder_implInput
14
+
15
+ - (instancetype)initWithLogmel_data:(MLMultiArray *)logmel_data {
16
+ self = [super init];
17
+ if (self) {
18
+ _logmel_data = logmel_data;
19
+ }
20
+ return self;
21
+ }
22
+
23
+ - (NSSet<NSString *> *)featureNames {
24
+ return [NSSet setWithArray:@[@"logmel_data"]];
25
+ }
26
+
27
+ - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
28
+ if ([featureName isEqualToString:@"logmel_data"]) {
29
+ return [MLFeatureValue featureValueWithMultiArray:self.logmel_data];
30
+ }
31
+ return nil;
32
+ }
33
+
34
+ @end
35
+
36
+ @implementation whisper_encoder_implOutput
37
+
38
+ - (instancetype)initWithOutput:(MLMultiArray *)output {
39
+ self = [super init];
40
+ if (self) {
41
+ _output = output;
42
+ }
43
+ return self;
44
+ }
45
+
46
+ - (NSSet<NSString *> *)featureNames {
47
+ return [NSSet setWithArray:@[@"output"]];
48
+ }
49
+
50
+ - (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
51
+ if ([featureName isEqualToString:@"output"]) {
52
+ return [MLFeatureValue featureValueWithMultiArray:self.output];
53
+ }
54
+ return nil;
55
+ }
56
+
57
+ @end
58
+
59
+ @implementation whisper_encoder_impl
60
+
61
+
62
+ /**
63
+ URL of the underlying .mlmodelc directory.
64
+ */
65
+ + (nullable NSURL *)URLOfModelInThisBundle {
66
+ NSString *assetPath = [[NSBundle bundleForClass:[self class]] pathForResource:@"whisper_encoder_impl" ofType:@"mlmodelc"];
67
+ if (nil == assetPath) { os_log_error(OS_LOG_DEFAULT, "Could not load whisper-encoder-impl.mlmodelc in the bundle resource"); return nil; }
68
+ return [NSURL fileURLWithPath:assetPath];
69
+ }
70
+
71
+
72
+ /**
73
+ Initialize whisper_encoder_impl instance from an existing MLModel object.
74
+
75
+ Usually the application does not use this initializer unless it makes a subclass of whisper_encoder_impl.
76
+ Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
77
+ */
78
+ - (instancetype)initWithMLModel:(MLModel *)model {
79
+ self = [super init];
80
+ if (!self) { return nil; }
81
+ _model = model;
82
+ if (_model == nil) { return nil; }
83
+ return self;
84
+ }
85
+
86
+
87
+ /**
88
+ Initialize whisper_encoder_impl instance with the model in this bundle.
89
+ */
90
+ - (nullable instancetype)init {
91
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle error:nil];
92
+ }
93
+
94
+
95
+ /**
96
+ Initialize whisper_encoder_impl instance with the model in this bundle.
97
+
98
+ @param configuration The model configuration object
99
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
100
+ */
101
+ - (nullable instancetype)initWithConfiguration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
102
+ return [self initWithContentsOfURL:(NSURL * _Nonnull)self.class.URLOfModelInThisBundle configuration:configuration error:error];
103
+ }
104
+
105
+
106
+ /**
107
+ Initialize whisper_encoder_impl instance from the model URL.
108
+
109
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
110
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
111
+ */
112
+ - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL error:(NSError * _Nullable __autoreleasing * _Nullable)error {
113
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL error:error];
114
+ if (model == nil) { return nil; }
115
+ return [self initWithMLModel:model];
116
+ }
117
+
118
+
119
+ /**
120
+ Initialize whisper_encoder_impl instance from the model URL.
121
+
122
+ @param modelURL URL to the .mlmodelc directory for whisper_encoder_impl.
123
+ @param configuration The model configuration object
124
+ @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
125
+ */
126
+ - (nullable instancetype)initWithContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration error:(NSError * _Nullable __autoreleasing * _Nullable)error {
127
+ MLModel *model = [MLModel modelWithContentsOfURL:modelURL configuration:configuration error:error];
128
+ if (model == nil) { return nil; }
129
+ return [self initWithMLModel:model];
130
+ }
131
+
132
+
133
+ /**
134
+ Construct whisper_encoder_impl instance asynchronously with configuration.
135
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
136
+
137
+ @param configuration The model configuration
138
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
139
+ */
140
+ + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
141
+ [self loadContentsOfURL:(NSURL * _Nonnull)[self URLOfModelInThisBundle]
142
+ configuration:configuration
143
+ completionHandler:handler];
144
+ }
145
+
146
+
147
+ /**
148
+ Construct whisper_encoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
149
+
150
+ Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
151
+
152
+ @param modelURL The model URL.
153
+ @param configuration The model configuration
154
+ @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_encoder_impl instance or NSError object.
155
+ */
156
+ + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_encoder_impl * _Nullable model, NSError * _Nullable error))handler {
157
+ [MLModel loadContentsOfURL:modelURL
158
+ configuration:configuration
159
+ completionHandler:^(MLModel *model, NSError *error) {
160
+ if (model != nil) {
161
+ whisper_encoder_impl *typedModel = [[whisper_encoder_impl alloc] initWithMLModel:model];
162
+ handler(typedModel, nil);
163
+ } else {
164
+ handler(nil, error);
165
+ }
166
+ }];
167
+ }
168
+
169
+ - (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input error:(NSError * _Nullable __autoreleasing * _Nullable)error {
170
+ return [self predictionFromFeatures:input options:[[MLPredictionOptions alloc] init] error:error];
171
+ }
172
+
173
+ - (nullable whisper_encoder_implOutput *)predictionFromFeatures:(whisper_encoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
174
+ id<MLFeatureProvider> outFeatures = [self.model predictionFromFeatures:input options:options error:error];
175
+ if (!outFeatures) { return nil; }
176
+ return [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
177
+ }
178
+
179
+ - (nullable whisper_encoder_implOutput *)predictionFromLogmel_data:(MLMultiArray *)logmel_data error:(NSError * _Nullable __autoreleasing * _Nullable)error {
180
+ whisper_encoder_implInput *input_ = [[whisper_encoder_implInput alloc] initWithLogmel_data:logmel_data];
181
+ return [self predictionFromFeatures:input_ error:error];
182
+ }
183
+
184
+ - (nullable NSArray<whisper_encoder_implOutput *> *)predictionsFromInputs:(NSArray<whisper_encoder_implInput*> *)inputArray options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error {
185
+ id<MLBatchProvider> inBatch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray:inputArray];
186
+ id<MLBatchProvider> outBatch = [self.model predictionsFromBatch:inBatch options:options error:error];
187
+ if (!outBatch) { return nil; }
188
+ NSMutableArray<whisper_encoder_implOutput*> *results = [NSMutableArray arrayWithCapacity:(NSUInteger)outBatch.count];
189
+ for (NSInteger i = 0; i < outBatch.count; i++) {
190
+ id<MLFeatureProvider> resultProvider = [outBatch featuresAtIndex:i];
191
+ whisper_encoder_implOutput * result = [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[resultProvider featureValueForName:@"output"].multiArrayValue];
192
+ [results addObject:result];
193
+ }
194
+ return results;
195
+ }
196
+
197
+ @end
@@ -0,0 +1,26 @@
1
+ // Wrapper of the Core ML Whisper Encoder model
2
+ //
3
+ // Code is derived from the work of Github user @wangchou
4
+ // ref: https://github.com/wangchou/callCoreMLFromCpp
5
+
6
+ #include <stdint.h>
7
+
8
+ #if __cplusplus
9
+ extern "C" {
10
+ #endif
11
+
12
+ struct whisper_coreml_context;
13
+
14
+ struct whisper_coreml_context * whisper_coreml_init(const char * path_model);
15
+ void whisper_coreml_free(struct whisper_coreml_context * ctx);
16
+
17
+ void whisper_coreml_encode(
18
+ const whisper_coreml_context * ctx,
19
+ int64_t n_ctx,
20
+ int64_t n_mel,
21
+ float * mel,
22
+ float * out);
23
+
24
+ #if __cplusplus
25
+ }
26
+ #endif
@@ -0,0 +1,108 @@
1
+ #include "openvino/whisper-openvino-encoder.h"
2
+ #include "ggml.h"
3
+ #include <openvino/openvino.hpp>
4
+ #include <iostream>
5
+
6
+ struct whisper_openvino_context {
7
+ ov::InferRequest inferRequest;
8
+ };
9
+
10
+ struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
11
+ const char* device,
12
+ const char* cache_dir)
13
+ {
14
+ if (!path_model || !device) {
15
+ fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
16
+ return nullptr;
17
+ }
18
+
19
+ fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
20
+ __func__, path_model, device, cache_dir ? cache_dir : "(not set)");
21
+
22
+ whisper_openvino_context *context = new whisper_openvino_context;
23
+ try {
24
+ ov::Core core;
25
+
26
+ if (cache_dir) {
27
+ // enables caching of device-specific 'blobs' during core.compile_model
28
+ // routine. This speeds up calls to compile_model for successive runs.
29
+ core.set_property(ov::cache_dir(cache_dir));
30
+ }
31
+
32
+ //Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
33
+ std::shared_ptr<ov::Model> model = core.read_model(path_model);
34
+
35
+ // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
36
+ auto compiledModel = core.compile_model(model, device);
37
+
38
+ // From the compiled model object, create an infer request. This is the thing that we
39
+ // we will use later on to trigger inference execution.
40
+ context->inferRequest = compiledModel.create_infer_request();
41
+ }
42
+ catch (const std::exception& error) {
43
+ std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
44
+ delete context;
45
+ context = nullptr;
46
+ }
47
+
48
+ return context;
49
+ }
50
+
51
+ void whisper_openvino_free(struct whisper_openvino_context * ctx) {
52
+ if( ctx ) {
53
+ delete ctx;
54
+ }
55
+ }
56
+
57
+ int whisper_openvino_encode(
58
+ whisper_openvino_context* ctx,
59
+ ggml_tensor* mel,
60
+ ggml_tensor* out) {
61
+
62
+ if (!ctx || !mel || !out) {
63
+ fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
64
+ return 0;
65
+ }
66
+
67
+ if (ggml_n_dims(mel) != 2) {
68
+ fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
69
+ __func__, ggml_n_dims(mel));
70
+ return 0;
71
+ }
72
+
73
+ if (ggml_n_dims(out) != 2) {
74
+ fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
75
+ __func__, ggml_n_dims(out));
76
+ return 0;
77
+ }
78
+
79
+ try {
80
+
81
+ //wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
82
+ {
83
+ // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
84
+ ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
85
+ ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
86
+ ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
87
+ ctx->inferRequest.set_input_tensor(input_tensor);
88
+ }
89
+
90
+ //wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
91
+ {
92
+ // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
93
+ ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
94
+ ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
95
+ ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
96
+ ctx->inferRequest.set_output_tensor(out_tensor);
97
+ }
98
+
99
+ //run inference
100
+ ctx->inferRequest.infer();
101
+ }
102
+ catch (const std::exception& error) {
103
+ std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
104
+ return 0;
105
+ }
106
+
107
+ return 1;
108
+ }
@@ -0,0 +1,31 @@
1
+ // Wrapper of the OpenVINO Whisper Encoder model
2
+ //
3
+
4
+ #if __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ struct whisper_openvino_context;
9
+
10
+ // initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
11
+ // path to cache_dir. Returns null upon failure.
12
+ struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
13
+ const char * device,
14
+ const char * cache_dir);
15
+
16
+ // clean up a ctx previously returned from whisper_openvino_init()
17
+ void whisper_openvino_free(struct whisper_openvino_context * ctx);
18
+
19
+ struct ggml_tensor;
20
+
21
+ // Perform encode using OpenVINO.
22
+ // Returns 1 on success
23
+ // Returns 0 on failure
24
+ int whisper_openvino_encode(
25
+ whisper_openvino_context* ctx,
26
+ ggml_tensor* mel,
27
+ ggml_tensor* out);
28
+
29
+ #if __cplusplus
30
+ }
31
+ #endif