npm - whisper.rn - Versions diffs - 0.4.0-rc.9 → 0.4.0 - Mend

whisper.rn 0.4.0-rc.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183)

package/cpp/coreml/whisper-encoder-impl.m CHANGED

@@ -8,6 +8,7 @@
 #error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
 #endif
+#import "whisper-compat.h"
 #import "whisper-encoder-impl.h"
 @implementation whisper_encoder_implInput
@@ -76,10 +77,13 @@
     Such application may want to use `-[MLModel initWithContentsOfURL:configuration:error:]` and `+URLOfModelInThisBundle` to create a MLModel object to pass-in.
 */
 - (instancetype)initWithMLModel:(MLModel *)model {
+    if (model == nil) {
+        return nil;
+    }
     self = [super init];
-    if (!self) { return nil; }
-    _model = model;
-    if (_model == nil) { return nil; }
+    if (self != nil) {
+        _model = model;
+    }
     return self;
 }
@@ -176,6 +180,28 @@
     return [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
 }
+- (void)predictionFromFeatures:(whisper_encoder_implInput *)input completionHandler:(void (^)(whisper_encoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler {
+    [self.model predictionFromFeatures:input completionHandler:^(id<MLFeatureProvider> prediction, NSError *predictionError) {
+        if (prediction != nil) {
+            whisper_encoder_implOutput *output = [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[prediction featureValueForName:@"output"].multiArrayValue];
+            completionHandler(output, predictionError);
+        } else {
+            completionHandler(nil, predictionError);
+        }
+    }];
+}
+- (void)predictionFromFeatures:(whisper_encoder_implInput *)input options:(MLPredictionOptions *)options completionHandler:(void (^)(whisper_encoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler {
+    [self.model predictionFromFeatures:input options:options completionHandler:^(id<MLFeatureProvider> prediction, NSError *predictionError) {
+        if (prediction != nil) {
+            whisper_encoder_implOutput *output = [[whisper_encoder_implOutput alloc] initWithOutput:(MLMultiArray *)[prediction featureValueForName:@"output"].multiArrayValue];
+            completionHandler(output, predictionError);
+        } else {
+            completionHandler(nil, predictionError);
+        }
+    }];
+}
 - (nullable whisper_encoder_implOutput *)predictionFromLogmel_data:(MLMultiArray *)logmel_data error:(NSError * _Nullable __autoreleasing * _Nullable)error {
     whisper_encoder_implInput *input_ = [[whisper_encoder_implInput alloc] initWithLogmel_data:logmel_data];
     return [self predictionFromFeatures:input_ error:error];

package/cpp/ggml-alloc.c CHANGED

@@ -37,6 +37,7 @@ static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const str
     return true;
 }
+// ops that return true for this function must not use restrict pointers for their backend implementations
 static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
     switch (op) {
         case WSP_GGML_OP_SCALE:
@@ -52,8 +53,12 @@ static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
         case WSP_GGML_OP_LOG:
         case WSP_GGML_OP_UNARY:
         case WSP_GGML_OP_ROPE:
+        case WSP_GGML_OP_ROPE_BACK:
+        case WSP_GGML_OP_SILU_BACK:
         case WSP_GGML_OP_RMS_NORM:
+        case WSP_GGML_OP_RMS_NORM_BACK:
         case WSP_GGML_OP_SOFT_MAX:
+        case WSP_GGML_OP_SOFT_MAX_BACK:
             return true;
         default:
@@ -84,7 +89,7 @@ struct wsp_ggml_tallocr wsp_ggml_tallocr_new(wsp_ggml_backend_buffer_t buffer) {
     return talloc;
 }
-void wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor) {
+enum wsp_ggml_status wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor) {
     size_t size = wsp_ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = WSP_GGML_PAD(size, talloc->alignment);
@@ -99,7 +104,7 @@ void wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_te
     assert(((uintptr_t)addr % talloc->alignment) == 0);
-    wsp_ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+    return wsp_ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
 }
 // dynamic tensor allocator
@@ -466,18 +471,12 @@ static bool wsp_ggml_gallocr_is_own(wsp_ggml_gallocr_t galloc, struct wsp_ggml_t
     return wsp_ggml_gallocr_hash_get(galloc, t)->allocated;
 }
-static void wsp_ggml_gallocr_set_node_offset(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * node, int buffer_id, size_t offset) {
-    struct hash_node * hn = wsp_ggml_gallocr_hash_get(galloc, node);
-    hn->buffer_id = buffer_id;
-    hn->offset = offset;
-    hn->allocated = true;
-}
 static bool wsp_ggml_gallocr_is_allocated(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * t) {
     return t->data != NULL || wsp_ggml_gallocr_hash_get(galloc, t)->allocated;
 }
 static void wsp_ggml_gallocr_allocate_node(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * node, int buffer_id) {
+    WSP_GGML_ASSERT(buffer_id >= 0);
     struct hash_node * hn = wsp_ggml_gallocr_hash_get(galloc, node);
     if (!wsp_ggml_gallocr_is_allocated(galloc, node) && !wsp_ggml_is_view(node)) {
@@ -540,7 +539,6 @@ static void wsp_ggml_gallocr_allocate_node(wsp_ggml_gallocr_t galloc, struct wsp
         size_t offset = wsp_ggml_dyn_tallocr_alloc(alloc, size, node);
         hn->buffer_id = buffer_id;
         hn->offset = offset;
-        return;
     }
 }
@@ -816,7 +814,14 @@ static void wsp_ggml_gallocr_init_tensor(wsp_ggml_gallocr_t galloc, struct wsp_g
 }
 static bool wsp_ggml_gallocr_node_needs_realloc(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * node, struct tensor_alloc * talloc) {
-    size_t node_size = (node->data || node->view_src) ? 0 : wsp_ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
+    size_t node_size = 0;
+    if (!node->data && !node->view_src) {
+        // If we previously had data but don't now then reallocate
+        if (talloc->buffer_id < 0) {
+            return false;
+        }
+        node_size = wsp_ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
+    }
     return talloc->size_max >= node_size;
 }
@@ -931,42 +936,51 @@ size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc, int buffer_id
 // utils
+static void free_buffers(wsp_ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+    for (size_t i = 0; i < *n_buffers; i++) {
+        wsp_ggml_backend_buffer_free((*buffers)[i]);
+    }
+    free(*buffers);
+}
 static bool alloc_tensor_range(struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor * first, struct wsp_ggml_tensor * last,
         wsp_ggml_backend_buffer_type_t buft, size_t size,
         wsp_ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
     wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
-#ifndef NDEBUG
-        WSP_GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, wsp_ggml_backend_buft_name(buft), size);
-#endif
-        for (size_t i = 0; i < *n_buffers; i++) {
-            wsp_ggml_backend_buffer_free((*buffers)[i]);
-        }
-        free(*buffers);
+        WSP_GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, wsp_ggml_backend_buft_name(buft), size);
+        free_buffers(buffers, n_buffers);
         return false;
     }
+    *buffers = realloc(*buffers, sizeof(wsp_ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
     struct wsp_ggml_tallocr tallocr = wsp_ggml_tallocr_new(buffer);
     for (struct wsp_ggml_tensor * t = first; t != last; t = wsp_ggml_get_next_tensor(ctx, t)) {
+        enum wsp_ggml_status status = WSP_GGML_STATUS_SUCCESS;
         if (t->data == NULL) {
             if (t->view_src == NULL) {
-                wsp_ggml_tallocr_alloc(&tallocr, t);
+                status = wsp_ggml_tallocr_alloc(&tallocr, t);
             } else if (t->buffer == NULL) {
-                wsp_ggml_backend_view_init(t);
+                status = wsp_ggml_backend_view_init(t);
             }
         } else {
             if (t->view_src != NULL && t->buffer == NULL) {
                 // view of a pre-allocated tensor
-                wsp_ggml_backend_view_init(t);
+                status = wsp_ggml_backend_view_init(t);
             }
         }
+        if (status != WSP_GGML_STATUS_SUCCESS) {
+            WSP_GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+            free_buffers(buffers, n_buffers);
+            return false;
+        }
     }
-    *buffers = realloc(*buffers, sizeof(wsp_ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
     return true;
 }
@@ -987,19 +1001,7 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
             this_size = WSP_GGML_PAD(wsp_ggml_backend_buft_get_alloc_size(buft, t), alignment);
         }
-        if (this_size > max_size) {
-            WSP_GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
-                    __func__, t->name,
-                    wsp_ggml_backend_buft_name(buft),
-                    this_size, max_size);
-            for (size_t i = 0; i < n_buffers; i++) {
-                wsp_ggml_backend_buffer_free(buffers[i]);
-            }
-            free(buffers);
-            return NULL;
-        }
-        if ((cur_buf_size + this_size) > max_size) {
+        if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
             if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;

package/cpp/ggml-alloc.h CHANGED

@@ -19,7 +19,7 @@ struct wsp_ggml_tallocr {
 };
 WSP_GGML_API struct wsp_ggml_tallocr wsp_ggml_tallocr_new(wsp_ggml_backend_buffer_t buffer);
-WSP_GGML_API void                wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor);
+WSP_GGML_API enum wsp_ggml_status    wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor);
 // Graph allocator
 /*

package/cpp/ggml-backend-impl.h CHANGED

@@ -8,6 +8,8 @@
 extern "C" {
 #endif
+    #define WSP_GGML_BACKEND_API_VERSION 1
     //
     // Backend buffer type
     //
@@ -22,7 +24,7 @@ extern "C" {
         size_t                (*get_max_size)  (wsp_ggml_backend_buffer_type_t buft);
         // (optional) data size needed to allocate the tensor, including padding (defaults to wsp_ggml_nbytes)
         size_t                (*get_alloc_size)(wsp_ggml_backend_buffer_type_t buft, const struct wsp_ggml_tensor * tensor);
-        // (optional) check if tensor data is in host memory (defaults to false)
+        // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
         bool                  (*is_host)       (wsp_ggml_backend_buffer_type_t buft);
     };
@@ -37,13 +39,12 @@ extern "C" {
     //
     struct wsp_ggml_backend_buffer_i {
-        const char * (*get_name)     (wsp_ggml_backend_buffer_t buffer);
         // (optional) free the buffer
         void         (*free_buffer)  (wsp_ggml_backend_buffer_t buffer);
         // base address of the buffer
         void *       (*get_base)     (wsp_ggml_backend_buffer_t buffer);
         // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-        void         (*init_tensor)  (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
+        enum wsp_ggml_status (*init_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
         // tensor data access
         void         (*memset_tensor)(wsp_ggml_backend_buffer_t buffer,       struct wsp_ggml_tensor * tensor,     uint8_t value, size_t offset, size_t size);
         void         (*set_tensor)   (wsp_ggml_backend_buffer_t buffer,       struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
@@ -64,20 +65,20 @@ extern "C" {
         enum wsp_ggml_backend_buffer_usage usage;
     };
-    wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
+    WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
                    wsp_ggml_backend_buffer_type_t buft,
             struct wsp_ggml_backend_buffer_i      iface,
                    void *                     context,
                    size_t                     size);
     // do not use directly, use wsp_ggml_backend_tensor_copy instead
-    bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
+    WSP_GGML_API bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
     // multi-buffer
     // buffer that contains a collection of buffers
-    wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
-    bool                  wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
-    void                  wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
+    WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
+    WSP_GGML_API bool                  wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
+    WSP_GGML_API void                  wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
     //
     // Backend (stream)
@@ -88,19 +89,16 @@ extern "C" {
         void (*free)(wsp_ggml_backend_t backend);
-        // Will be moved to the device interface
-        // buffer allocation
-        wsp_ggml_backend_buffer_type_t (*get_default_buffer_type)(wsp_ggml_backend_t backend);
         // (optional) asynchronous tensor data access
         void (*set_tensor_async)(wsp_ggml_backend_t backend,       struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         void (*get_tensor_async)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * tensor,       void * data, size_t offset, size_t size);
         bool (*cpy_tensor_async)(wsp_ggml_backend_t backend_src, wsp_ggml_backend_t backend_dst, const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
-        // (optional) complete all pending operations
+        // (optional) complete all pending operations (required if the backend supports async operations)
         void (*synchronize)(wsp_ggml_backend_t backend);
-        // (optional) compute graph with a plan (not used currently)
+        // (optional) graph plans (not used currently)
+        // compute graph with a plan
         wsp_ggml_backend_graph_plan_t (*graph_plan_create) (wsp_ggml_backend_t backend, const struct wsp_ggml_cgraph * cgraph);
         void                      (*graph_plan_free)   (wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
         // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
@@ -111,13 +109,6 @@ extern "C" {
         // compute graph (always async if supported by the backend)
         enum wsp_ggml_status          (*graph_compute)     (wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);
-        // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
-        //            new backends should implement the device interface instead
-        // These functions are being moved to the device interface
-        bool (*supports_op)  (wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * op);
-        bool (*supports_buft)(wsp_ggml_backend_t backend, wsp_ggml_backend_buffer_type_t buft);
-        bool (*offload_op)   (wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * op);
         // (optional) event synchronization
         // record an event on this stream
         void (*event_record)(wsp_ggml_backend_t backend, wsp_ggml_backend_event_t event);
@@ -210,17 +201,54 @@ extern "C" {
     };
     struct wsp_ggml_backend_reg {
-        // int api_version; // TODO: for dynamic loading
+        int api_version; // initialize to WSP_GGML_BACKEND_API_VERSION
         struct wsp_ggml_backend_reg_i iface;
         void * context;
     };
     // Internal backend registry API
-    void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
-    void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
-    // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
-    // typedef wsp_ggml_backend_register_t * (*wsp_ggml_backend_init)(void);
+    WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
+    // Add backend dynamic loading support to the backend
+    // Initialize the backend
+    typedef wsp_ggml_backend_reg_t (*wsp_ggml_backend_init_t)(void);
+    // Optional: obtain a score for the backend based on the system configuration
+    // Higher scores are preferred, 0 means the backend is not supported in the current system
+    typedef int                (*wsp_ggml_backend_score_t)(void);
+#ifdef WSP_GGML_BACKEND_DL
+#    ifdef __cplusplus
+#        define WSP_GGML_BACKEND_DL_IMPL(reg_fn)                             \
+            extern "C" {                                                 \
+            WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void); \
+            }                                                            \
+            wsp_ggml_backend_reg_t wsp_ggml_backend_init(void) {                 \
+                return reg_fn();                                         \
+            }
+#        define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn)       \
+            extern "C" {                                   \
+            WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void); \
+            }                                              \
+            int wsp_ggml_backend_score(void) {                 \
+                return score_fn();                         \
+            }
+#    else
+#        define WSP_GGML_BACKEND_DL_IMPL(reg_fn)                              \
+            WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void);  \
+            wsp_ggml_backend_reg_t                  wsp_ggml_backend_init(void) { \
+                return reg_fn();                                          \
+            }
+#        define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn)        \
+            WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void);  \
+            int                  wsp_ggml_backend_score(void) { \
+                return score_fn();                          \
+            }
+#    endif
+#else
+#    define WSP_GGML_BACKEND_DL_IMPL(reg_fn)
+#    define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn)
+#endif
 #ifdef  __cplusplus
 }