whisper.rn 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/android/src/main/jni.cpp +12 -3
  2. package/cpp/ggml-alloc.c +49 -18
  3. package/cpp/ggml-backend-impl.h +0 -3
  4. package/cpp/ggml-backend-reg.cpp +8 -0
  5. package/cpp/ggml-backend.cpp +0 -2
  6. package/cpp/ggml-backend.h +2 -0
  7. package/cpp/ggml-cpu/amx/amx.cpp +1 -0
  8. package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
  9. package/cpp/ggml-cpu/ggml-cpu-impl.h +4 -2
  10. package/cpp/ggml-cpu/ggml-cpu.c +67 -24
  11. package/cpp/ggml-cpu/ops.cpp +489 -364
  12. package/cpp/ggml-cpu/ops.h +4 -4
  13. package/cpp/ggml-cpu/repack.cpp +143 -29
  14. package/cpp/ggml-cpu/simd-mappings.h +25 -25
  15. package/cpp/ggml-cpu/unary-ops.cpp +151 -0
  16. package/cpp/ggml-cpu/unary-ops.h +7 -0
  17. package/cpp/ggml-cpu/vec.cpp +83 -0
  18. package/cpp/ggml-cpu/vec.h +20 -8
  19. package/cpp/ggml-impl.h +67 -2
  20. package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
  21. package/cpp/ggml-metal/ggml-metal-context.m +5 -6
  22. package/cpp/ggml-metal/ggml-metal-device.cpp +300 -14
  23. package/cpp/ggml-metal/ggml-metal-device.h +26 -1
  24. package/cpp/ggml-metal/ggml-metal-device.m +243 -28
  25. package/cpp/ggml-metal/ggml-metal-impl.h +177 -9
  26. package/cpp/ggml-metal/ggml-metal-ops.cpp +843 -157
  27. package/cpp/ggml-metal/ggml-metal-ops.h +8 -0
  28. package/cpp/ggml-metal/ggml-metal.cpp +8 -3
  29. package/cpp/ggml-metal/ggml-metal.metal +12436 -0
  30. package/cpp/ggml.c +317 -4
  31. package/cpp/ggml.h +139 -0
  32. package/cpp/jsi/RNWhisperJSI.cpp +7 -2
  33. package/cpp/rn-whisper.h +1 -0
  34. package/cpp/whisper.cpp +8 -2
  35. package/ios/RNWhisperContext.mm +3 -1
  36. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  37. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  38. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  39. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  45. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  53. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  54. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  55. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  62. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  70. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  71. package/lib/commonjs/version.json +1 -1
  72. package/lib/module/NativeRNWhisper.js.map +1 -1
  73. package/lib/module/version.json +1 -1
  74. package/lib/typescript/NativeRNWhisper.d.ts +2 -0
  75. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  76. package/package.json +1 -1
  77. package/src/NativeRNWhisper.ts +2 -0
  78. package/src/version.json +1 -1
  79. package/whisper-rn.podspec +1 -1
  80. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  81. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  82. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  83. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  84. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  85. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/ggml-cpu/vec.h CHANGED
@@ -44,6 +44,7 @@ void wsp_ggml_vec_dot_bf16(int n, float * WSP_GGML_RESTRICT s, size_t bs, wsp_gg
 void wsp_ggml_vec_dot_f16(int n, float * WSP_GGML_RESTRICT s, size_t bs, wsp_ggml_fp16_t * WSP_GGML_RESTRICT x, size_t bx, wsp_ggml_fp16_t * WSP_GGML_RESTRICT y, size_t by, int nrc);
 
 void wsp_ggml_vec_silu_f32(const int n, float * y, const float * x);
+wsp_ggml_float wsp_ggml_vec_cvar_f32(const int n, float * y, const float * x, const float mean); //it will also center y ( y = y - mean )
 wsp_ggml_float wsp_ggml_vec_soft_max_f32(const int n, float * y, const float * x, float max);
 wsp_ggml_float wsp_ggml_vec_log_soft_max_f32(const int n, float * y, const float * x, float max);
 
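The new wsp_ggml_vec_cvar_f32 helper is only declared here; judging by its name and the trailing comment, it centers y around the supplied mean and returns the variance. A minimal scalar sketch of that contract, assuming those semantics (a hypothetical reference, not the SIMD implementation shipped in vec.cpp):

```cpp
// Hypothetical scalar reference for wsp_ggml_vec_cvar_f32, inferred from its
// declaration comment ("it will also center y (y = y - mean)").
static double vec_cvar_f32_ref(const int n, float * y, const float * x, const float mean) {
    double sum = 0.0;
    for (int i = 0; i < n; ++i) {
        y[i] = x[i] - mean;           // center: y = x - mean
        sum += (double) y[i] * y[i];  // accumulate squared deviations
    }
    return n > 0 ? sum / n : 0.0;     // variance around the supplied mean
}
```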
@@ -143,14 +144,14 @@ inline static void wsp_ggml_vec_dot_f16_unroll(const int n, const int xs, float
     for (int i = 0; i < np; i += wsp_ggml_f16_step) {
         ay1 = WSP_GGML_F16x_VEC_LOAD(y + i + 0 * wsp_ggml_f16_epr, 0); // 8 elements
 
-        ax1 = WSP_GGML_F16x_VEC_LOAD(x[0] + i + 0*wsp_ggml_f16_epr, 0); // 8 elemnst
+        ax1 = WSP_GGML_F16x_VEC_LOAD(x[0] + i + 0*wsp_ggml_f16_epr, 0); // 8 elements
         sum_00 = WSP_GGML_F16x_VEC_FMA(sum_00, ax1, ay1); // sum_00 = sum_00+ax1*ay1
         ax1 = WSP_GGML_F16x_VEC_LOAD(x[1] + i + 0*wsp_ggml_f16_epr, 0); // 8 elements
         sum_10 = WSP_GGML_F16x_VEC_FMA(sum_10, ax1, ay1);
 
         ay2 = WSP_GGML_F16x_VEC_LOAD(y + i + 1 * wsp_ggml_f16_epr, 1); // next 8 elements
 
-        ax2 = WSP_GGML_F16x_VEC_LOAD(x[0] + i + 1*wsp_ggml_f16_epr, 1); // next 8 ekements
+        ax2 = WSP_GGML_F16x_VEC_LOAD(x[0] + i + 1*wsp_ggml_f16_epr, 1); // next 8 elements
         sum_01 = WSP_GGML_F16x_VEC_FMA(sum_01, ax2, ay2);
         ax2 = WSP_GGML_F16x_VEC_LOAD(x[1] + i + 1*wsp_ggml_f16_epr, 1);
         sum_11 = WSP_GGML_F16x_VEC_FMA(sum_11, ax2, ay2);
@@ -159,7 +160,7 @@ inline static void wsp_ggml_vec_dot_f16_unroll(const int n, const int xs, float
 
         ax3 = WSP_GGML_F16x_VEC_LOAD(x[0] + i + 2*wsp_ggml_f16_epr, 2);
         sum_02 = WSP_GGML_F16x_VEC_FMA(sum_02, ax3, ay3);
-        ax1 = WSP_GGML_F16x_VEC_LOAD(x[1] + i + 2*wsp_ggml_f16_epr, 2);
+        ax3 = WSP_GGML_F16x_VEC_LOAD(x[1] + i + 2*wsp_ggml_f16_epr, 2);
         sum_12 = WSP_GGML_F16x_VEC_FMA(sum_12, ax3, ay3);
 
         ay4 = WSP_GGML_F16x_VEC_LOAD(y + i + 3 * wsp_ggml_f16_epr, 3);
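The change in this hunk is a correctness fix, not a comment cleanup: in 0.5.1 the third lane of x[1] was loaded into ax1 while the following FMA consumed ax3, so sum_12 accumulated whatever ax3 still held from the preceding x[0] load. 0.5.3 loads into ax3, matching the pattern of the other lanes.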
@@ -654,11 +655,11 @@ inline static void wsp_ggml_vec_scale_f32(const int n, float * y, const float
     }
     // leftovers
     // maximum number of leftover elements will be less that wsp_ggml_f32_epr. Apply predicated svmad on available elements only
-    if (np < n) {
-        svbool_t pg = svwhilelt_b32(np, n);
-        ay1 = svld1_f32(pg, y + np);
+    for (int i = np; i < n; i += wsp_ggml_f32_epr) {
+        svbool_t pg = svwhilelt_b32(i, n);
+        ay1 = svld1_f32(pg, y + i);
         ay1 = svmul_f32_m(pg, ay1, vx);
-        svst1_f32(pg, y + np, ay1);
+        svst1_f32(pg, y + i, ay1);
     }
 #elif defined(__riscv_v_intrinsic)
     for (int i = 0, avl; i < n; i += avl) {
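This is also a behavior fix. The main SVE loop advances by an unrolled step that is a multiple of wsp_ggml_f32_epr, so more than one vector's worth of elements can remain past np; the old single predicated block only scaled the first wsp_ggml_f32_epr of them. As a hypothetical illustration, with epr = 8 and a 4-way unrolled step of 32, n = 60 gives np = 32 and 28 leftovers, which need four predicated iterations rather than one.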
@@ -819,7 +820,8 @@ inline static void wsp_ggml_vec_tanh_f16 (const int n, wsp_ggml_fp16_t * y, cons
 inline static void wsp_ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
 inline static void wsp_ggml_vec_elu_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = WSP_GGML_CPU_FP32_TO_FP16(expm1f(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
+        const float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = WSP_GGML_CPU_FP32_TO_FP16((v > 0.f) ? v : expm1f(v));
     }
 }
 inline static void wsp_ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
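The f16 variant previously applied expm1f to every input, whereas ELU is the identity for positive inputs (compare the f32 one-liner just above). For example, ELU(1.0) should be 1.0, but the 0.5.1 code produced expm1f(1.0) ≈ 1.718; the 0.5.3 version mirrors the f32 branch.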
@@ -1414,6 +1416,16 @@ inline static void wsp_ggml_vec_sum_f32(const int n, float * s, const float * x)
 #endif
 }
 
+inline static void wsp_ggml_vec_cumsum_f32(const int n, float * y, const float * x) {
+    for (int i = 0; i < n; ++i) {
+        if (i == 0) {
+            y[i] = x[i];
+        } else {
+            y[i] = y[i - 1] + x[i];
+        }
+    }
+}
+
 inline static void wsp_ggml_vec_sum_f32_ggf(const int n, wsp_ggml_float * s, const float * x) {
     wsp_ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
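A quick illustration of the new cumulative-sum helper, with made-up values:

```cpp
// Usage sketch for wsp_ggml_vec_cumsum_f32 (illustrative values only).
float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
float y[4];
wsp_ggml_vec_cumsum_f32(4, y, x); // y = {1, 3, 6, 10}: y[i] is the sum of x[0..i]
```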
package/cpp/ggml-impl.h CHANGED
@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
     }
 }
 
+static inline float wsp_ggml_compute_softplus_f32(float input) {
+    return (input > 20.0f) ? input : logf(1 + expf(input));
+}
 //
 // logging
 //
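The 20.0f cutoff is a numerical guard rather than an approximation knob: for x > 20, e^-x is below 3e-9, so log(1 + e^x) = x + log(1 + e^-x) is indistinguishable from x at float precision, while evaluating expf(x) directly overflows to infinity once x exceeds roughly 88. A small demonstration of both regimes (illustrative only):

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // Moderate inputs: the direct formula is accurate.
    std::printf("%g\n", std::log(1.0f + std::exp(10.0f))); // ~10.00005
    // Large inputs: expf overflows, which is why softplus falls back to x.
    std::printf("%g\n", std::exp(90.0f));                  // inf
    return 0;
}
```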
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
 #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
 #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
 
+static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
+    const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
+
+    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
+    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
+        return 0;
+    }
+    return cgraph->use_counts[hash_pos];
+}
+
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
 static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
     const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
 
     // check the use count against how many we're replacing
-    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
-    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
+    if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
         return false;
     }
 
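The lookup into the visited hash set is factored out of wsp_ggml_node_has_n_uses into the reusable wsp_ggml_node_get_use_count, with nodes that never entered the hash set reporting zero uses; the subgraph-fusion helpers added below build on the same query.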
@@ -635,11 +647,42 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
     return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
 }
 
+WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                                 const int * node_idxs,
+                                                 int count,
+                                                 const enum wsp_ggml_op * ops,
+                                                 const int * outputs,
+                                                 int num_outputs);
+
+// Returns true if the subgraph formed by {node_idxs} can be fused
+// checks whethers all nodes which are not part of outputs can be elided
+// by checking if their num_uses are confined to the subgraph
+static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                              int node_idx,
+                                              int count,
+                                              const enum wsp_ggml_op * ops,
+                                              const int * outputs,
+                                              int num_outputs) {
+    WSP_GGML_ASSERT(count < 32);
+    if (node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
+}
+
 #ifdef __cplusplus
 }
 #endif
 
 #ifdef __cplusplus
+#include <array>
 #include <initializer_list>
 #include <vector>
 
@@ -648,6 +691,28 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
     return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
 
+inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                       int start_idx,
+                                       std::initializer_list<enum wsp_ggml_op> ops,
+                                       std::initializer_list<int> outputs = {}) {
+    return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
+}
+
+// Return true if the edges in the graph match expectations.
+inline bool wsp_ggml_check_edges(const struct wsp_ggml_cgraph * cgraph,
+                                 int start_idx,
+                                 std::initializer_list<std::array<int, 3>> edges) {
+    for (const auto & edge : edges) {
+        int dst_node = edge[0];
+        int src_idx = edge[1];
+        int src_node = edge[2];
+        if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
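A sketch of how the new helpers combine at a call site. The function name, the op chain, and the indices below are hypothetical (the WSP_GGML_OP_* values mirror upstream ggml's GGML_OP_* enum), not an API of this package:

```cpp
// Can nodes [i, i+3) be fused as MUL -> ADD -> UNARY, with only the last
// node (index 2, relative to i) visible outside the subgraph?
static bool try_fuse_mul_add_unary(const struct wsp_ggml_cgraph * cgraph, int i) {
    if (!wsp_ggml_can_fuse_subgraph(cgraph, i,
            { WSP_GGML_OP_MUL, WSP_GGML_OP_ADD, WSP_GGML_OP_UNARY }, { 2 })) {
        return false;
    }
    // Each edge is {dst_node, src_idx, src_node}, all relative to i:
    // node 1 must read node 0 as src[0]; node 2 must read node 1 as src[0].
    return wsp_ggml_check_edges(cgraph, i, {{1, 0, 0}, {2, 0, 1}});
}
```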
package/cpp/ggml-metal/ggml-metal-common.cpp CHANGED
@@ -112,7 +112,7 @@ static bool wsp_ggml_mem_ranges_add_dst(wsp_ggml_mem_ranges_t mrs, const wsp_ggm
 }
 
 bool wsp_ggml_mem_ranges_add(wsp_ggml_mem_ranges_t mrs, const wsp_ggml_tensor * tensor) {
-    for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
+    for (int i = 0; i < WSP_GGML_MAX_SRC; i++) {
         if (tensor->src[i]) {
             wsp_ggml_mem_ranges_add_src(mrs, tensor->src[i]);
         }
@@ -173,7 +173,7 @@ static bool wsp_ggml_mem_ranges_check_dst(wsp_ggml_mem_ranges_t mrs, const wsp_g
 }
 
 bool wsp_ggml_mem_ranges_check(wsp_ggml_mem_ranges_t mrs, const wsp_ggml_tensor * tensor) {
-    for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
+    for (int i = 0; i < WSP_GGML_MAX_SRC; i++) {
         if (tensor->src[i]) {
             if (!wsp_ggml_mem_ranges_check_src(mrs, tensor->src[i])) {
                 return false;
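Both loops walk the tensor->src[] array, which has WSP_GGML_MAX_SRC slots; WSP_GGML_MAX_DIMS (4 in ggml) is smaller, so the 0.5.1 bound silently ignored the later source operands when recording and checking memory ranges for concurrent execution.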
package/cpp/ggml-metal/ggml-metal-context.m CHANGED
@@ -35,7 +35,6 @@ struct wsp_ggml_metal {
     // additional, inference-time compiled pipelines
     wsp_ggml_metal_pipelines_t pipelines_ext;
 
-    bool use_bfloat;
     bool use_fusion;
     bool use_concurrency;
     bool use_graph_optimize;
@@ -121,11 +120,10 @@ wsp_ggml_metal_t wsp_ggml_metal_init(wsp_ggml_metal_device_t dev) {
         }
     }
 
-    const struct wsp_ggml_metal_device_props * props_dev = wsp_ggml_metal_device_get_props(dev);
+    //const struct wsp_ggml_metal_device_props * props_dev = wsp_ggml_metal_device_get_props(dev);
 
     res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
 
-    res->use_bfloat = props_dev->has_bfloat;
    res->use_fusion = getenv("WSP_GGML_METAL_FUSION_DISABLE") == nil;
    res->use_concurrency = getenv("WSP_GGML_METAL_CONCURRENCY_DISABLE") == nil;
 
@@ -147,7 +145,6 @@ wsp_ggml_metal_t wsp_ggml_metal_init(wsp_ggml_metal_device_t dev) {
 
     memset(res->fuse_cnt, 0, sizeof(res->fuse_cnt));
 
-    WSP_GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, res->use_bfloat ? "true" : "false");
     WSP_GGML_LOG_INFO("%s: use fusion = %s\n", __func__, res->use_fusion ? "true" : "false");
     WSP_GGML_LOG_INFO("%s: use concurrency = %s\n", __func__, res->use_concurrency ? "true" : "false");
     WSP_GGML_LOG_INFO("%s: use graph optimize = %s\n", __func__, res->use_graph_optimize ? "true" : "false");
@@ -292,7 +289,7 @@ void wsp_ggml_metal_set_tensor_async(wsp_ggml_metal_t ctx, struct wsp_ggml_tenso
 
     // queue the copy operation into the queue of the Metal context
     // this will be queued at the end, after any currently ongoing GPU operations
-    id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
+    id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
     id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
 
     [encoder copyFromBuffer:buf_src
@@ -303,6 +300,7 @@ void wsp_ggml_metal_set_tensor_async(wsp_ggml_metal_t ctx, struct wsp_ggml_tenso
 
     [encoder endEncoding];
     [cmd_buf commit];
+    [buf_src release];
 
     // do not wait here for completion
     //[cmd_buf waitUntilCompleted];
@@ -333,7 +331,7 @@ void wsp_ggml_metal_get_tensor_async(wsp_ggml_metal_t ctx, const struct wsp_ggml
 
     // queue the copy operation into the queue of the Metal context
     // this will be queued at the end, after any currently ongoing GPU operations
-    id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
+    id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
     id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
 
     [encoder copyFromBuffer:bid_src.metal
@@ -344,6 +342,7 @@ void wsp_ggml_metal_get_tensor_async(wsp_ggml_metal_t ctx, const struct wsp_ggml
 
     [encoder endEncoding];
     [cmd_buf commit];
+    [buf_dst release];
 
     // do not wait here for completion
     //[cmd_buf waitUntilCompleted];
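The two async copy paths get the same pair of fixes. commandBufferWithUnretainedReferences does not keep the resources it encodes alive, so the temporary wrapper buffer (buf_src in the set path, buf_dst in the get path) could be deallocated before the asynchronous blit ran; a plain commandBuffer retains what it references until completion. With the command buffer holding its own reference, the explicit release after commit drops the wrapper's ownership without waiting for the GPU.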