whisper.rn 0.4.0-rc.6 → 0.4.0-rc.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/whisper.cpp CHANGED
@@ -122,9 +122,18 @@ WHISPER_ATTRIBUTE_FORMAT(2, 3)
  static void whisper_log_internal (wsp_ggml_log_level level, const char * format, ...);
  static void whisper_log_callback_default(wsp_ggml_log_level level, const char * text, void * user_data);
 
- #define WHISPER_LOG_INFO(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
- #define WHISPER_LOG_WARN(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
  #define WHISPER_LOG_ERROR(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+ #define WHISPER_LOG_WARN(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+ #define WHISPER_LOG_INFO(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+
+ // define this to enable verbose trace logging - useful for debugging purposes
+ //#define WHISPER_DEBUG
+
+ #if defined(WHISPER_DEBUG)
+ #define WHISPER_LOG_DEBUG(...) whisper_log_internal(WSP_GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
+ #else
+ #define WHISPER_LOG_DEBUG(...)
+ #endif
 
  #define WHISPER_ASSERT(x) \
  do { \
@@ -134,18 +143,6 @@ static void whisper_log_callback_default(wsp_ggml_log_level level, const char *
  } \
  } while (0)
 
- // define this to enable verbose trace logging - useful for debugging purposes
- //#define WHISPER_DEBUG
-
- #if defined(WHISPER_DEBUG)
- #define WHISPER_PRINT_DEBUG(...) \
- do { \
- fprintf(stderr, __VA_ARGS__); \
- } while (0)
- #else
- #define WHISPER_PRINT_DEBUG(...)
- #endif
-
  //#define WHISPER_USE_FLASH_ATTN
  //#define WHISPER_USE_FLASH_FF
  #define WHISPER_MAX_DECODERS 8
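
Taken together, the two hunks above retire the old `WHISPER_PRINT_DEBUG` macro (a bare `fprintf` to stderr) in favor of `WHISPER_LOG_DEBUG`, which routes through the same `whisper_log_internal` dispatcher as the INFO/WARN/ERROR levels while still compiling away entirely unless `WHISPER_DEBUG` is defined. A minimal standalone sketch of the pattern, where `log_internal` is a hypothetical stand-in for `whisper_log_internal`:

```cpp
#include <cstdarg>
#include <cstdio>

// hypothetical stand-in for whisper_log_internal: one sink for all levels
static void log_internal(const char * level, const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    fprintf(stderr, "[%s] ", level);
    vfprintf(stderr, fmt, args);
    va_end(args);
}

// compile-time gate: debug logging costs nothing unless opted in
//#define WHISPER_DEBUG
#if defined(WHISPER_DEBUG)
#define LOG_DEBUG(...) log_internal("DEBUG", __VA_ARGS__)
#else
#define LOG_DEBUG(...)   // expands to nothing; arguments are never evaluated
#endif

int main() {
    log_internal("INFO", "model loaded\n"); // always goes through the sink
    LOG_DEBUG("n_tokens = %d\n", 42);       // no-op in this build
    return 0;
}
```

One practical consequence of the switch: debug output now reaches whatever log callback the host app registered via the dispatcher, instead of always being written to stderr.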
@@ -155,7 +152,7 @@ static void whisper_log_callback_default(wsp_ggml_log_level level, const char *
  // ggml helpers
  //
 
- static void wsp_ggml_graph_compute_helper(
+ static bool wsp_ggml_graph_compute_helper(
  struct wsp_ggml_cgraph * graph,
  std::vector<uint8_t> & buf,
  int n_threads,
@@ -171,10 +168,10 @@ static void wsp_ggml_graph_compute_helper(
  plan.work_data = buf.data();
  }
 
- wsp_ggml_graph_compute(graph, &plan);
+ return wsp_ggml_graph_compute(graph, &plan);
  }
 
- static void wsp_ggml_graph_compute_helper(
+ static bool wsp_ggml_graph_compute_helper(
  struct wsp_ggml_backend * backend,
  struct wsp_ggml_cgraph * graph,
  int n_threads) {
@@ -186,7 +183,7 @@ static void wsp_ggml_graph_compute_helper(
  wsp_ggml_backend_metal_set_n_cb(backend, n_threads);
  }
  #endif
- wsp_ggml_backend_graph_compute(backend, graph);
+ return wsp_ggml_backend_graph_compute(backend, graph);
  }
 
  // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
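
Both `wsp_ggml_graph_compute_helper` overloads now return `bool` instead of `void`, so a failed graph computation can propagate out of `whisper_encode_internal` / `whisper_decode_internal`; the call sites later in this diff adopt an early-return on failure. A hedged sketch of that pattern, with placeholder types standing in for the ggml ones:

```cpp
#include <cstdio>

struct cgraph { int n_nodes = 0; }; // placeholder for wsp_ggml_cgraph

// placeholder for wsp_ggml_backend_graph_compute; false signals failure
static bool backend_graph_compute(cgraph * gf) { return gf != nullptr; }

// rc.8 style: the helper surfaces the result instead of discarding it
static bool graph_compute_helper(cgraph * gf, int /*n_threads*/) {
    return backend_graph_compute(gf);
}

static bool encode_internal(cgraph * gf, int n_threads) {
    if (!graph_compute_helper(gf, n_threads)) {
        return false; // bubble the failure up to the caller
    }
    // ... remaining encode steps ...
    return true;
}

int main() {
    cgraph gf;
    if (!encode_internal(&gf, 4)) fprintf(stderr, "encode failed\n");
    return 0;
}
```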
@@ -487,8 +484,8 @@ static size_t whisper_allocr_size(struct whisper_allocr & allocr) {
 
  // measure the memory usage of a graph and prepare the allocr's internal data buffer
  static void whisper_allocr_graph_init(struct whisper_allocr & allocr, wsp_ggml_backend_t backend, std::function<struct wsp_ggml_cgraph *()> && get_graph) {
- auto & alloc = allocr.alloc;
- auto & meta = allocr.meta;
+ auto & alloc = allocr.alloc;
+ auto & meta = allocr.meta;
 
  alloc = wsp_ggml_allocr_new_measure_from_backend(backend);
 
@@ -704,7 +701,7 @@ struct whisper_model {
  struct wsp_ggml_context * ctx;
 
  // the model backend data is read-only and can be shared between processors
- struct wsp_ggml_backend_buffer * buffer;
+ std::vector<struct wsp_ggml_backend_buffer *> buffers;
 
  // tensors
  int n_loaded;
@@ -1073,7 +1070,7 @@ static wsp_ggml_backend_t whisper_backend_init(const whisper_context_params & pa
  #ifdef WSP_GGML_USE_METAL
  if (params.use_gpu) {
  WHISPER_LOG_INFO("%s: using Metal backend\n", __func__);
- wsp_ggml_metal_log_set_callback(whisper_log_callback_default, nullptr);
+ wsp_ggml_backend_metal_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
  backend_gpu = wsp_ggml_backend_metal_init();
  if (!backend_gpu) {
  WHISPER_LOG_ERROR("%s: wsp_ggml_backend_metal_init() failed\n", __func__);
@@ -1517,24 +1514,64 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 
  wctx.backend = whisper_backend_init(wctx.params);
 
+ // some devices have a limit on the maximum size of single memory buffer
+ // for example, iPhones are limited to 1GB per buffer
+ // to workaround this, we will allocate multiple buffers of smaller size and will split the tensors with the
+ // model weights between them
+ //
+ // the map_t2b maps tensor names to buffer indices
+ // as we iterate over the tensors, we will allocate new buffers when the current one is full
+ //
+ // finally, we create a separate allocator for each buffer and use it to allocate the tensors
+ // we keep the allocators alive until all the tensors are loaded
+
+ WSP_GGML_ASSERT(model.buffers.empty());
+
+ std::map<std::string, int> map_t2b;
+
  {
  size_t size_main = 0;
+ size_t size_cur = 0;
+
+ static const size_t GB = 1024ull*1024ull*1024ull;
 
  for (const auto & t : model.tensors) {
- size_main += wsp_ggml_nbytes(t.second) + wsp_ggml_tensor_overhead();
+ const size_t cur = wsp_ggml_nbytes(t.second) + wsp_ggml_tensor_overhead();
+
+ // adding the tensor to the current buffer will exceed the limit, so we need to allocate a new buffer
+ if (size_cur + cur > GB) {
+ WSP_GGML_ASSERT(size_cur > 0 && "A tensor is too large to fit in a single buffer");
+
+ model.buffers.emplace_back(wsp_ggml_backend_alloc_buffer(wctx.backend, size_cur));
+
+ size_cur = cur;
+ }
+
+ map_t2b[t.first] = model.buffers.size();
+
+ size_cur += cur;
+ size_main += cur;
  }
 
- model.buffer = wsp_ggml_backend_alloc_buffer(wctx.backend, size_main);
+ // allocate the last buffer if needed
+ if (size_cur > 0) {
+ model.buffers.emplace_back(wsp_ggml_backend_alloc_buffer(wctx.backend, size_cur));
+ }
 
- WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, wsp_ggml_backend_name(wctx.backend), size_main / 1e6);
+ WSP_GGML_ASSERT(model.buffers.size() > 0);
+
+ WHISPER_LOG_INFO("%s: %8s total size = %8.2f MB (%d buffers)\n", __func__, wsp_ggml_backend_name(wctx.backend), size_main / 1e6, (int) model.buffers.size());
  }
 
- wsp_ggml_allocr * alloc = wsp_ggml_allocr_new_from_buffer(model.buffer);
+ std::vector<wsp_ggml_allocr *> allocs(model.buffers.size());
+ for (size_t i = 0; i < allocs.size(); ++i) {
+ allocs[i] = wsp_ggml_allocr_new_from_buffer(model.buffers[i]);
+ }
 
  // allocate tensors in the backend buffers
  {
  for (const auto & t : model.tensors) {
- wsp_ggml_allocr_alloc(alloc, t.second);
+ wsp_ggml_allocr_alloc(allocs[map_t2b[t.first]], t.second);
  }
  }
 
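The new allocation scheme in the hunk above is a greedy, single-pass partition: walk the tensor map once, seal the current backend buffer whenever the next tensor would push it past the 1 GB cap (the iPhone per-buffer limit called out in the comments), and record each tensor's buffer index in `map_t2b` so the later allocation pass can find it. A self-contained sketch of just that partitioning step, with made-up tensor sizes in place of `wsp_ggml_nbytes`:

```cpp
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    // hypothetical tensor sizes in bytes (std::map iterates in name order,
    // the same order used by both the sizing and the allocation pass)
    std::map<std::string, size_t> tensors = {
        {"decoder.token_embd",   300ull << 20},
        {"encoder.conv1.weight", 600ull << 20},
        {"encoder.conv2.weight", 600ull << 20},
    };

    static const size_t GB = 1024ull*1024ull*1024ull;

    std::vector<size_t>        buffer_sizes; // one entry per backend buffer
    std::map<std::string, int> map_t2b;      // tensor name -> buffer index
    size_t size_cur = 0;

    for (const auto & t : tensors) {
        const size_t cur = t.second;
        if (size_cur + cur > GB) {  // current buffer would overflow: seal it
            buffer_sizes.push_back(size_cur);
            size_cur = 0;
        }
        map_t2b[t.first] = (int) buffer_sizes.size();
        size_cur += cur;
    }
    if (size_cur > 0) {
        buffer_sizes.push_back(size_cur); // seal the final buffer
    }

    for (const auto & kv : map_t2b) {
        printf("%-24s -> buffer %d\n", kv.first.c_str(), kv.second);
    }
    printf("total: %zu buffer(s)\n", buffer_sizes.size());
}
```

Under this scheme a model whose weights total roughly 3 GB would load as three or four sub-1 GB buffers instead of a single allocation that an iPhone's Metal backend would reject.
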
@@ -1635,7 +1672,9 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
  }
  }
 
- wsp_ggml_allocr_free(alloc);
+ for (auto & alloc : allocs) {
+ wsp_ggml_allocr_free(alloc);
+ }
 
  wctx.t_load_us = wsp_ggml_time_us() - t_start_us;
 
@@ -1777,7 +1816,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_encoder(
 
  wsp_ggml_cgraph * gf = wsp_ggml_new_graph_custom(ctx0, WHISPER_MAX_NODES, false);
 
- wsp_ggml_allocr * alloc = wstate.alloc_encode.alloc;
+ //wsp_ggml_allocr * alloc = wstate.alloc_encode.alloc;
 
  //struct wsp_ggml_tensor * cur = wsp_ggml_new_tensor_2d(ctx0, WSP_GGML_TYPE_F32, n_ctx, n_state);
  //wsp_ggml_allocr_alloc(alloc, cur);
@@ -1787,13 +1826,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_encoder(
  //}
  struct wsp_ggml_tensor * cur = wsp_ggml_view_tensor(ctx0, wstate.embd_conv);
 
- struct wsp_ggml_tensor * KQscale = wsp_ggml_new_tensor_1d(ctx0, WSP_GGML_TYPE_F32, 1);
- wsp_ggml_allocr_alloc(alloc, KQscale);
-
- if (!wsp_ggml_allocr_is_measure(alloc)) {
- const float val = 1.0f/sqrtf(float(n_state)/n_head);
- wsp_ggml_backend_tensor_set(KQscale, &val, 0, sizeof(float));
- }
+ const float KQscale = 1.0f/sqrtf(float(n_state)/n_head);
 
  // ===================================================================
  // NOTE: experimenting with partial evaluation of the encoder (ignore)
@@ -1843,14 +1876,14 @@ static struct wsp_ggml_cgraph * whisper_build_graph_encoder(
 
  Qcur = wsp_ggml_add(ctx0, Qcur, layer.attn_q_b);
 
- //Qcur = wsp_ggml_scale(ctx0, Qcur, wsp_ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
+ //Qcur = wsp_ggml_scale(ctx0, Qcur, pow(float(n_state)/n_head, -0.25));
 
  // note: no bias for Key
  struct wsp_ggml_tensor * Kcur = wsp_ggml_mul_mat(ctx0,
  layer.attn_k_w,
  cur);
 
- //Kcur = wsp_ggml_scale(ctx0, Kcur, wsp_ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
+ //Kcur = wsp_ggml_scale(ctx0, Kcur, pow(float(n_state)/n_head, -0.25));
 
  struct wsp_ggml_tensor * Vcur = wsp_ggml_mul_mat(ctx0,
  layer.attn_v_w,
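
In rc.6 the attention scale had to live in the graph as a one-element F32 tensor: allocate it through the allocator, then write the value with `wsp_ggml_backend_tensor_set` only when not in measure mode. After this sync the scale is an ordinary `const float` (the updated commented-out `wsp_ggml_scale(ctx0, ..., pow(...))` calls suggest the ggml scale op now takes a plain float, though that is an inference from this hunk, not something the diff states), so the measure-mode dance disappears. For reference, a small computation of the value being passed, with model dimensions assumed for illustration:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // assumed dimensions (roughly the Whisper "base" model)
    const int n_state = 512;
    const int n_head  = 8;

    // per-tensor scale applied to both Q and K, so the KQ^T product
    // is effectively scaled by 1/sqrt(d_head)
    const float KQscale = powf(float(n_state)/n_head, -0.25f);

    printf("d_head = %d\n", n_state/n_head);                         // 64
    printf("KQscale = %f\n", KQscale);                               // ~0.3536
    printf("KQscale^2 = %f (= 1/sqrt(d_head))\n", KQscale*KQscale);  // 0.125
    return 0;
}
```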
@@ -2032,7 +2065,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_cross(
 
  wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx0);
 
- wsp_ggml_allocr * alloc = wstate.alloc_cross.alloc;
+ //wsp_ggml_allocr * alloc = wstate.alloc_cross.alloc;
 
  //struct wsp_ggml_tensor * cur = wsp_ggml_new_tensor_2d(ctx0, WSP_GGML_TYPE_F32, n_state, n_ctx);
  //wsp_ggml_allocr_alloc(alloc, cur);
@@ -2042,13 +2075,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_cross(
  //}
  struct wsp_ggml_tensor * cur = wsp_ggml_view_tensor(ctx0, wstate.embd_enc);
 
- struct wsp_ggml_tensor * Kscale = wsp_ggml_new_tensor_1d(ctx0, WSP_GGML_TYPE_F32, 1);
- wsp_ggml_allocr_alloc(alloc, Kscale);
-
- if (!wsp_ggml_allocr_is_measure(alloc)) {
- const float val = pow(float(n_state) / n_head, -0.25);
- wsp_ggml_backend_tensor_set(Kscale, &val, 0, sizeof(float));
- }
+ const float Kscale = pow(float(n_state) / n_head, -0.25);
 
  for (int il = 0; il < model.hparams.n_text_layer; ++il) {
  auto & layer = model.layers_decoder[il];
@@ -2118,7 +2145,9 @@ static bool whisper_encode_internal(
  wsp_ggml_allocr_alloc_graph(alloc, gf);
 
  if (!whisper_encode_external(wstate)) {
- wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
  }
  }
 
@@ -2132,7 +2161,9 @@ static bool whisper_encode_internal(
 
  wsp_ggml_allocr_alloc_graph(alloc, gf);
 
- wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
  }
 
  // cross
@@ -2145,7 +2176,9 @@ static bool whisper_encode_internal(
 
  wsp_ggml_allocr_alloc_graph(alloc, gf);
 
- wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
  }
 
  wstate.t_encode_us += wsp_ggml_time_us() - t_start_us;
@@ -2178,7 +2211,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_decoder(
  const int32_t n_kv = wsp_ggml_allocr_is_measure(alloc) ? n_ctx : kv_self.n;
  const int32_t kv_head = wsp_ggml_allocr_is_measure(alloc) ? n_ctx - n_tokens : kv_self.head;
 
- //WHISPER_PRINT_DEBUG("%s: n_past = %d, n_tokens = %d, n_audio_ctx = %d, n_ctx = %d\n", __func__, n_past, n_tokens, n_audio_ctx, n_ctx);
+ //WHISPER_LOG_DEBUG("%s: n_past = %d, n_tokens = %d, n_audio_ctx = %d, n_ctx = %d\n", __func__, n_past, n_tokens, n_audio_ctx, n_ctx);
 
  struct wsp_ggml_init_params params = {
  /*.mem_size =*/ wstate.alloc_decode.meta.size(),
@@ -2207,13 +2240,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_decoder(
  }
  }
 
- struct wsp_ggml_tensor * KQscale = wsp_ggml_new_tensor_1d(ctx0, WSP_GGML_TYPE_F32, 1);
- wsp_ggml_allocr_alloc(alloc, KQscale);
-
- if (!wsp_ggml_allocr_is_measure(alloc)) {
- const float val = pow(float(n_state)/n_head, -0.25);
- wsp_ggml_backend_tensor_set(KQscale, &val, 0, sizeof(float));
- }
+ const float KQscale = pow(float(n_state)/n_head, -0.25);
 
  struct wsp_ggml_tensor * KQ_mask = wsp_ggml_new_tensor_3d(ctx0, WSP_GGML_TYPE_F32, n_kv, n_tokens, 1);
  wsp_ggml_allocr_alloc(alloc, KQ_mask);
@@ -2573,7 +2600,9 @@ static bool whisper_decode_internal(
 
  logits = gf->nodes[gf->n_nodes - 1];
 
- wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!wsp_ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
  }
 
  logits_out.resize(n_tokens*n_vocab);
@@ -3393,8 +3422,10 @@ void whisper_free(struct whisper_context * ctx) {
  wsp_ggml_free(ctx->model.ctx);
  }
 
- if (ctx->model.buffer) {
- wsp_ggml_backend_buffer_free(ctx->model.buffer);
+ for (auto & buffer : ctx->model.buffers) {
+ if (buffer) {
+ wsp_ggml_backend_buffer_free(buffer);
+ }
  }
 
  whisper_free_state(ctx->state);
@@ -3838,6 +3869,7 @@ void whisper_reset_timings(struct whisper_context * ctx) {
  ctx->state->t_sample_us = 0;
  ctx->state->t_encode_us = 0;
  ctx->state->t_decode_us = 0;
+ ctx->state->t_batchd_us = 0;
  ctx->state->t_prompt_us = 0;
  ctx->state->n_sample = 0;
  ctx->state->n_encode = 0;
@@ -4966,7 +4998,7 @@ static void whisper_sequence_score(
  const auto p = kv.second/(double)cnt;
  entropy -= p*log(p);
 
- //WHISPER_PRINT_DEBUG("entropy: %d %f %f, count %d\n", kv.first, p, log(p), kv.second);
+ //WHISPER_LOG_DEBUG("entropy: %d %f %f, count %d\n", kv.first, p, log(p), kv.second);
  }
 
  sequence.entropy = entropy;
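
`whisper_sequence_score` accumulates the Shannon entropy of the token histogram over the scored window, `entropy = -sum(p*log(p))` with `p = count/total`; as later hunks in this diff show, a decoder whose entropy drops below `params.entropy_thold` is flagged as stuck in a repetition loop. A standalone sketch of the same computation over a hypothetical count map:

```cpp
#include <cmath>
#include <cstdio>
#include <map>

int main() {
    // hypothetical token-id -> occurrence counts in the scored window
    std::map<int, int> token_counts = { {50364, 1}, {1029, 4}, {11, 4} };

    int cnt = 0;
    for (const auto & kv : token_counts) cnt += kv.second;

    // Shannon entropy: -sum(p * log p), p = count/total
    double entropy = 0.0;
    for (const auto & kv : token_counts) {
        const double p = kv.second/(double)cnt;
        entropy -= p*log(p);
    }

    // few distinct tokens repeated often -> low entropy -> likely loop
    printf("entropy = %f\n", entropy);
    return 0;
}
```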
@@ -5032,7 +5064,7 @@ int whisper_full_with_state(
  // basically don't process anything that is less than 1.0s
  // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
  if (seek_end < seek_start + (params.speed_up ? 50 : 100)) {
- WHISPER_PRINT_DEBUG("%s: input is too short - %d ms < 1000 ms\n", __func__, (seek_end - seek_start)*10);
+ WHISPER_LOG_DEBUG("%s: input is too short - %d ms < 1000 ms\n", __func__, (seek_end - seek_start)*10);
  return 0;
  }
 
@@ -5221,7 +5253,7 @@
 
  n_decoders_cur = std::max(1, n_decoders_cur);
 
- WHISPER_PRINT_DEBUG("\n%s: strategy = %d, decoding with %d decoders, temperature = %.2f\n", __func__, params.strategy, n_decoders_cur, t_cur);
+ WHISPER_LOG_DEBUG("\n%s: strategy = %d, decoding with %d decoders, temperature = %.2f\n", __func__, params.strategy, n_decoders_cur, t_cur);
 
  // TAGS: WHISPER_DECODER_INIT
  for (int j = 0; j < n_decoders_cur; ++j) {
@@ -5265,11 +5297,11 @@
  prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end());
 
  // print the prompt
- WHISPER_PRINT_DEBUG("\n\n");
+ WHISPER_LOG_DEBUG("\n\n");
  for (int i = 0; i < (int) prompt.size(); i++) {
- WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
+ WHISPER_LOG_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
  }
- WHISPER_PRINT_DEBUG("\n\n");
+ WHISPER_LOG_DEBUG("\n\n");
 
  whisper_kv_cache_clear(state->kv_self);
 
@@ -5417,7 +5449,7 @@
 
  whisper_kv_cache_seq_cp(state->kv_self, cur.decoder_idx, WHISPER_MAX_DECODERS + j, -1, -1);
 
- WHISPER_PRINT_DEBUG("%s: beam search: decoder %d: from decoder %d: token = %10s, plog = %8.5f, sum_logprobs = %8.5f\n",
+ WHISPER_LOG_DEBUG("%s: beam search: decoder %d: from decoder %d: token = %10s, plog = %8.5f, sum_logprobs = %8.5f\n",
  __func__, j, cur.decoder_idx, ctx->vocab.id_to_token.at(decoder.sequence.tokens.back().id).c_str(), decoder.sequence.tokens.back().plog, decoder.sequence.sum_logprobs_all);
  }
 
@@ -5460,7 +5492,7 @@
 
  // do not allow to go back in time
  if (has_ts && seek_delta > seek_delta_new && result_len < i) {
- WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to seek_delta (%d > %d)\n", __func__, j, seek_delta, seek_delta_new);
+ WHISPER_LOG_DEBUG("%s: decoder %d: failed due to seek_delta (%d > %d)\n", __func__, j, seek_delta, seek_delta_new);
  failed = true; // TODO: maybe this is not a failure ?
  continue;
  }
@@ -5475,7 +5507,7 @@
  #ifdef WHISPER_DEBUG
  {
  const auto tt = token.pt > 0.10 ? ctx->vocab.id_to_token.at(token.tid) : "[?]";
- WHISPER_PRINT_DEBUG("%s: id = %3d, decoder = %d, token = %6d, p = %6.3f, ts = %10s, %6.3f, result_len = %4d '%s'\n",
+ WHISPER_LOG_DEBUG("%s: id = %3d, decoder = %d, token = %6d, p = %6.3f, ts = %10s, %6.3f, result_len = %4d '%s'\n",
  __func__, i, j, token.id, token.p, tt.c_str(), token.pt, result_len, ctx->vocab.id_to_token.at(token.id).c_str());
  }
  #endif
@@ -5485,22 +5517,22 @@
  (params.max_tokens > 0 && i >= params.max_tokens) || // max tokens per segment reached
  (has_ts && seek + seek_delta + 100 >= seek_end) // end of audio reached
  ) {
- if (result_len == 0) {
+ if (result_len == 0 && !params.no_timestamps) {
  if (seek + seek_delta + 100 >= seek_end) {
  result_len = i + 1;
  } else {
- WHISPER_PRINT_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);
+ WHISPER_LOG_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);
  failed = true;
  continue;
  }
  }
 
- if (params.single_segment) {
+ if (params.single_segment || params.no_timestamps) {
  result_len = i + 1;
  seek_delta = 100*WHISPER_CHUNK_SIZE;
  }
 
- WHISPER_PRINT_DEBUG("%s: decoder %d completed\n", __func__, j);
+ WHISPER_LOG_DEBUG("%s: decoder %d completed\n", __func__, j);
  completed = true;
  continue;
  }
@@ -5516,7 +5548,7 @@
  // sometimes, the decoding can get stuck in a repetition loop
  // this is an attempt to mitigate such cases - we flag the decoding as failed and use a fallback strategy
  if (i == n_max - 1 && (result_len == 0 || seek_delta < 100*WHISPER_CHUNK_SIZE/2)) {
- WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to repetition loop\n", __func__, j);
+ WHISPER_LOG_DEBUG("%s: decoder %d: failed due to repetition loop\n", __func__, j);
  failed = true;
  continue;
  }
@@ -5558,7 +5590,7 @@
  continue;
  }
 
- //WHISPER_PRINT_DEBUG("%s: decoder %d: token %d, seek_delta %d\n", __func__, j, decoder.sequence.tokens.back().id, decoder.seek_delta);
+ //WHISPER_LOG_DEBUG("%s: decoder %d: token %d, seek_delta %d\n", __func__, j, decoder.sequence.tokens.back().id, decoder.seek_delta);
 
  decoder.i_batch = batch.n_tokens;
 
@@ -5638,11 +5670,11 @@
  decoder.sequence.tokens.resize(decoder.sequence.result_len);
  whisper_sequence_score(params, decoder.sequence);
 
- WHISPER_PRINT_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
+ WHISPER_LOG_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
  __func__, j, decoder.sequence.score, decoder.sequence.result_len, decoder.sequence.avg_logprobs, decoder.sequence.entropy);
 
  if (decoder.sequence.result_len > 32 && decoder.sequence.entropy < params.entropy_thold) {
- WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
+ WHISPER_LOG_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
  __func__, j, decoder.sequence.entropy, params.entropy_thold);
 
  decoder.failed = true;
@@ -5657,7 +5689,7 @@
  }
  }
 
- WHISPER_PRINT_DEBUG("%s: best decoder = %d\n", __func__, best_decoder_id);
+ WHISPER_LOG_DEBUG("%s: best decoder = %d\n", __func__, best_decoder_id);
  }
 
  bool success = true;
@@ -5669,7 +5701,7 @@
  const auto & decoder = state->decoders[best_decoder_id];
 
  if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
- WHISPER_PRINT_DEBUG("%s: failed due to avg_logprobs %8.5f < %8.5f\n", __func__, decoder.sequence.avg_logprobs, params.logprob_thold);
+ WHISPER_LOG_DEBUG("%s: failed due to avg_logprobs %8.5f < %8.5f\n", __func__, decoder.sequence.avg_logprobs, params.logprob_thold);
  success = false;
  state->n_fail_p++;
  }
@@ -5677,13 +5709,13 @@
 
  if (success) {
  //for (auto & token : ctx->decoders[best_decoder_id].sequence.tokens) {
- // WHISPER_PRINT_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
+ // WHISPER_LOG_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
  //}
 
  break;
  }
 
- WHISPER_PRINT_DEBUG("\n%s: failed to decode with temperature = %.2f\n", __func__, t_cur);
+ WHISPER_LOG_DEBUG("\n%s: failed to decode with temperature = %.2f\n", __func__, t_cur);
  }
 
  // output results through a user-provided callback
@@ -5695,7 +5727,7 @@
 
  const auto & tokens_cur = best_decoder.sequence.tokens;
 
- //WHISPER_PRINT_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
+ //WHISPER_LOG_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
 
  // update prompt_past
  prompt_past.clear();
@@ -5815,7 +5847,7 @@
  // update audio window
  seek += seek_delta;
 
- WHISPER_PRINT_DEBUG("seek = %d, seek_delta = %d\n", seek, seek_delta);
+ WHISPER_LOG_DEBUG("seek = %d, seek_delta = %d\n", seek, seek_delta);
  }
  }
 
@@ -6132,7 +6164,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
 
  // multi-thread
 
- for (uint32_t k = 1; k <= n_threads; k++) {
+ for (int32_t k = 1; k <= n_threads; k++) {
  char * src = (char *) malloc(size);
  char * dst = (char *) malloc(size);
 
@@ -6156,13 +6188,13 @@
  const int64_t t0 = wsp_ggml_time_us();
 
  std::vector<std::thread> threads(k - 1);
- for (uint32_t th = 0; th < k - 1; ++th) {
+ for (int32_t th = 0; th < k - 1; ++th) {
  threads[th] = std::thread(helper, th);
  }
 
  helper(k - 1);
 
- for (uint32_t th = 0; th < k - 1; ++th) {
+ for (int32_t th = 0; th < k - 1; ++th) {
  threads[th].join();
  }
 
@@ -116,6 +116,7 @@
  self->recordState.transcribeSliceIndex = 0;
  self->recordState.nSamplesTranscribing = 0;
 
+ self->recordState.sliceNSamples.clear();
  self->recordState.sliceNSamples.push_back(0);
 
  self->recordState.job = rnwhisper::job_new(jobId, [self createParams:options jobId:jobId]);
@@ -202,7 +203,7 @@ void AudioInputCallback(void * inUserData,
  state->sliceNSamples.push_back(0);
  }
 
- NSLog(@"[RNWhisper] Slice %d has %d samples", state->sliceIndex, nSamples);
+ NSLog(@"[RNWhisper] Slice %d has %d samples, put %d samples", state->sliceIndex, nSamples, n);
 
  state->job->put_pcm_data((short*) inBuffer->mAudioData, state->sliceIndex, nSamples, n);
 
@@ -413,7 +414,8 @@ struct rnwhisper_segments_callback_data {
  params.new_segment_callback_user_data = &user_data;
  }
 
- rnwhisper::job* job = rnwhisper::job_new(jobId, params);;
+ rnwhisper::job* job = rnwhisper::job_new(jobId, params);
+ self->recordState.job = job;
  int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
  rnwhisper::job_remove(jobId);
  self->recordState.isTranscribing = false;
@@ -1 +1 @@
- {"version":"1.5.2"}
+ {"version":"1.5.4"}
@@ -1 +1 @@
- {"version":"1.5.2"}
+ {"version":"1.5.4"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "whisper.rn",
- "version": "0.4.0-rc.6",
+ "version": "0.4.0-rc.8",
  "description": "React Native binding of whisper.cpp",
  "main": "lib/commonjs/index",
  "module": "lib/module/index",
package/src/version.json CHANGED
@@ -1 +1 @@
- {"version":"1.5.2"}
+ {"version":"1.5.4"}