whisper.rn 0.4.0-rc.8 → 0.4.0-rc.9

Files changed (53)
  1. package/android/src/main/CMakeLists.txt +2 -1
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -12
  3. package/android/src/main/java/com/rnwhisper/RNWhisper.java +75 -34
  4. package/android/src/main/java/com/rnwhisper/WhisperContext.java +20 -3
  5. package/android/src/main/jni.cpp +29 -1
  6. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  7. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  8. package/cpp/ggml-aarch64.c +3209 -0
  9. package/cpp/ggml-aarch64.h +39 -0
  10. package/cpp/ggml-alloc.c +725 -517
  11. package/cpp/ggml-alloc.h +47 -65
  12. package/cpp/ggml-backend-impl.h +166 -55
  13. package/cpp/ggml-backend.cpp +2635 -0
  14. package/cpp/ggml-backend.h +202 -85
  15. package/cpp/ggml-common.h +1853 -0
  16. package/cpp/ggml-cpu-impl.h +614 -0
  17. package/cpp/ggml-impl.h +143 -180
  18. package/cpp/ggml-metal.h +13 -11
  19. package/cpp/ggml-metal.m +2955 -1632
  20. package/cpp/ggml-quants.c +9824 -3263
  21. package/cpp/ggml-quants.h +133 -248
  22. package/cpp/ggml-whisper.metallib +0 -0
  23. package/cpp/ggml.c +8482 -5142
  24. package/cpp/ggml.h +633 -349
  25. package/cpp/rn-whisper.cpp +91 -0
  26. package/cpp/rn-whisper.h +2 -0
  27. package/cpp/whisper.cpp +1427 -658
  28. package/cpp/whisper.h +84 -28
  29. package/ios/RNWhisper.mm +124 -37
  30. package/ios/RNWhisperAudioUtils.h +1 -0
  31. package/ios/RNWhisperAudioUtils.m +20 -13
  32. package/ios/RNWhisperContext.h +3 -2
  33. package/ios/RNWhisperContext.mm +39 -7
  34. package/jest/mock.js +9 -1
  35. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  36. package/lib/commonjs/index.js +48 -19
  37. package/lib/commonjs/index.js.map +1 -1
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/NativeRNWhisper.js.map +1 -1
  40. package/lib/module/index.js +48 -19
  41. package/lib/module/index.js.map +1 -1
  42. package/lib/module/version.json +1 -1
  43. package/lib/typescript/NativeRNWhisper.d.ts +6 -3
  44. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  45. package/lib/typescript/index.d.ts +25 -3
  46. package/lib/typescript/index.d.ts.map +1 -1
  47. package/package.json +6 -5
  48. package/src/NativeRNWhisper.ts +12 -3
  49. package/src/index.ts +63 -24
  50. package/src/version.json +1 -1
  51. package/whisper-rn.podspec +9 -2
  52. package/cpp/ggml-backend.c +0 -1718
  53. package/cpp/ggml-metal-whisper.metal +0 -5820
package/src/index.ts CHANGED
@@ -174,6 +174,15 @@ export type TranscribeRealtimeNativeEvent = {
   payload: TranscribeRealtimeNativePayload
 }
 
+export type BenchResult = {
+  config: string
+  nThreads: number
+  encodeMs: number
+  decodeMs: number
+  batchMs: number
+  promptMs: number
+}
+
 const updateAudioSession = async (setting: AudioSessionSettingIos) => {
   await AudioSessionIos.setCategory(
     setting.category,
@@ -202,32 +211,10 @@ export class WhisperContext {
     this.reasonNoGPU = reasonNoGPU
   }
 
-  /** Transcribe audio file */
-  transcribe(
-    filePath: string | number,
-    options: TranscribeFileOptions = {},
-  ): {
-    /** Stop the transcribe */
+  private transcribeWithNativeMethod(method: 'transcribeFile' | 'transcribeData', data: string, options: TranscribeFileOptions = {}): {
     stop: () => Promise<void>
-    /** Transcribe result promise */
     promise: Promise<TranscribeResult>
   } {
-    let path = ''
-    if (typeof filePath === 'number') {
-      try {
-        const source = Image.resolveAssetSource(filePath)
-        if (source) path = source.uri
-      } catch (e) {
-        throw new Error(`Invalid asset: ${filePath}`)
-      }
-    } else {
-      if (filePath.startsWith('http'))
-        throw new Error(
-          'Transcribe remote file is not supported, please download it first',
-        )
-      path = filePath
-    }
-    if (path.startsWith('file://')) path = path.slice(7)
     const jobId: number = Math.floor(Math.random() * 10000)
 
     const { onProgress, onNewSegments, ...rest } = options
@@ -276,7 +263,7 @@ export class WhisperContext {
         removeProgressListener()
         removeNewSegmenetsListener()
       },
-      promise: RNWhisper.transcribeFile(this.id, jobId, path, {
+      promise: RNWhisper[method](this.id, jobId, data, {
        ...rest,
        onProgress: !!onProgress,
        onNewSegments: !!onNewSegments,
@@ -298,6 +285,48 @@ export class WhisperContext {
     }
   }
 
+  /**
+   * Transcribe audio file (path or base64 encoded wav file)
+   * base64: need add `data:audio/wav;base64,` prefix
+   */
+  transcribe(
+    filePathOrBase64: string | number,
+    options: TranscribeFileOptions = {},
+  ): {
+    /** Stop the transcribe */
+    stop: () => Promise<void>
+    /** Transcribe result promise */
+    promise: Promise<TranscribeResult>
+  } {
+    let path = ''
+    if (typeof filePathOrBase64 === 'number') {
+      try {
+        const source = Image.resolveAssetSource(filePathOrBase64)
+        if (source) path = source.uri
+      } catch (e) {
+        throw new Error(`Invalid asset: ${filePathOrBase64}`)
+      }
+    } else {
+      if (filePathOrBase64.startsWith('http'))
+        throw new Error(
+          'Transcribe remote file is not supported, please download it first',
+        )
+      path = filePathOrBase64
+    }
+    if (path.startsWith('file://')) path = path.slice(7)
+    return this.transcribeWithNativeMethod('transcribeFile', path, options)
+  }
+
+  /**
+   * Transcribe audio data (base64 encoded float32 PCM data)
+   */
+  transcribeData(data: string, options: TranscribeFileOptions = {}): {
+    stop: () => Promise<void>
+    promise: Promise<TranscribeResult>
+  } {
+    return this.transcribeWithNativeMethod('transcribeData', data, options)
+  }
+
   /** Transcribe the microphone audio stream, the microphone user permission is required */
   async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
     /** Stop the realtime transcribe */
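
As a usage sketch of the split above: `transcribe()` keeps its file-path behavior (and now also accepts a base64 WAV string with the `data:audio/wav;base64,` prefix), while `transcribeData()` takes base64-encoded float32 PCM. The snippet below assumes a `WhisperContext` from `initWhisper`, mono 16 kHz samples, and a hypothetical `pcmToBase64` helper built on the `buffer` polyfill; none of those are part of the package itself.

```ts
import { Buffer } from 'buffer' // assumption: buffer polyfill available in the RN app
import type { WhisperContext } from 'whisper.rn'

// Hypothetical helper: base64-encode raw float32 PCM samples.
const pcmToBase64 = (samples: Float32Array): string =>
  Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength).toString('base64')

async function transcribeBoth(ctx: WhisperContext) {
  // File path (a base64 WAV string with the `data:audio/wav;base64,` prefix also works)
  const { promise } = ctx.transcribe('/path/to/audio.wav') // stop() is also returned for cancellation
  console.log((await promise).result)

  // Raw samples: base64-encoded float32 PCM (assumed mono 16 kHz)
  const samples = new Float32Array(16000) // 1 second of silence
  const { promise: dataPromise } = ctx.transcribeData(pcmToBase64(samples))
  console.log((await dataPromise).result)
}
```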
@@ -433,6 +462,12 @@ export class WhisperContext {
     }
   }
 
+  async bench(maxThreads: number): Promise<BenchResult> {
+    const result = await RNWhisper.bench(this.id, maxThreads)
+    const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result)
+    return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } as BenchResult
+  }
+
   async release(): Promise<void> {
     return RNWhisper.releaseContext(this.id)
   }
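
A short, hedged sketch of calling the new `bench()` method: the field names come from the `BenchResult` type added earlier in this diff, and the context is assumed to come from `initWhisper`.

```ts
import type { WhisperContext } from 'whisper.rn'

// Run the whisper.cpp benchmark on this context with up to 4 threads and log the timings.
async function logBench(ctx: WhisperContext) {
  const { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } = await ctx.bench(4)
  console.log(
    `${config} (${nThreads} threads): encode ${encodeMs} ms, decode ${decodeMs} ms, ` +
      `batch ${batchMs} ms, prompt ${promptMs} ms`,
  )
}
```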
@@ -455,6 +490,8 @@ export type ContextOptions = {
   useCoreMLIos?: boolean
   /** Use GPU if available. Currently iOS only, if it's enabled, Core ML option will be ignored. */
   useGpu?: boolean
+  /** Use Flash Attention, only recommended if GPU available */
+  useFlashAttn?: boolean,
 }
 
 const coreMLModelAssetPaths = [
@@ -470,6 +507,7 @@ export async function initWhisper({
   isBundleAsset,
   useGpu = true,
   useCoreMLIos = true,
+  useFlashAttn = false,
 }: ContextOptions): Promise<WhisperContext> {
   let path = ''
   let coreMLAssets: CoreMLAsset[] | undefined
@@ -518,6 +556,7 @@
   const { contextId, gpu, reasonNoGPU } = await RNWhisper.initContext({
     filePath: path,
     isBundleAsset: !!isBundleAsset,
+    useFlashAttn,
     useGpu,
     useCoreMLIos,
     // Only development mode need download Core ML model assets (from packager server)
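
And a sketch of the new `useFlashAttn` flag at context creation, using the option names from the `ContextOptions`/`initWhisper` changes above; the model file name and bundling setup are assumptions for illustration only.

```ts
import { initWhisper } from 'whisper.rn'

async function createContext() {
  // Flash Attention is only recommended when the GPU backend is in use,
  // so it is paired with useGpu here (iOS only, per the option docs above).
  const ctx = await initWhisper({
    filePath: 'ggml-base.bin', // assumption: a model file bundled with the app
    isBundleAsset: true,
    useGpu: true,
    useFlashAttn: true,
  })
  return ctx
}
```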
package/src/version.json CHANGED
@@ -1 +1 @@
-{"version":"1.5.4"}
+{"version":"1.7.1"}
package/whisper-rn.podspec CHANGED
@@ -31,8 +31,10 @@ Pod::Spec.new do |s|
   s.platforms = { :ios => "11.0", :tvos => "11.0" }
   s.source = { :git => "https://github.com/mybigday/whisper.rn.git", :tag => "#{s.version}" }
 
-  s.source_files = "ios/**/*.{h,m,mm}", "cpp/**/*.{h,cpp,c,m,mm}"
-  s.resources = "cpp/**/*.{metal}"
+  s.source_files = "ios/**/*.{h,m,mm}", "cpp/*.{h,cpp,c}", "cpp/coreml/*.{h,m,mm}"
+  s.resources = "cpp/*.{metallib}"
+
+  s.requires_arc = true
 
   s.dependency "React-Core"
 
@@ -60,4 +62,9 @@ Pod::Spec.new do |s|
     s.dependency "RCTTypeSafety"
     s.dependency "ReactCommon/turbomodule/core"
   end
+
+  s.subspec "no-require-arc" do |ss|
+    ss.requires_arc = false
+    ss.source_files = "cpp/*.m"
+  end
 end