whisper.rn 0.4.0-rc.8 → 0.4.0-rc.9

Files changed (53)
  1. package/android/src/main/CMakeLists.txt +2 -1
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -12
  3. package/android/src/main/java/com/rnwhisper/RNWhisper.java +75 -34
  4. package/android/src/main/java/com/rnwhisper/WhisperContext.java +20 -3
  5. package/android/src/main/jni.cpp +29 -1
  6. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  7. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  8. package/cpp/ggml-aarch64.c +3209 -0
  9. package/cpp/ggml-aarch64.h +39 -0
  10. package/cpp/ggml-alloc.c +725 -517
  11. package/cpp/ggml-alloc.h +47 -65
  12. package/cpp/ggml-backend-impl.h +166 -55
  13. package/cpp/ggml-backend.cpp +2635 -0
  14. package/cpp/ggml-backend.h +202 -85
  15. package/cpp/ggml-common.h +1853 -0
  16. package/cpp/ggml-cpu-impl.h +614 -0
  17. package/cpp/ggml-impl.h +143 -180
  18. package/cpp/ggml-metal.h +13 -11
  19. package/cpp/ggml-metal.m +2955 -1632
  20. package/cpp/ggml-quants.c +9824 -3263
  21. package/cpp/ggml-quants.h +133 -248
  22. package/cpp/ggml-whisper.metallib +0 -0
  23. package/cpp/ggml.c +8482 -5142
  24. package/cpp/ggml.h +633 -349
  25. package/cpp/rn-whisper.cpp +91 -0
  26. package/cpp/rn-whisper.h +2 -0
  27. package/cpp/whisper.cpp +1427 -658
  28. package/cpp/whisper.h +84 -28
  29. package/ios/RNWhisper.mm +124 -37
  30. package/ios/RNWhisperAudioUtils.h +1 -0
  31. package/ios/RNWhisperAudioUtils.m +20 -13
  32. package/ios/RNWhisperContext.h +3 -2
  33. package/ios/RNWhisperContext.mm +39 -7
  34. package/jest/mock.js +9 -1
  35. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  36. package/lib/commonjs/index.js +48 -19
  37. package/lib/commonjs/index.js.map +1 -1
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/NativeRNWhisper.js.map +1 -1
  40. package/lib/module/index.js +48 -19
  41. package/lib/module/index.js.map +1 -1
  42. package/lib/module/version.json +1 -1
  43. package/lib/typescript/NativeRNWhisper.d.ts +6 -3
  44. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  45. package/lib/typescript/index.d.ts +25 -3
  46. package/lib/typescript/index.d.ts.map +1 -1
  47. package/package.json +6 -5
  48. package/src/NativeRNWhisper.ts +12 -3
  49. package/src/index.ts +63 -24
  50. package/src/version.json +1 -1
  51. package/whisper-rn.podspec +9 -2
  52. package/cpp/ggml-backend.c +0 -1718
  53. package/cpp/ggml-metal-whisper.metal +0 -5820
package/src/index.ts CHANGED
@@ -174,6 +174,15 @@ export type TranscribeRealtimeNativeEvent = {
   payload: TranscribeRealtimeNativePayload
 }
 
+export type BenchResult = {
+  config: string
+  nThreads: number
+  encodeMs: number
+  decodeMs: number
+  batchMs: number
+  promptMs: number
+}
+
 const updateAudioSession = async (setting: AudioSessionSettingIos) => {
   await AudioSessionIos.setCategory(
     setting.category,
@@ -202,32 +211,10 @@ export class WhisperContext {
     this.reasonNoGPU = reasonNoGPU
   }
 
-  /** Transcribe audio file */
-  transcribe(
-    filePath: string | number,
-    options: TranscribeFileOptions = {},
-  ): {
-    /** Stop the transcribe */
+  private transcribeWithNativeMethod(method: 'transcribeFile' | 'transcribeData', data: string, options: TranscribeFileOptions = {}): {
     stop: () => Promise<void>
-    /** Transcribe result promise */
     promise: Promise<TranscribeResult>
   } {
-    let path = ''
-    if (typeof filePath === 'number') {
-      try {
-        const source = Image.resolveAssetSource(filePath)
-        if (source) path = source.uri
-      } catch (e) {
-        throw new Error(`Invalid asset: ${filePath}`)
-      }
-    } else {
-      if (filePath.startsWith('http'))
-        throw new Error(
-          'Transcribe remote file is not supported, please download it first',
-        )
-      path = filePath
-    }
-    if (path.startsWith('file://')) path = path.slice(7)
     const jobId: number = Math.floor(Math.random() * 10000)
 
     const { onProgress, onNewSegments, ...rest } = options
@@ -276,7 +263,7 @@ export class WhisperContext {
         removeProgressListener()
         removeNewSegmenetsListener()
       },
-      promise: RNWhisper.transcribeFile(this.id, jobId, path, {
+      promise: RNWhisper[method](this.id, jobId, data, {
        ...rest,
        onProgress: !!onProgress,
        onNewSegments: !!onNewSegments,
@@ -298,6 +285,48 @@ export class WhisperContext {
     }
   }
 
+  /**
+   * Transcribe audio file (path or base64 encoded wav file)
+   * base64: need add `data:audio/wav;base64,` prefix
+   */
+  transcribe(
+    filePathOrBase64: string | number,
+    options: TranscribeFileOptions = {},
+  ): {
+    /** Stop the transcribe */
+    stop: () => Promise<void>
+    /** Transcribe result promise */
+    promise: Promise<TranscribeResult>
+  } {
+    let path = ''
+    if (typeof filePathOrBase64 === 'number') {
+      try {
+        const source = Image.resolveAssetSource(filePathOrBase64)
+        if (source) path = source.uri
+      } catch (e) {
+        throw new Error(`Invalid asset: ${filePathOrBase64}`)
+      }
+    } else {
+      if (filePathOrBase64.startsWith('http'))
+        throw new Error(
+          'Transcribe remote file is not supported, please download it first',
+        )
+      path = filePathOrBase64
+    }
+    if (path.startsWith('file://')) path = path.slice(7)
+    return this.transcribeWithNativeMethod('transcribeFile', path, options)
+  }
+
+  /**
+   * Transcribe audio data (base64 encoded float32 PCM data)
+   */
+  transcribeData(data: string, options: TranscribeFileOptions = {}): {
+    stop: () => Promise<void>
+    promise: Promise<TranscribeResult>
+  } {
+    return this.transcribeWithNativeMethod('transcribeData', data, options)
+  }
+
   /** Transcribe the microphone audio stream, the microphone user permission is required */
   async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
     /** Stop the realtime transcribe */
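
As a usage sketch of the split above: `transcribe()` keeps its file-path behavior (and now also accepts a base64 WAV string with the `data:audio/wav;base64,` prefix), while `transcribeData()` takes base64-encoded float32 PCM. The snippet below assumes a `WhisperContext` from `initWhisper`, mono 16 kHz samples, and a hypothetical `pcmToBase64` helper built on the `buffer` polyfill; none of those are part of the package itself.

```ts
import { Buffer } from 'buffer' // assumption: buffer polyfill available in the RN app
import type { WhisperContext } from 'whisper.rn'

// Hypothetical helper: base64-encode raw float32 PCM samples.
const pcmToBase64 = (samples: Float32Array): string =>
  Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength).toString('base64')

async function transcribeBoth(ctx: WhisperContext) {
  // File path (a base64 WAV string with the `data:audio/wav;base64,` prefix also works)
  const { promise } = ctx.transcribe('/path/to/audio.wav') // stop() is also returned for cancellation
  console.log((await promise).result)

  // Raw samples: base64-encoded float32 PCM (assumed mono 16 kHz)
  const samples = new Float32Array(16000) // 1 second of silence
  const { promise: dataPromise } = ctx.transcribeData(pcmToBase64(samples))
  console.log((await dataPromise).result)
}
```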
@@ -433,6 +462,12 @@ export class WhisperContext {
     }
   }
 
+  async bench(maxThreads: number): Promise<BenchResult> {
+    const result = await RNWhisper.bench(this.id, maxThreads)
+    const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result)
+    return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } as BenchResult
+  }
+
   async release(): Promise<void> {
     return RNWhisper.releaseContext(this.id)
   }
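
A short, hedged sketch of calling the new `bench()` method: the field names come from the `BenchResult` type added earlier in this diff, and the context is assumed to come from `initWhisper`.

```ts
import type { WhisperContext } from 'whisper.rn'

// Run the whisper.cpp benchmark on this context with up to 4 threads and log the timings.
async function logBench(ctx: WhisperContext) {
  const { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } = await ctx.bench(4)
  console.log(
    `${config} (${nThreads} threads): encode ${encodeMs} ms, decode ${decodeMs} ms, ` +
      `batch ${batchMs} ms, prompt ${promptMs} ms`,
  )
}
```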
@@ -455,6 +490,8 @@ export type ContextOptions = {
   useCoreMLIos?: boolean
   /** Use GPU if available. Currently iOS only, if it's enabled, Core ML option will be ignored. */
   useGpu?: boolean
+  /** Use Flash Attention, only recommended if GPU available */
+  useFlashAttn?: boolean,
 }
 
 const coreMLModelAssetPaths = [
@@ -470,6 +507,7 @@ export async function initWhisper({
   isBundleAsset,
   useGpu = true,
   useCoreMLIos = true,
+  useFlashAttn = false,
 }: ContextOptions): Promise<WhisperContext> {
   let path = ''
   let coreMLAssets: CoreMLAsset[] | undefined
@@ -518,6 +556,7 @@
   const { contextId, gpu, reasonNoGPU } = await RNWhisper.initContext({
     filePath: path,
     isBundleAsset: !!isBundleAsset,
+    useFlashAttn,
     useGpu,
     useCoreMLIos,
     // Only development mode need download Core ML model assets (from packager server)
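
And a sketch of the new `useFlashAttn` flag at context creation, using the option names from the `ContextOptions`/`initWhisper` changes above; the model file name and bundling setup are assumptions for illustration only.

```ts
import { initWhisper } from 'whisper.rn'

async function createContext() {
  // Flash Attention is only recommended when the GPU backend is in use,
  // so it is paired with useGpu here (iOS only, per the option docs above).
  const ctx = await initWhisper({
    filePath: 'ggml-base.bin', // assumption: a model file bundled with the app
    isBundleAsset: true,
    useGpu: true,
    useFlashAttn: true,
  })
  return ctx
}
```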
package/src/version.json CHANGED
@@ -1 +1 @@
-{"version":"1.5.4"}
+{"version":"1.7.1"}
package/whisper-rn.podspec CHANGED
@@ -31,8 +31,10 @@ Pod::Spec.new do |s|
   s.platforms = { :ios => "11.0", :tvos => "11.0" }
   s.source = { :git => "https://github.com/mybigday/whisper.rn.git", :tag => "#{s.version}" }
 
-  s.source_files = "ios/**/*.{h,m,mm}", "cpp/**/*.{h,cpp,c,m,mm}"
-  s.resources = "cpp/**/*.{metal}"
+  s.source_files = "ios/**/*.{h,m,mm}", "cpp/*.{h,cpp,c}", "cpp/coreml/*.{h,m,mm}"
+  s.resources = "cpp/*.{metallib}"
+
+  s.requires_arc = true
 
   s.dependency "React-Core"
 
@@ -60,4 +62,9 @@ Pod::Spec.new do |s|
     s.dependency "RCTTypeSafety"
     s.dependency "ReactCommon/turbomodule/core"
   end
+
+  s.subspec "no-require-arc" do |ss|
+    ss.requires_arc = false
+    ss.source_files = "cpp/*.m"
+  end
 end