whisper.rn 0.3.0-rc.4 → 0.3.0-rc.6

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/src/NativeRNWhisper.ts ADDED
@@ -0,0 +1,79 @@
+ import type { TurboModule } from 'react-native/Libraries/TurboModule/RCTExport'
+ import { TurboModuleRegistry } from 'react-native'
+
+ export type TranscribeOptions = {
+ /** Spoken language (Default: 'auto' for auto-detect) */
+ language?: string,
+ /** Translate from source language to english (Default: false) */
+ translate?: boolean,
+ /** Number of threads to use during computation (Default: 2 for 4-core devices, 4 for more cores) */
+ maxThreads?: number,
+ /** Maximum number of text context tokens to store */
+ maxContext?: number,
+ /** Maximum segment length in characters */
+ maxLen?: number,
+ /** Enable token-level timestamps */
+ tokenTimestamps?: boolean,
+ /** Word timestamp probability threshold */
+ wordThold?: number,
+ /** Time offset in milliseconds */
+ offset?: number,
+ /** Duration of audio to process in milliseconds */
+ duration?: number,
+ /** Initial decoding temperature */
+ temperature?: number,
+ temperatureInc?: number,
+ /** Beam size for beam search */
+ beamSize?: number,
+ /** Number of best candidates to keep */
+ bestOf?: number,
+ /** Speed up audio by x2 (reduced accuracy) */
+ speedUp?: boolean,
+ /** Initial Prompt */
+ prompt?: string,
+ }
+
+ export type TranscribeResult = {
+ result: string,
+ segments: Array<{
+ text: string,
+ t0: number,
+ t1: number,
+ }>,
+ }
+
+ export type CoreMLAsset = {
+ uri: string,
+ filepath: string,
+ }
+
+ type NativeContextOptions = {
+ filePath: string,
+ isBundleAsset: boolean,
+ downloadCoreMLAssets?: boolean,
+ coreMLAssets?: CoreMLAsset[],
+ }
+
+ export interface Spec extends TurboModule {
+ getConstants(): {
+ useCoreML: boolean
+ coreMLAllowFallback: boolean
+ };
+ initContext(options: NativeContextOptions): Promise<number>;
+ releaseContext(contextId: number): Promise<void>;
+ releaseAllContexts(): Promise<void>;
+ transcribeFile(
+ contextId: number,
+ jobId: number,
+ path: string,
+ options: TranscribeOptions,
+ ): Promise<TranscribeResult>;
+ startRealtimeTranscribe(
+ contextId: number,
+ jobId: number,
+ options: TranscribeOptions,
+ ): Promise<void>;
+ abortTranscribe(contextId: number, jobId: number): Promise<void>;
+ }
+
+ export default TurboModuleRegistry.get<Spec>('RNWhisper') as Spec
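
This new spec file moves the native bindings onto React Native's TurboModule codegen path (`TurboModuleRegistry.get<Spec>('RNWhisper')`), replacing the `NativeModules.RNWhisper` lookup removed from `index.ts` below. The option and result types declared here are re-exported from the package entry point, so caller-side options are typed against this spec. A minimal sketch, assuming only the `whisper.rn` type exports shown in this diff:

```ts
import type { TranscribeOptions } from 'whisper.rn'

// All fields are optional; the comments mirror the doc comments in the spec above.
const options: TranscribeOptions = {
  language: 'en', // 'auto' (auto-detect) by default
  translate: false, // translate from the source language to English
  maxThreads: 4, // default: 2 on 4-core devices, 4 on devices with more cores
  tokenTimestamps: true,
  beamSize: 5,
}
```
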
package/src/index.ts CHANGED
@@ -1,77 +1,39 @@
  import {
  NativeEventEmitter,
  DeviceEventEmitter,
- NativeModules,
  Platform,
  DeviceEventEmitterStatic,
+ Image,
  } from 'react-native'
+ import RNWhisper from './NativeRNWhisper'
+ import type {
+ TranscribeOptions,
+ TranscribeResult,
+ CoreMLAsset,
+ } from './NativeRNWhisper'
  import { version } from './version.json'

- const LINKING_ERROR =
- `The package 'whisper.rn' doesn't seem to be linked. Make sure: \n\n${Platform.select({ ios: "- You have run 'pod install'\n", default: '' })
- }- You rebuilt the app after installing the package`
-
- const RNWhisper = NativeModules.RNWhisper
- ? NativeModules.RNWhisper
- : new Proxy(
- {},
- {
- get() {
- throw new Error(LINKING_ERROR)
- },
- },
- )
-
  let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
  if (Platform.OS === 'ios') {
+ // @ts-ignore
  EventEmitter = new NativeEventEmitter(RNWhisper)
  }
  if (Platform.OS === 'android') {
  EventEmitter = DeviceEventEmitter
  }

+ export type { TranscribeOptions, TranscribeResult }
+
  const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
  const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'

- export type TranscribeOptions = {
- /** Spoken language (Default: 'auto' for auto-detect) */
- language?: string,
- /** Translate from source language to english (Default: false) */
- translate?: boolean,
- /** Number of threads to use during computation (Default: 2 for 4-core devices, 4 for more cores) */
- maxThreads?: number,
- /** Maximum number of text context tokens to store */
- maxContext?: number,
- /** Maximum segment length in characters */
- maxLen?: number,
- /** Enable token-level timestamps */
- tokenTimestamps?: boolean,
- /** Word timestamp probability threshold */
- wordThold?: number,
- /** Time offset in milliseconds */
- offset?: number,
- /** Duration of audio to process in milliseconds */
- duration?: number,
- /** Tnitial decoding temperature */
- temperature?: number,
- temperatureInc?: number,
- /** Beam size for beam search */
- beamSize?: number,
- /** Number of best candidates to keep */
- bestOf?: number,
- /** Speed up audio by x2 (reduced accuracy) */
- speedUp?: boolean,
- /** Initial Prompt */
- prompt?: string,
- }
-
  export type TranscribeRealtimeOptions = TranscribeOptions & {
  /**
- * Realtime record max duration in seconds.
+ * Realtime record max duration in seconds.
  * Due to the whisper.cpp hard constraint - processes the audio in chunks of 30 seconds,
  * the recommended value will be <= 30 seconds. (Default: 30)
  */
- realtimeAudioSec?: number,
+ realtimeAudioSec?: number
  /**
  * Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
  * Set `realtimeAudioSliceSec` < 30 so performance improvements can be achieved in the Whisper hard constraint (processes the audio in chunks of 30 seconds).
@@ -80,52 +42,43 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
  realtimeAudioSliceSec?: number
  }

- export type TranscribeResult = {
- result: string,
- segments: Array<{
- text: string,
- t0: number,
- t1: number,
- }>,
- }
-
  export type TranscribeRealtimeEvent = {
- contextId: number,
- jobId: number,
+ contextId: number
+ jobId: number
  /** Is capturing audio, when false, the event is the final result */
- isCapturing: boolean,
- isStoppedByAction?: boolean,
- code: number,
- data?: TranscribeResult,
- error?: string,
- processTime: number,
- recordingTime: number,
+ isCapturing: boolean
+ isStoppedByAction?: boolean
+ code: number
+ data?: TranscribeResult
+ error?: string
+ processTime: number
+ recordingTime: number
  slices?: Array<{
- code: number,
- error?: string,
- data?: TranscribeResult,
- processTime: number,
- recordingTime: number,
- }>,
+ code: number
+ error?: string
+ data?: TranscribeResult
+ processTime: number
+ recordingTime: number
+ }>
  }

  export type TranscribeRealtimeNativePayload = {
  /** Is capturing audio, when false, the event is the final result */
- isCapturing: boolean,
- isStoppedByAction?: boolean,
- code: number,
- processTime: number,
- recordingTime: number,
- isUseSlices: boolean,
- sliceIndex: number,
- data?: TranscribeResult,
- error?: string,
+ isCapturing: boolean
+ isStoppedByAction?: boolean
+ code: number
+ processTime: number
+ recordingTime: number
+ isUseSlices: boolean
+ sliceIndex: number
+ data?: TranscribeResult
+ error?: string
  }

  export type TranscribeRealtimeNativeEvent = {
- contextId: number,
- jobId: number,
- payload: TranscribeRealtimeNativePayload,
+ contextId: number
+ jobId: number
+ payload: TranscribeRealtimeNativePayload
  }

  export class WhisperContext {
@@ -136,12 +89,29 @@ export class WhisperContext {
  }

  /** Transcribe audio file */
- transcribe(path: string, options: TranscribeOptions = {}): {
+ transcribe(
+ filePath: string | number,
+ options: TranscribeOptions = {},
+ ): {
  /** Stop the transcribe */
- stop: () => void,
+ stop: () => void
  /** Transcribe result promise */
- promise: Promise<TranscribeResult>,
+ promise: Promise<TranscribeResult>
  } {
+ let path = ''
+ if (typeof filePath === 'number') {
+ try {
+ const source = Image.resolveAssetSource(filePath)
+ if (source) path = source.uri
+ } catch (e) {
+ throw new Error(`Invalid asset: ${filePath}`)
+ }
+ } else {
+ if (filePath.startsWith('http'))
+ throw new Error('Transcribe remote file is not supported, please download it first')
+ path = filePath
+ }
+ if (path.startsWith('file://')) path = path.slice(7)
  const jobId: number = Math.floor(Math.random() * 10000)
  return {
  stop: () => RNWhisper.abortTranscribe(this.id, jobId),
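
`transcribe` now accepts either a string path or a number returned by `require` (a bundled asset resolved through `Image.resolveAssetSource`); a leading `file://` is stripped, and plain `http(s)` URLs are rejected with an error. A usage sketch, assuming an already-initialized context and hypothetical file/asset paths:

```ts
import { WhisperContext } from 'whisper.rn'

// Sketch only: `context` comes from initWhisper(), and both paths below are hypothetical.
async function transcribeExamples(context: WhisperContext) {
  // Plain path: the `file://` prefix is stripped before the call reaches the native module.
  const { stop, promise } = context.transcribe('file:///tmp/recording.wav', {
    language: 'en',
  })
  const { result, segments } = await promise
  console.log(result, segments.length)
  stop() // aborts the native job if it is still running

  // Bundled asset: the number from require() is resolved via Image.resolveAssetSource.
  await context.transcribe(require('./assets/sample.wav'), {}).promise
}
```
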
@@ -152,9 +122,9 @@ export class WhisperContext {
  /** Transcribe the microphone audio stream, the microphone user permission is required */
  async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
  /** Stop the realtime transcribe */
- stop: () => void,
+ stop: () => void
  /** Subscribe to realtime transcribe events */
- subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void,
+ subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
  }> {
  const jobId: number = Math.floor(Math.random() * 10000)
  await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
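
The realtime API keeps the same shape in this hunk (only trailing-comma formatting changes): `transcribeRealtime` resolves to a `stop`/`subscribe` pair, and events arrive as `TranscribeRealtimeEvent`. A consumption sketch, assuming microphone permission has already been granted:

```ts
import { WhisperContext } from 'whisper.rn'

// Sketch only: `context` comes from initWhisper().
async function realtimeExample(context: WhisperContext) {
  const { stop, subscribe } = await context.transcribeRealtime({
    realtimeAudioSec: 60,
    realtimeAudioSliceSec: 25, // keep each slice under the 30 s whisper.cpp chunk limit
  })

  subscribe((event) => {
    const { isCapturing, data, processTime, recordingTime } = event
    console.log(`[${recordingTime}ms rec / ${processTime}ms proc]`, data?.result)
    if (!isCapturing) console.log('Final event received')
  })

  // Stop capturing manually after 30 seconds.
  setTimeout(() => stop(), 30000)
}
```
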
@@ -170,37 +140,42 @@ export class WhisperContext {
  const { segments = [] } = slices[sliceIndex]?.data || {}
  tOffset = segments[segments.length - 1]?.t1 || 0
  }
- ({ sliceIndex } = payload)
+ ;({ sliceIndex } = payload)
  slices[sliceIndex] = {
  ...payload,
- data: payload.data ? {
- ...payload.data,
- segments: payload.data.segments.map((segment) => ({
- ...segment,
- t0: segment.t0 + tOffset,
- t1: segment.t1 + tOffset,
- })) || [],
- } : undefined,
+ data: payload.data
+ ? {
+ ...payload.data,
+ segments:
+ payload.data.segments.map((segment) => ({
+ ...segment,
+ t0: segment.t0 + tOffset,
+ t1: segment.t1 + tOffset,
+ })) || [],
+ }
+ : undefined,
  }
  }

- const mergeSlicesIfNeeded = (payload: TranscribeRealtimeNativePayload): TranscribeRealtimeNativePayload => {
+ const mergeSlicesIfNeeded = (
+ payload: TranscribeRealtimeNativePayload,
+ ): TranscribeRealtimeNativePayload => {
  if (!payload.isUseSlices) return payload

  const mergedPayload: any = {}
- slices.forEach(
- (slice) => {
- mergedPayload.data = {
- result: (mergedPayload.data?.result || '') + (slice.data?.result || ''),
- segments: [
- ...(mergedPayload?.data?.segments || []),
- ...(slice.data?.segments || []),
- ],
- }
- mergedPayload.processTime = slice.processTime
- mergedPayload.recordingTime = (mergedPayload?.recordingTime || 0) + slice.recordingTime
+ slices.forEach((slice) => {
+ mergedPayload.data = {
+ result:
+ (mergedPayload.data?.result || '') + (slice.data?.result || ''),
+ segments: [
+ ...(mergedPayload?.data?.segments || []),
+ ...(slice.data?.segments || []),
+ ],
  }
- )
+ mergedPayload.processTime = slice.processTime
+ mergedPayload.recordingTime =
+ (mergedPayload?.recordingTime || 0) + slice.recordingTime
+ })
  return { ...payload, ...mergedPayload, slices }
  }

@@ -219,7 +194,7 @@ export class WhisperContext {
  jobId: evt.jobId,
  ...mergeSlicesIfNeeded(payload),
  })
- }
+ },
  )
  let endListener: any = EventEmitter.addListener(
  EVENT_ON_REALTIME_TRANSCRIBE_END,
@@ -235,7 +210,7 @@ export class WhisperContext {
  contextId,
  jobId: evt.jobId,
  ...mergeSlicesIfNeeded(lastPayload),
- isCapturing: false
+ isCapturing: false,
  })
  if (transcribeListener) {
  transcribeListener.remove()
@@ -245,7 +220,7 @@ export class WhisperContext {
  endListener.remove()
  endListener = null
  }
- }
+ },
  )
  },
  }
@@ -256,10 +231,75 @@ export class WhisperContext {
  }
  }

- export async function initWhisper(
- { filePath, isBundleAsset }: { filePath?: string, isBundleAsset?: boolean } = {}
- ): Promise<WhisperContext> {
- const id = await RNWhisper.initContext(filePath, !!isBundleAsset)
+ export type ContextOptions = {
+ filePath: string | number
+ /**
+ * CoreML model assets. If you're using `require` on filePath,
+ * this option is required to enable Core ML;
+ * you will need to bundle weights/weight.bin, model.mil, coremldata.bin into the app via `require`
+ */
+ coreMLModelAsset?: {
+ filename: string
+ assets: number[]
+ }
+ /** Is the file path a bundle asset for pure string filePath */
+ isBundleAsset?: boolean
+ }
+
+ const coreMLModelAssetPaths = [
+ 'analytics/coremldata.bin',
+ 'weights/weight.bin',
+ 'model.mil',
+ 'coremldata.bin',
+ ]
+
+ export async function initWhisper({
+ filePath,
+ coreMLModelAsset,
+ isBundleAsset,
+ }: ContextOptions): Promise<WhisperContext> {
+ let path = ''
+ let coreMLAssets: CoreMLAsset[] | undefined
+ if (coreMLModelAsset) {
+ const { filename, assets } = coreMLModelAsset
+ if (filename && assets) {
+ coreMLAssets = assets
+ ?.map((asset) => {
+ const { uri } = Image.resolveAssetSource(asset)
+ const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
+ if (filepath) {
+ return {
+ uri,
+ filepath: `${filename}/${filepath}`,
+ }
+ }
+ return undefined
+ })
+ .filter((asset): asset is CoreMLAsset => asset !== undefined)
+ }
+ }
+ if (typeof filePath === 'number') {
+ try {
+ const source = Image.resolveAssetSource(filePath)
+ if (source) {
+ path = source.uri
+ }
+ } catch (e) {
+ throw new Error(`Invalid asset: ${filePath}`)
+ }
+ } else {
+ if (!isBundleAsset && filePath.startsWith('http'))
+ throw new Error('Transcribe remote file is not supported, please download it first')
+ path = filePath
+ }
+ if (path.startsWith('file://')) path = path.slice(7)
+ const id = await RNWhisper.initContext({
+ filePath: path,
+ isBundleAsset: !!isBundleAsset,
+ // Only development mode needs to download Core ML model assets (from the packager server)
+ downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
+ coreMLAssets,
+ })
  return new WhisperContext(id)
  }
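
`initWhisper` now takes a single `ContextOptions` object: `filePath` may be a string or a `require`'d model asset, and `coreMLModelAsset` lets the Core ML model files matched by `coreMLModelAssetPaths` be bundled via `require` as well (in development they are served by the packager, hence `downloadCoreMLAssets: __DEV__ && !!coreMLAssets`). A sketch with hypothetical asset filenames; requiring `.bin`/`.mil` files generally needs those extensions added to Metro's `assetExts`:

```ts
import { Platform } from 'react-native'
import { initWhisper } from 'whisper.rn'

// Sketch only: the model and Core ML asset filenames below are hypothetical examples.
async function createContext() {
  return initWhisper({
    filePath: require('./assets/ggml-tiny.en.bin'),
    coreMLModelAsset:
      Platform.OS === 'ios'
        ? {
            filename: 'ggml-tiny.en-encoder.mlmodelc',
            assets: [
              require('./assets/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin'),
              require('./assets/ggml-tiny.en-encoder.mlmodelc/model.mil'),
              require('./assets/ggml-tiny.en-encoder.mlmodelc/coremldata.bin'),
            ],
          }
        : undefined,
  })
}
```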
 
@@ -270,8 +310,10 @@ export async function releaseAllWhisper(): Promise<void> {
  /** Current version of whisper.cpp */
  export const libVersion: string = version

+ const { useCoreML, coreMLAllowFallback } = RNWhisper.getConstants?.() || {}
+
  /** Is use CoreML models on iOS */
- export const isUseCoreML: boolean = !!RNWhisper.WHISPER_USE_COREML
+ export const isUseCoreML: boolean = !!useCoreML

  /** Is allow fallback to CPU if load CoreML model failed */
- export const isCoreMLAllowFallback: boolean = !!RNWhisper.WHISPER_COREML_ALLOW_FALLBACK
+ export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
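
The Core ML flags are now read once through the TurboModule's `getConstants()` instead of the old `WHISPER_USE_COREML` / `WHISPER_COREML_ALLOW_FALLBACK` module constants; the exported names are unchanged, so existing imports keep working:

```ts
import { isUseCoreML, isCoreMLAllowFallback } from 'whisper.rn'

// Same exported booleans as before, now backed by RNWhisper.getConstants().
if (isUseCoreML) {
  console.log('Core ML build; CPU fallback allowed:', isCoreMLAllowFallback)
}
```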