whisper.rn 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +28 -0
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +37 -116
  4. package/android/src/main/jni.cpp +23 -12
  5. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  6. package/ios/RNWhisper.mm +81 -22
  7. package/ios/RNWhisper.xcodeproj/project.pbxproj +6 -0
  8. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  9. package/ios/RNWhisperAudioSessionUtils.h +13 -0
  10. package/ios/RNWhisperAudioSessionUtils.m +91 -0
  11. package/ios/RNWhisperAudioUtils.h +1 -0
  12. package/ios/RNWhisperAudioUtils.m +21 -0
  13. package/ios/RNWhisperContext.h +1 -0
  14. package/ios/RNWhisperContext.mm +56 -28
  15. package/jest/mock.js +10 -0
  16. package/lib/commonjs/AudioSessionIos.js +91 -0
  17. package/lib/commonjs/AudioSessionIos.js.map +1 -0
  18. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  19. package/lib/commonjs/index.js +82 -14
  20. package/lib/commonjs/index.js.map +1 -1
  21. package/lib/module/AudioSessionIos.js +83 -0
  22. package/lib/module/AudioSessionIos.js.map +1 -0
  23. package/lib/module/NativeRNWhisper.js.map +1 -1
  24. package/lib/module/index.js +77 -14
  25. package/lib/module/index.js.map +1 -1
  26. package/lib/typescript/AudioSessionIos.d.ts +54 -0
  27. package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
  28. package/lib/typescript/NativeRNWhisper.d.ts +8 -0
  29. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  30. package/lib/typescript/index.d.ts +37 -2
  31. package/lib/typescript/index.d.ts.map +1 -1
  32. package/package.json +1 -1
  33. package/src/AudioSessionIos.ts +90 -0
  34. package/src/NativeRNWhisper.ts +11 -1
  35. package/src/index.ts +153 -26
package/src/index.ts CHANGED
@@ -11,6 +11,12 @@ import type {
11
11
  TranscribeResult,
12
12
  CoreMLAsset,
13
13
  } from './NativeRNWhisper'
14
+ import AudioSessionIos from './AudioSessionIos'
15
+ import type {
16
+ AudioSessionCategoryIos,
17
+ AudioSessionCategoryOptionIos,
18
+ AudioSessionModeIos,
19
+ } from './AudioSessionIos'
14
20
  import { version } from './version.json'
15
21
 
16
22
  let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
@@ -22,20 +28,43 @@ if (Platform.OS === 'android') {
22
28
  EventEmitter = DeviceEventEmitter
23
29
  }
24
30
 
25
- export type { TranscribeOptions, TranscribeResult }
26
-
31
+ export type {
32
+ TranscribeOptions,
33
+ TranscribeResult,
34
+ AudioSessionCategoryIos,
35
+ AudioSessionCategoryOptionIos,
36
+ AudioSessionModeIos,
37
+ }
27
38
 
28
39
  const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
40
+ const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments'
29
41
 
30
42
  const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
31
43
  const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'
32
44
 
45
+ export type TranscribeNewSegmentsResult = {
46
+ nNew: number
47
+ totalNNew: number
48
+ result: string
49
+ segments: TranscribeResult['segments']
50
+ }
51
+
52
+ export type TranscribeNewSegmentsNativeEvent = {
53
+ contextId: number
54
+ jobId: number
55
+ result: TranscribeNewSegmentsResult
56
+ }
57
+
33
58
  // Fn -> Boolean in TranscribeFileNativeOptions
34
59
  export type TranscribeFileOptions = TranscribeOptions & {
35
60
  /**
36
61
  * Progress callback, the progress is between 0 and 100
37
62
  */
38
63
  onProgress?: (progress: number) => void
64
+ /**
65
+ * Callback when new segments are transcribed
66
+ */
67
+ onNewSegments?: (result: TranscribeNewSegmentsResult) => void
39
68
  }
40
69
 
41
70
  export type TranscribeProgressNativeEvent = {
@@ -44,6 +73,13 @@ export type TranscribeProgressNativeEvent = {
44
73
  progress: number
45
74
  }
46
75
 
76
+ export type AudioSessionSettingIos = {
77
+ category: AudioSessionCategoryIos
78
+ options?: AudioSessionCategoryOptionIos[]
79
+ mode?: AudioSessionModeIos
80
+ active?: boolean
81
+ }
82
+
47
83
  // Codegen missing TSIntersectionType support so we dont put it into the native spec
48
84
  export type TranscribeRealtimeOptions = TranscribeOptions & {
49
85
  /**
@@ -81,6 +117,17 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
81
117
  * Frequency to apply High-pass filter in VAD. (Default: 100.0)
82
118
  */
83
119
  vadFreqThold?: number
120
+ /**
121
+ * iOS: Audio session settings when start transcribe
122
+ * Keep empty to use current audio session state
123
+ */
124
+ audioSessionOnStartIos?: AudioSessionSettingIos
125
+ /**
126
+ * iOS: Audio session settings when stop transcribe
127
+ * - Keep empty to use last audio session state
128
+ * - Use `restore` to restore audio session state before start transcribe
129
+ */
130
+ audioSessionOnStopIos?: string | AudioSessionSettingIos
84
131
  }
85
132
 
86
133
  export type TranscribeRealtimeEvent = {
@@ -122,6 +169,17 @@ export type TranscribeRealtimeNativeEvent = {
122
169
  payload: TranscribeRealtimeNativePayload
123
170
  }
124
171
 
172
+ const updateAudioSession = async (setting: AudioSessionSettingIos) => {
173
+ await AudioSessionIos.setCategory(
174
+ setting.category,
175
+ setting.options || [],
176
+ )
177
+ if (setting.mode) {
178
+ await AudioSessionIos.setMode(setting.mode)
179
+ }
180
+ await AudioSessionIos.setActive(setting.active ?? true)
181
+ }
182
+
125
183
  export class WhisperContext {
126
184
  id: number
127
185
 
@@ -149,13 +207,16 @@ export class WhisperContext {
149
207
  }
150
208
  } else {
151
209
  if (filePath.startsWith('http'))
152
- throw new Error('Transcribe remote file is not supported, please download it first')
210
+ throw new Error(
211
+ 'Transcribe remote file is not supported, please download it first',
212
+ )
153
213
  path = filePath
154
214
  }
155
215
  if (path.startsWith('file://')) path = path.slice(7)
156
216
  const jobId: number = Math.floor(Math.random() * 10000)
157
217
 
158
- const { onProgress, ...rest } = options
218
+ const { onProgress, onNewSegments, ...rest } = options
219
+
159
220
  let progressListener: any
160
221
  let lastProgress: number = 0
161
222
  if (onProgress) {
@@ -175,25 +236,50 @@ export class WhisperContext {
175
236
  progressListener = null
176
237
  }
177
238
  }
239
+
240
+ let newSegmentsListener: any
241
+ if (onNewSegments) {
242
+ newSegmentsListener = EventEmitter.addListener(
243
+ EVENT_ON_TRANSCRIBE_NEW_SEGMENTS,
244
+ (evt: TranscribeNewSegmentsNativeEvent) => {
245
+ const { contextId, result } = evt
246
+ if (contextId !== this.id || evt.jobId !== jobId) return
247
+ onNewSegments(result)
248
+ },
249
+ )
250
+ }
251
+ const removeNewSegmenetsListener = () => {
252
+ if (newSegmentsListener) {
253
+ newSegmentsListener.remove()
254
+ newSegmentsListener = null
255
+ }
256
+ }
257
+
178
258
  return {
179
259
  stop: async () => {
180
260
  await RNWhisper.abortTranscribe(this.id, jobId)
181
261
  removeProgressListener()
262
+ removeNewSegmenetsListener()
182
263
  },
183
264
  promise: RNWhisper.transcribeFile(this.id, jobId, path, {
184
265
  ...rest,
185
- onProgress: !!onProgress
186
- }).then((result) => {
187
- removeProgressListener()
188
- if (!result.isAborted && lastProgress !== 100) {
189
- // Handle the case that the last progress event is not triggered
190
- onProgress?.(100)
191
- }
192
- return result
193
- }).catch((e) => {
194
- removeProgressListener()
195
- throw e
196
- }),
266
+ onProgress: !!onProgress,
267
+ onNewSegments: !!onNewSegments,
268
+ })
269
+ .then((result) => {
270
+ removeProgressListener()
271
+ removeNewSegmenetsListener()
272
+ if (!result.isAborted && lastProgress !== 100) {
273
+ // Handle the case that the last progress event is not triggered
274
+ onProgress?.(100)
275
+ }
276
+ return result
277
+ })
278
+ .catch((e) => {
279
+ removeProgressListener()
280
+ removeNewSegmenetsListener()
281
+ throw e
282
+ }),
197
283
  }
198
284
  }
199
285
 
@@ -204,8 +290,6 @@ export class WhisperContext {
204
290
  /** Subscribe to realtime transcribe events */
205
291
  subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
206
292
  }> {
207
- const jobId: number = Math.floor(Math.random() * 10000)
208
- await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
209
293
  let lastTranscribePayload: TranscribeRealtimeNativePayload
210
294
 
211
295
  const slices: TranscribeRealtimeNativePayload[] = []
@@ -257,8 +341,40 @@ export class WhisperContext {
257
341
  return { ...payload, ...mergedPayload, slices }
258
342
  }
259
343
 
344
+ let prevAudioSession: AudioSessionSettingIos | undefined
345
+ if (Platform.OS === 'ios' && options?.audioSessionOnStartIos) {
346
+ // iOS: Remember current audio session state
347
+ if (options?.audioSessionOnStopIos === 'restore') {
348
+ const categoryResult = await AudioSessionIos.getCurrentCategory()
349
+ const mode = await AudioSessionIos.getCurrentMode()
350
+
351
+ prevAudioSession = {
352
+ ...categoryResult,
353
+ mode,
354
+ active: false, // TODO: Need to check isOtherAudioPlaying to set active
355
+ }
356
+ }
357
+
358
+ // iOS: Update audio session state
359
+ await updateAudioSession(options?.audioSessionOnStartIos)
360
+ }
361
+ if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
362
+ prevAudioSession = options?.audioSessionOnStopIos
363
+ }
364
+
365
+ const jobId: number = Math.floor(Math.random() * 10000)
366
+ try {
367
+ await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
368
+ } catch (e) {
369
+ if (prevAudioSession) await updateAudioSession(prevAudioSession)
370
+ throw e
371
+ }
372
+
260
373
  return {
261
- stop: () => RNWhisper.abortTranscribe(this.id, jobId),
374
+ stop: async () => {
375
+ await RNWhisper.abortTranscribe(this.id, jobId)
376
+ if (prevAudioSession) await updateAudioSession(prevAudioSession)
377
+ },
262
378
  subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
263
379
  let transcribeListener: any = EventEmitter.addListener(
264
380
  EVENT_ON_REALTIME_TRANSCRIBE,
@@ -318,7 +434,7 @@ export type ContextOptions = {
318
434
  */
319
435
  coreMLModelAsset?: {
320
436
  filename: string
321
- assets: number[]
437
+ assets: string[] | number[]
322
438
  }
323
439
  /** Is the file path a bundle asset for pure string filePath */
324
440
  isBundleAsset?: boolean
@@ -343,12 +459,19 @@ export async function initWhisper({
343
459
  if (filename && assets) {
344
460
  coreMLAssets = assets
345
461
  ?.map((asset) => {
346
- const { uri } = Image.resolveAssetSource(asset)
347
- const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
348
- if (filepath) {
462
+ if (typeof asset === 'number') {
463
+ const { uri } = Image.resolveAssetSource(asset)
464
+ const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
465
+ if (filepath) {
466
+ return {
467
+ uri,
468
+ filepath: `${filename}/${filepath}`,
469
+ }
470
+ }
471
+ } else if (typeof asset === 'string') {
349
472
  return {
350
- uri,
351
- filepath: `${filename}/${filepath}`,
473
+ uri: asset,
474
+ filepath: `${filename}/${asset}`,
352
475
  }
353
476
  }
354
477
  return undefined
@@ -367,7 +490,9 @@ export async function initWhisper({
367
490
  }
368
491
  } else {
369
492
  if (!isBundleAsset && filePath.startsWith('http'))
370
- throw new Error('Transcribe remote file is not supported, please download it first')
493
+ throw new Error(
494
+ 'Transcribe remote file is not supported, please download it first',
495
+ )
371
496
  path = filePath
372
497
  }
373
498
  if (path.startsWith('file://')) path = path.slice(7)
@@ -395,3 +520,5 @@ export const isUseCoreML: boolean = !!useCoreML
395
520
 
396
521
  /** Is allow fallback to CPU if load CoreML model failed */
397
522
  export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
523
+
524
+ export { AudioSessionIos }