whisper.rn 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +37 -116
- package/android/src/main/jni.cpp +23 -12
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/ios/RNWhisper.mm +81 -22
- package/ios/RNWhisper.xcodeproj/project.pbxproj +6 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisperAudioSessionUtils.h +13 -0
- package/ios/RNWhisperAudioSessionUtils.m +91 -0
- package/ios/RNWhisperAudioUtils.h +1 -0
- package/ios/RNWhisperAudioUtils.m +21 -0
- package/ios/RNWhisperContext.h +1 -0
- package/ios/RNWhisperContext.mm +56 -28
- package/jest/mock.js +10 -0
- package/lib/commonjs/AudioSessionIos.js +91 -0
- package/lib/commonjs/AudioSessionIos.js.map +1 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +82 -14
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/AudioSessionIos.js +83 -0
- package/lib/module/AudioSessionIos.js.map +1 -0
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +77 -14
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/AudioSessionIos.d.ts +54 -0
- package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts +8 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +37 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/AudioSessionIos.ts +90 -0
- package/src/NativeRNWhisper.ts +11 -1
- package/src/index.ts +153 -26
package/src/index.ts
CHANGED
@@ -11,6 +11,12 @@ import type {
   TranscribeResult,
   CoreMLAsset,
 } from './NativeRNWhisper'
+import AudioSessionIos from './AudioSessionIos'
+import type {
+  AudioSessionCategoryIos,
+  AudioSessionCategoryOptionIos,
+  AudioSessionModeIos,
+} from './AudioSessionIos'
 import { version } from './version.json'
 
 let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
@@ -22,20 +28,43 @@ if (Platform.OS === 'android') {
   EventEmitter = DeviceEventEmitter
 }
 
-export type { TranscribeOptions, TranscribeResult }
-
+export type {
+  TranscribeOptions,
+  TranscribeResult,
+  AudioSessionCategoryIos,
+  AudioSessionCategoryOptionIos,
+  AudioSessionModeIos,
+}
 
 const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
+const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments'
 
 const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
 const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'
 
+export type TranscribeNewSegmentsResult = {
+  nNew: number
+  totalNNew: number
+  result: string
+  segments: TranscribeResult['segments']
+}
+
+export type TranscribeNewSegmentsNativeEvent = {
+  contextId: number
+  jobId: number
+  result: TranscribeNewSegmentsResult
+}
+
 // Fn -> Boolean in TranscribeFileNativeOptions
 export type TranscribeFileOptions = TranscribeOptions & {
   /**
    * Progress callback, the progress is between 0 and 100
    */
   onProgress?: (progress: number) => void
+  /**
+   * Callback when new segments are transcribed
+   */
+  onNewSegments?: (result: TranscribeNewSegmentsResult) => void
 }
 
 export type TranscribeProgressNativeEvent = {
@@ -44,6 +73,13 @@ export type TranscribeProgressNativeEvent = {
   progress: number
 }
 
+export type AudioSessionSettingIos = {
+  category: AudioSessionCategoryIos
+  options?: AudioSessionCategoryOptionIos[]
+  mode?: AudioSessionModeIos
+  active?: boolean
+}
+
 // Codegen missing TSIntersectionType support so we dont put it into the native spec
 export type TranscribeRealtimeOptions = TranscribeOptions & {
   /**
@@ -81,6 +117,17 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
    * Frequency to apply High-pass filter in VAD. (Default: 100.0)
    */
   vadFreqThold?: number
+  /**
+   * iOS: Audio session settings when start transcribe
+   * Keep empty to use current audio session state
+   */
+  audioSessionOnStartIos?: AudioSessionSettingIos
+  /**
+   * iOS: Audio session settings when stop transcribe
+   * - Keep empty to use last audio session state
+   * - Use `restore` to restore audio session state before start transcribe
+   */
+  audioSessionOnStopIos?: string | AudioSessionSettingIos
 }
 
 export type TranscribeRealtimeEvent = {
@@ -122,6 +169,17 @@ export type TranscribeRealtimeNativeEvent = {
   payload: TranscribeRealtimeNativePayload
 }
 
+const updateAudioSession = async (setting: AudioSessionSettingIos) => {
+  await AudioSessionIos.setCategory(
+    setting.category,
+    setting.options || [],
+  )
+  if (setting.mode) {
+    await AudioSessionIos.setMode(setting.mode)
+  }
+  await AudioSessionIos.setActive(setting.active ?? true)
+}
+
 export class WhisperContext {
   id: number
 
@@ -149,13 +207,16 @@ export class WhisperContext {
       }
     } else {
       if (filePath.startsWith('http'))
-        throw new Error('Transcribe remote file is not supported, please download it first')
+        throw new Error(
+          'Transcribe remote file is not supported, please download it first',
+        )
       path = filePath
     }
     if (path.startsWith('file://')) path = path.slice(7)
     const jobId: number = Math.floor(Math.random() * 10000)
 
-    const { onProgress, ...rest } = options
+    const { onProgress, onNewSegments, ...rest } = options
+
     let progressListener: any
     let lastProgress: number = 0
    if (onProgress) {
@@ -175,25 +236,50 @@
         progressListener = null
       }
     }
+
+    let newSegmentsListener: any
+    if (onNewSegments) {
+      newSegmentsListener = EventEmitter.addListener(
+        EVENT_ON_TRANSCRIBE_NEW_SEGMENTS,
+        (evt: TranscribeNewSegmentsNativeEvent) => {
+          const { contextId, result } = evt
+          if (contextId !== this.id || evt.jobId !== jobId) return
+          onNewSegments(result)
+        },
+      )
+    }
+    const removeNewSegmenetsListener = () => {
+      if (newSegmentsListener) {
+        newSegmentsListener.remove()
+        newSegmentsListener = null
+      }
+    }
+
     return {
       stop: async () => {
         await RNWhisper.abortTranscribe(this.id, jobId)
         removeProgressListener()
+        removeNewSegmenetsListener()
       },
       promise: RNWhisper.transcribeFile(this.id, jobId, path, {
         ...rest,
-        onProgress: !!onProgress,
-      }).then((result) => {
-        removeProgressListener()
-        if (!result.isAborted && lastProgress !== 100) {
-          // Handle the case that the last progress event is not triggered
-          onProgress?.(100)
-        }
-        return result
-      }).catch((e) => {
-        removeProgressListener()
-        throw e
-      }),
+        onProgress: !!onProgress,
+        onNewSegments: !!onNewSegments,
+      })
+        .then((result) => {
+          removeProgressListener()
+          removeNewSegmenetsListener()
+          if (!result.isAborted && lastProgress !== 100) {
+            // Handle the case that the last progress event is not triggered
+            onProgress?.(100)
+          }
+          return result
+        })
+        .catch((e) => {
+          removeProgressListener()
+          removeNewSegmenetsListener()
+          throw e
+        }),
     }
   }
 
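
Note: the hunk above is what makes the new `onNewSegments` callback work end to end: `transcribeFile` is told whether a JS callback exists (`onNewSegments: !!onNewSegments`), the native side emits `@RNWhisper_onTranscribeNewSegments`, and the listener filters events by `contextId` and `jobId` before invoking the callback. A minimal usage sketch (not part of the package diff; model and audio paths are placeholders):

```ts
import { initWhisper } from 'whisper.rn'

async function transcribeWithLiveSegments() {
  const whisperContext = await initWhisper({
    filePath: 'path/to/ggml-tiny.en.bin', // placeholder model path
  })

  // transcribe() returns { stop, promise } synchronously
  const { promise } = whisperContext.transcribe('path/to/audio.wav', {
    language: 'en',
    // Fires for each batch of newly decoded segments, before `promise` resolves
    onNewSegments: ({ nNew, totalNNew, segments }) => {
      console.log(`${nNew} new segment(s), ${totalNNew} so far`)
      segments.forEach((segment) => console.log(segment.text))
    },
    onProgress: (progress) => console.log(`progress: ${progress}%`),
  })

  const { result } = await promise // full transcript once the job finishes
  console.log(result)
}
```
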
@@ -204,8 +290,6 @@ export class WhisperContext {
     /** Subscribe to realtime transcribe events */
     subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
   }> {
-    const jobId: number = Math.floor(Math.random() * 10000)
-    await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
     let lastTranscribePayload: TranscribeRealtimeNativePayload
 
     const slices: TranscribeRealtimeNativePayload[] = []
@@ -257,8 +341,40 @@
       return { ...payload, ...mergedPayload, slices }
     }
 
+    let prevAudioSession: AudioSessionSettingIos | undefined
+    if (Platform.OS === 'ios' && options?.audioSessionOnStartIos) {
+      // iOS: Remember current audio session state
+      if (options?.audioSessionOnStopIos === 'restore') {
+        const categoryResult = await AudioSessionIos.getCurrentCategory()
+        const mode = await AudioSessionIos.getCurrentMode()
+
+        prevAudioSession = {
+          ...categoryResult,
+          mode,
+          active: false, // TODO: Need to check isOtherAudioPlaying to set active
+        }
+      }
+
+      // iOS: Update audio session state
+      await updateAudioSession(options?.audioSessionOnStartIos)
+    }
+    if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
+      prevAudioSession = options?.audioSessionOnStopIos
+    }
+
+    const jobId: number = Math.floor(Math.random() * 10000)
+    try {
+      await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
+    } catch (e) {
+      if (prevAudioSession) await updateAudioSession(prevAudioSession)
+      throw e
+    }
+
     return {
-      stop: () => RNWhisper.abortTranscribe(this.id, jobId),
+      stop: async () => {
+        await RNWhisper.abortTranscribe(this.id, jobId)
+        if (prevAudioSession) await updateAudioSession(prevAudioSession)
+      },
       subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
         let transcribeListener: any = EventEmitter.addListener(
           EVENT_ON_REALTIME_TRANSCRIBE,
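
Note: with the change above, `transcribeRealtime` can reconfigure the iOS audio session before recording starts and roll it back when `stop()` is called or when `startRealtimeTranscribe` throws. A hedged sketch of the new options; the enum-style accessors (`AudioSessionIos.Category.PlayAndRecord` and friends) are assumptions based on the `AudioSessionCategoryIos`/`AudioSessionCategoryOptionIos`/`AudioSessionModeIos` types, since `AudioSessionIos.ts` itself is not shown in this excerpt:

```ts
import { initWhisper, AudioSessionIos } from 'whisper.rn'

async function startRealtime() {
  const whisperContext = await initWhisper({
    filePath: 'path/to/ggml-tiny.en.bin', // placeholder model path
  })

  const { stop, subscribe } = await whisperContext.transcribeRealtime({
    language: 'en',
    // Applied via updateAudioSession() before startRealtimeTranscribe
    audioSessionOnStartIos: {
      category: AudioSessionIos.Category.PlayAndRecord, // assumed accessor
      options: [AudioSessionIos.CategoryOption.MixWithOthers], // assumed accessor
      mode: AudioSessionIos.Mode.Default, // assumed accessor
    },
    // 'restore' snapshots the current category/mode up front and
    // reapplies the snapshot in stop()
    audioSessionOnStopIos: 'restore',
  })

  subscribe((event) => {
    if (event.data) console.log(event.data.result)
  })

  // Stop after 30s; this also restores the previous audio session
  setTimeout(() => stop(), 30000)
}
```
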
@@ -318,7 +434,7 @@ export type ContextOptions = {
    */
   coreMLModelAsset?: {
     filename: string
-    assets: number[]
+    assets: string[] | number[]
   }
   /** Is the file path a bundle asset for pure string filePath */
   isBundleAsset?: boolean
@@ -343,12 +459,19 @@ export async function initWhisper({
   if (filename && assets) {
     coreMLAssets = assets
       ?.map((asset) => {
-        const { uri } = Image.resolveAssetSource(asset)
-        const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
-        if (filepath) {
+        if (typeof asset === 'number') {
+          const { uri } = Image.resolveAssetSource(asset)
+          const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
+          if (filepath) {
+            return {
+              uri,
+              filepath: `${filename}/${filepath}`,
+            }
+          }
+        } else if (typeof asset === 'string') {
           return {
-            uri,
-            filepath: `${filename}/${filepath}`,
+            uri: asset,
+            filepath: `${filename}/${asset}`,
           }
         }
         return undefined
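
Note: `coreMLModelAsset.assets` now accepts plain string paths in addition to `require()`d asset module IDs. Numbers still go through `Image.resolveAssetSource` and are matched against `coreMLModelAssetPaths`; a string is used directly as the asset `uri` and stored under `${filename}/${asset}`. An illustrative sketch (filenames are placeholders):

```ts
import { initWhisper } from 'whisper.rn'

async function initWithCoreML() {
  return initWhisper({
    filePath: 'path/to/ggml-tiny.en.bin', // placeholder
    coreMLModelAsset: {
      filename: 'ggml-tiny.en-encoder.mlmodelc', // placeholder
      assets: [
        // Strings: used as `uri` as-is, copied to `${filename}/${asset}`
        'model.mil',
        'coremldata.bin',
        'weights/weight.bin',
        // Numbers (Metro asset module IDs) still work, e.g.:
        // require('./assets/ggml-tiny.en-encoder.mlmodelc/model.mil'),
      ],
    },
  })
}
```
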
@@ -367,7 +490,9 @@ export async function initWhisper({
     }
   } else {
     if (!isBundleAsset && filePath.startsWith('http'))
-      throw new Error('Transcribe remote file is not supported, please download it first')
+      throw new Error(
+        'Transcribe remote file is not supported, please download it first',
+      )
     path = filePath
   }
   if (path.startsWith('file://')) path = path.slice(7)
@@ -395,3 +520,5 @@ export const isUseCoreML: boolean = !!useCoreML
 
 /** Is allow fallback to CPU if load CoreML model failed */
 export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
+
+export { AudioSessionIos }
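
Note: since `AudioSessionIos` is now re-exported from the package root, the session can also be managed manually, outside of transcription. A sketch using only the methods visible in this diff (`setCategory`, `setMode`, `setActive`, `getCurrentCategory`, `getCurrentMode`); the enum accessors and the `getCurrentCategory` return shape are assumptions:

```ts
import { AudioSessionIos } from 'whisper.rn'

async function configureAudioSession() {
  // The realtime 'restore' path spreads getCurrentCategory() into a
  // setting object, so it appears to resolve to { category, options }
  const current = await AudioSessionIos.getCurrentCategory()
  const currentMode = await AudioSessionIos.getCurrentMode()
  console.log('before:', current, currentMode)

  await AudioSessionIos.setCategory(
    AudioSessionIos.Category.PlayAndRecord, // assumed accessor
    [AudioSessionIos.CategoryOption.DefaultToSpeaker], // assumed accessor
  )
  await AudioSessionIos.setMode(AudioSessionIos.Mode.Default) // assumed accessor
  await AudioSessionIos.setActive(true)
}
```
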