whisper.rn 0.3.0-rc.5 → 0.3.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -9
- package/android/src/main/java/com/rnwhisper/Downloader.java +83 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +9 -9
- package/android/src/main/jni/whisper/jni.cpp +102 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +59 -5
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +59 -5
- package/ios/RNWhisper.mm +32 -7
- package/ios/RNWhisperDownloader.h +8 -0
- package/ios/RNWhisperDownloader.m +39 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +61 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +62 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +11 -1
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +15 -4
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +13 -1
- package/src/index.ts +156 -64
package/src/index.ts
CHANGED
@@ -3,9 +3,14 @@ import {
   DeviceEventEmitter,
   Platform,
   DeviceEventEmitterStatic,
+  Image,
 } from 'react-native'
 import RNWhisper from './NativeRNWhisper'
-import type {
+import type {
+  TranscribeOptions,
+  TranscribeResult,
+  CoreMLAsset,
+} from './NativeRNWhisper'
 import { version } from './version.json'

 let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
@@ -24,11 +29,11 @@ const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'

 export type TranscribeRealtimeOptions = TranscribeOptions & {
   /**
-   * Realtime record max duration in seconds.
+   * Realtime record max duration in seconds.
    * Due to the whisper.cpp hard constraint - processes the audio in chunks of 30 seconds,
    * the recommended value will be <= 30 seconds. (Default: 30)
    */
-  realtimeAudioSec?: number
+  realtimeAudioSec?: number
   /**
    * Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
    * Set `realtimeAudioSliceSec` < 30 so performance improvements can be achieved in the Whisper hard constraint (processes the audio in chunks of 30 seconds).
@@ -38,42 +43,42 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
 }

 export type TranscribeRealtimeEvent = {
-  contextId: number
-  jobId: number
+  contextId: number
+  jobId: number
   /** Is capturing audio, when false, the event is the final result */
-  isCapturing: boolean
-  isStoppedByAction?: boolean
-  code: number
-  data?: TranscribeResult
-  error?: string
-  processTime: number
-  recordingTime: number
+  isCapturing: boolean
+  isStoppedByAction?: boolean
+  code: number
+  data?: TranscribeResult
+  error?: string
+  processTime: number
+  recordingTime: number
   slices?: Array<{
-    code: number
-    error?: string
-    data?: TranscribeResult
-    processTime: number
-    recordingTime: number
-  }
+    code: number
+    error?: string
+    data?: TranscribeResult
+    processTime: number
+    recordingTime: number
+  }>
 }

 export type TranscribeRealtimeNativePayload = {
   /** Is capturing audio, when false, the event is the final result */
-  isCapturing: boolean
-  isStoppedByAction?: boolean
-  code: number
-  processTime: number
-  recordingTime: number
-  isUseSlices: boolean
-  sliceIndex: number
-  data?: TranscribeResult
-  error?: string
+  isCapturing: boolean
+  isStoppedByAction?: boolean
+  code: number
+  processTime: number
+  recordingTime: number
+  isUseSlices: boolean
+  sliceIndex: number
+  data?: TranscribeResult
+  error?: string
 }

 export type TranscribeRealtimeNativeEvent = {
-  contextId: number
-  jobId: number
-  payload: TranscribeRealtimeNativePayload
+  contextId: number
+  jobId: number
+  payload: TranscribeRealtimeNativePayload
 }

 export class WhisperContext {
@@ -84,12 +89,29 @@ export class WhisperContext {
   }

   /** Transcribe audio file */
-  transcribe(
+  transcribe(
+    filePath: string | number,
+    options: TranscribeOptions = {},
+  ): {
     /** Stop the transcribe */
-    stop: () => void
+    stop: () => void
     /** Transcribe result promise */
-    promise: Promise<TranscribeResult>
+    promise: Promise<TranscribeResult>
   } {
+    let path = ''
+    if (typeof filePath === 'number') {
+      try {
+        const source = Image.resolveAssetSource(filePath)
+        if (source) path = source.uri
+      } catch (e) {
+        throw new Error(`Invalid asset: ${filePath}`)
+      }
+    } else {
+      if (filePath.startsWith('http'))
+        throw new Error('Transcribe remote file is not supported, please download it first')
+      path = filePath
+    }
+    if (path.startsWith('file://')) path = path.slice(7)
     const jobId: number = Math.floor(Math.random() * 10000)
     return {
       stop: () => RNWhisper.abortTranscribe(this.id, jobId),
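With this hunk, `transcribe()` accepts either a path string or a `require`d asset id: numeric ids are resolved through `Image.resolveAssetSource`, plain `http(s)` paths now throw, and a leading `file://` is stripped before the path reaches native code. A minimal usage sketch under those rules; the audio asset and the `language` option are illustrative assumptions, not part of this diff:

import type { WhisperContext } from 'whisper.rn'

// Sketch only: `ctx` is a context created via initWhisper (see the last hunk),
// and './assets/sample.wav' is a hypothetical audio file bundled with the app.
async function transcribeBundledAudio(ctx: WhisperContext): Promise<string> {
  const { stop, promise } = ctx.transcribe(
    require('./assets/sample.wav'), // resolved via Image.resolveAssetSource
    { language: 'en' }, // assumed TranscribeOptions field
  )
  // `stop()` would abort the running job via RNWhisper.abortTranscribe
  const { result } = await promise // promise resolves to TranscribeResult
  return result
}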
@@ -100,9 +122,9 @@ export class WhisperContext {
   /** Transcribe the microphone audio stream, the microphone user permission is required */
   async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
     /** Stop the realtime transcribe */
-    stop: () => void
+    stop: () => void
     /** Subscribe to realtime transcribe events */
-    subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
+    subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
   }> {
     const jobId: number = Math.floor(Math.random() * 10000)
     await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
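The `stop`/`subscribe` pair reformatted above is how the sliced realtime events are consumed. A hedged sketch of a subscriber; the option values simply follow the `realtimeAudioSliceSec` < 30 guidance documented earlier in this diff:

import type { WhisperContext } from 'whisper.rn'

async function startRealtime(ctx: WhisperContext) {
  const { stop, subscribe } = await ctx.transcribeRealtime({
    realtimeAudioSec: 60, // record up to 60 seconds
    realtimeAudioSliceSec: 25, // slice below whisper.cpp's 30-second window
  })
  subscribe((event) => {
    // isCapturing === false marks the final event (emitted by the end listener)
    const label = event.isCapturing ? 'partial' : 'final'
    console.log(label, event.data?.result, 'slices:', event.slices?.length)
  })
  return stop // call to end the capture early
}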
@@ -118,37 +140,42 @@ export class WhisperContext {
         const { segments = [] } = slices[sliceIndex]?.data || {}
         tOffset = segments[segments.length - 1]?.t1 || 0
       }
-      ({ sliceIndex } = payload)
+      ;({ sliceIndex } = payload)
       slices[sliceIndex] = {
         ...payload,
-        data: payload.data
-
-
-
-
-
-
-
+        data: payload.data
+          ? {
+              ...payload.data,
+              segments:
+                payload.data.segments.map((segment) => ({
+                  ...segment,
+                  t0: segment.t0 + tOffset,
+                  t1: segment.t1 + tOffset,
+                })) || [],
+            }
+          : undefined,
       }
     }

-    const mergeSlicesIfNeeded = (
+    const mergeSlicesIfNeeded = (
+      payload: TranscribeRealtimeNativePayload,
+    ): TranscribeRealtimeNativePayload => {
       if (!payload.isUseSlices) return payload

       const mergedPayload: any = {}
-      slices.forEach(
-
-
-
-
-
-
-
-      }
-      mergedPayload.processTime = slice.processTime
-      mergedPayload.recordingTime = (mergedPayload?.recordingTime || 0) + slice.recordingTime
+      slices.forEach((slice) => {
+        mergedPayload.data = {
+          result:
+            (mergedPayload.data?.result || '') + (slice.data?.result || ''),
+          segments: [
+            ...(mergedPayload?.data?.segments || []),
+            ...(slice.data?.segments || []),
+          ],
         }
-
+        mergedPayload.processTime = slice.processTime
+        mergedPayload.recordingTime =
+          (mergedPayload?.recordingTime || 0) + slice.recordingTime
+      })
       return { ...payload, ...mergedPayload, slices }
     }

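The slice bookkeeping above shifts each new slice's segment timestamps by the previous slice's final `t1`, so the `segments` array assembled by `mergeSlicesIfNeeded` reads as one continuous timeline. A standalone restatement of that offset rule (not the library API; the `text` field is an assumed part of the segment shape):

type Segment = { text?: string; t0: number; t1: number }

// E.g. with tOffset = 2980 (the previous slice's last t1), a fresh slice's
// segment at t0 = 0, t1 = 120 is stored as t0 = 2980, t1 = 3100.
function offsetSegments(segments: Segment[], tOffset: number): Segment[] {
  return segments.map((s) => ({ ...s, t0: s.t0 + tOffset, t1: s.t1 + tOffset }))
}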
@@ -167,7 +194,7 @@ export class WhisperContext {
              jobId: evt.jobId,
              ...mergeSlicesIfNeeded(payload),
            })
-          }
+          },
        )
        let endListener: any = EventEmitter.addListener(
          EVENT_ON_REALTIME_TRANSCRIBE_END,
@@ -183,7 +210,7 @@ export class WhisperContext {
              contextId,
              jobId: evt.jobId,
              ...mergeSlicesIfNeeded(lastPayload),
-              isCapturing: false
+              isCapturing: false,
            })
            if (transcribeListener) {
              transcribeListener.remove()
@@ -193,7 +220,7 @@ export class WhisperContext {
              endListener.remove()
              endListener = null
            }
-          }
+          },
        )
      },
    }
@@ -204,10 +231,75 @@ export class WhisperContext {
   }
 }

-export
-
-
-
+export type ContextOptions = {
+  filePath: string | number
+  /**
+   * CoreML model assets, if you're using `require` on filePath,
+   * use this option is required if you want to enable Core ML,
+   * you will need bundle weights/weight.bin, model.mil, coremldata.bin into app by `require`
+   */
+  coreMLModelAsset?: {
+    filename: string
+    assets: number[]
+  }
+  /** Is the file path a bundle asset for pure string filePath */
+  isBundleAsset?: boolean
+}
+
+const coreMLModelAssetPaths = [
+  'analytics/coremldata.bin',
+  'weights/weight.bin',
+  'model.mil',
+  'coremldata.bin',
+]
+
+export async function initWhisper({
+  filePath,
+  coreMLModelAsset,
+  isBundleAsset,
+}: ContextOptions): Promise<WhisperContext> {
+  let path = ''
+  let coreMLAssets: CoreMLAsset[] | undefined
+  if (coreMLModelAsset) {
+    const { filename, assets } = coreMLModelAsset
+    if (filename && assets) {
+      coreMLAssets = assets
+        ?.map((asset) => {
+          const { uri } = Image.resolveAssetSource(asset)
+          const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
+          if (filepath) {
+            return {
+              uri,
+              filepath: `${filename}/${filepath}`,
+            }
+          }
+          return undefined
+        })
+        .filter((asset): asset is CoreMLAsset => asset !== undefined)
+    }
+  }
+  if (typeof filePath === 'number') {
+    try {
+      const source = Image.resolveAssetSource(filePath)
+      if (source) {
+        path = source.uri
+      }
+    } catch (e) {
+      throw new Error(`Invalid asset: ${filePath}`)
+    }
+  } else {
+    if (!isBundleAsset && filePath.startsWith('http'))
+      throw new Error('Transcribe remote file is not supported, please download it first')
+    path = filePath
+  }
+  if (path.startsWith('file://')) path = path.slice(7)
+  const id = await RNWhisper.initContext({
+    filePath: path,
+    isBundleAsset: !!isBundleAsset,
+    // Only development mode need download Core ML model assets (from packager server)
+    downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
+    coreMLAssets,
+  })
   return new WhisperContext(id)
 }

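Taken together, the new `ContextOptions` lets both the model and its Core ML encoder ship as Metro assets: the encoder files are `require`d individually, matched against `coreMLModelAssetPaths`, and reassembled under `filename` on the native side (fetched from the packager only in development, per the `downloadCoreMLAssets` flag). A hedged end-to-end sketch; the `ggml-tiny.en-encoder.mlmodelc` naming follows whisper.cpp's Core ML convention, and Metro must be configured to treat `bin`/`mil` as asset extensions, both assumptions beyond this diff:

import { Platform } from 'react-native'
import { initWhisper } from 'whisper.rn'

async function createContext() {
  return initWhisper({
    filePath: require('./assets/ggml-tiny.en.bin'), // hypothetical bundled model
    coreMLModelAsset:
      Platform.OS === 'ios'
        ? {
            filename: 'ggml-tiny.en-encoder.mlmodelc',
            assets: [
              // Each entry must end with one of coreMLModelAssetPaths
              require('./assets/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin'),
              require('./assets/ggml-tiny.en-encoder.mlmodelc/model.mil'),
              require('./assets/ggml-tiny.en-encoder.mlmodelc/coremldata.bin'),
            ],
          }
        : undefined,
  })
}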