whisper.rn 0.5.0-rc.1 → 0.5.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +119 -50
  2. package/lib/commonjs/AudioSessionIos.js +2 -1
  3. package/lib/commonjs/AudioSessionIos.js.map +1 -1
  4. package/lib/commonjs/index.js +1 -0
  5. package/lib/commonjs/index.js.map +1 -1
  6. package/lib/commonjs/jest-mock.js +126 -0
  7. package/lib/commonjs/jest-mock.js.map +1 -0
  8. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +831 -0
  9. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  10. package/lib/commonjs/realtime-transcription/SliceManager.js +233 -0
  11. package/lib/commonjs/realtime-transcription/SliceManager.js.map +1 -0
  12. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js +133 -0
  13. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  14. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js +201 -0
  15. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  16. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +309 -0
  17. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  18. package/lib/commonjs/realtime-transcription/index.js +27 -0
  19. package/lib/commonjs/realtime-transcription/index.js.map +1 -0
  20. package/lib/commonjs/realtime-transcription/types.js +114 -0
  21. package/lib/commonjs/realtime-transcription/types.js.map +1 -0
  22. package/lib/commonjs/utils/WavFileReader.js +158 -0
  23. package/lib/commonjs/utils/WavFileReader.js.map +1 -0
  24. package/lib/commonjs/utils/WavFileWriter.js +181 -0
  25. package/lib/commonjs/utils/WavFileWriter.js.map +1 -0
  26. package/lib/commonjs/utils/common.js +25 -0
  27. package/lib/commonjs/utils/common.js.map +1 -0
  28. package/lib/module/AudioSessionIos.js +2 -1
  29. package/lib/module/AudioSessionIos.js.map +1 -1
  30. package/lib/module/index.js +1 -0
  31. package/lib/module/index.js.map +1 -1
  32. package/lib/module/jest-mock.js +124 -0
  33. package/lib/module/jest-mock.js.map +1 -0
  34. package/lib/module/realtime-transcription/RealtimeTranscriber.js +825 -0
  35. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  36. package/lib/module/realtime-transcription/SliceManager.js +226 -0
  37. package/lib/module/realtime-transcription/SliceManager.js.map +1 -0
  38. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js +124 -0
  39. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  40. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js +194 -0
  41. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  42. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +302 -0
  43. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  44. package/lib/module/realtime-transcription/index.js +8 -0
  45. package/lib/module/realtime-transcription/index.js.map +1 -0
  46. package/lib/module/realtime-transcription/types.js +107 -0
  47. package/lib/module/realtime-transcription/types.js.map +1 -0
  48. package/lib/module/utils/WavFileReader.js +151 -0
  49. package/lib/module/utils/WavFileReader.js.map +1 -0
  50. package/lib/module/utils/WavFileWriter.js +174 -0
  51. package/lib/module/utils/WavFileWriter.js.map +1 -0
  52. package/lib/module/utils/common.js +18 -0
  53. package/lib/module/utils/common.js.map +1 -0
  54. package/lib/typescript/AudioSessionIos.d.ts +1 -1
  55. package/lib/typescript/AudioSessionIos.d.ts.map +1 -1
  56. package/lib/typescript/index.d.ts.map +1 -1
  57. package/lib/typescript/jest-mock.d.ts +2 -0
  58. package/lib/typescript/jest-mock.d.ts.map +1 -0
  59. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +165 -0
  60. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -0
  61. package/lib/typescript/realtime-transcription/SliceManager.d.ts +72 -0
  62. package/lib/typescript/realtime-transcription/SliceManager.d.ts.map +1 -0
  63. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts +22 -0
  64. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts.map +1 -0
  65. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts +44 -0
  66. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts.map +1 -0
  67. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts +75 -0
  68. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts.map +1 -0
  69. package/lib/typescript/realtime-transcription/index.d.ts +6 -0
  70. package/lib/typescript/realtime-transcription/index.d.ts.map +1 -0
  71. package/lib/typescript/realtime-transcription/types.d.ts +216 -0
  72. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -0
  73. package/lib/typescript/utils/WavFileReader.d.ts +61 -0
  74. package/lib/typescript/utils/WavFileReader.d.ts.map +1 -0
  75. package/lib/typescript/utils/WavFileWriter.d.ts +57 -0
  76. package/lib/typescript/utils/WavFileWriter.d.ts.map +1 -0
  77. package/lib/typescript/utils/common.d.ts +9 -0
  78. package/lib/typescript/utils/common.d.ts.map +1 -0
  79. package/package.json +18 -6
  80. package/src/AudioSessionIos.ts +3 -2
  81. package/src/index.ts +4 -0
  82. package/{jest/mock.js → src/jest-mock.ts} +2 -2
  83. package/src/realtime-transcription/RealtimeTranscriber.ts +983 -0
  84. package/src/realtime-transcription/SliceManager.ts +252 -0
  85. package/src/realtime-transcription/adapters/AudioPcmStreamAdapter.ts +143 -0
  86. package/src/realtime-transcription/adapters/JestAudioStreamAdapter.ts +251 -0
  87. package/src/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.ts +378 -0
  88. package/src/realtime-transcription/index.ts +34 -0
  89. package/src/realtime-transcription/types.ts +277 -0
  90. package/src/utils/WavFileReader.ts +202 -0
  91. package/src/utils/WavFileWriter.ts +206 -0
  92. package/src/utils/common.ts +17 -0
@@ -0,0 +1,202 @@
1
+ import { base64ToUint8Array } from './common'
2
+
3
/** Promise-based filesystem functions declared for code that reads WAV files. */
+ export interface WavFileReaderFs {
4
/** Resolves with the file contents as a string in the requested encoding; WavFileReader in this file requests 'base64'. */
+ readFile: (filePath: string, encoding: string) => Promise<string>
5
/** Resolves to whether a file exists at `filePath`. */
+ exists: (filePath: string) => Promise<boolean>
6
/** Presumably deletes the file at `filePath`; the WavFileReader code shown in this file never calls it - confirm against callers. */
+ unlink: (filePath: string) => Promise<void>
7
+ }
8
+
9
/** Format information WavFileReader extracts from a WAV file's `fmt ` and `data` chunks. */
+ export interface WavFileHeader {
10
/** Sample rate read from the `fmt ` chunk (samples per second per channel). */
+ sampleRate: number
11
/** Channel count read from the `fmt ` chunk. */
+ channels: number
12
/** Bits per sample read from the `fmt ` chunk. */
+ bitsPerSample: number
13
/** Size of the `data` chunk in bytes. */
+ dataSize: number
14
/** Playback length in seconds: dataSize / (sampleRate * channels * bitsPerSample / 8). */
+ duration: number
15
+ }
16
+
17
/**
 * Loads a PCM WAV file fully into memory through an injected filesystem and
 * exposes its format information and raw sample bytes.
 *
 * Call `initialize()` once before using the accessors; until then the
 * accessors return `null` / `0`.
 */
export class WavFileReader {
  private filePath: string

  private header: WavFileHeader | null = null

  private audioData: Uint8Array | null = null

  private fs: {
    exists: (filePath: string) => Promise<boolean>
    readFile: (filePath: string, encoding: string) => Promise<string>
  }

  /**
   * @param fs - Filesystem functions used to check for and read the file.
   *   `readFile` is called with the `'base64'` encoding.
   * @param filePath - Path of the WAV file to read.
   */
  constructor(
    fs: {
      exists: (filePath: string) => Promise<boolean>
      readFile: (filePath: string, encoding: string) => Promise<string>
    },
    filePath: string,
  ) {
    this.fs = fs
    this.filePath = filePath
  }

  /**
   * Read and parse the WAV file.
   *
   * @throws Error prefixed with "Failed to initialize WAV file reader:" when
   *   the file is missing, unreadable, or not a supported PCM WAV file.
   */
  async initialize(): Promise<void> {
    try {
      // Check if file exists
      const exists = await this.fs.exists(this.filePath)
      if (!exists) {
        throw new Error(`WAV file not found: ${this.filePath}`)
      }

      // Read the entire file
      const fileContent = await this.fs.readFile(this.filePath, 'base64')
      const fileData = base64ToUint8Array(fileContent)

      // Parse the header and locate the audio payload. The payload starts
      // wherever the data chunk was found, which is only byte 44 for the
      // minimal header layout.
      const { header, dataOffset } = WavFileReader.parseWavHeader(fileData)
      this.header = header
      this.audioData = fileData.slice(dataOffset, dataOffset + header.dataSize)

      console.log(
        `WAV file loaded: ${header.duration.toFixed(2)}s, ${
          header.sampleRate
        }Hz, ${header.channels}ch`,
      )
    } catch (error) {
      throw new Error(`Failed to initialize WAV file reader: ${error}`)
    }
  }

  /**
   * Read the four-character chunk identifier stored at `offset`.
   * Returns a shorter string when fewer than four bytes remain.
   */
  private static readChunkId(data: Uint8Array, offset: number): string {
    return String.fromCharCode(...data.subarray(offset, offset + 4))
  }

  /**
   * Parse the RIFF/WAVE structure: validate the container, read the `fmt `
   * chunk and locate the `data` chunk.
   *
   * @returns The parsed header plus the byte offset of the first audio byte.
   * @throws Error when the container is invalid, the format is not PCM, or
   *   the `fmt ` / `data` chunk is missing.
   */
  private static parseWavHeader(data: Uint8Array): {
    header: WavFileHeader
    dataOffset: number
  } {
    const view = new DataView(data.buffer, data.byteOffset, data.byteLength)

    // Verify RIFF header
    if (WavFileReader.readChunkId(data, 0) !== 'RIFF') {
      throw new Error('Invalid WAV file: Missing RIFF header')
    }

    // Verify WAVE format
    if (WavFileReader.readChunkId(data, 8) !== 'WAVE') {
      throw new Error('Invalid WAV file: Missing WAVE header')
    }

    let format: {
      channels: number
      sampleRate: number
      bitsPerSample: number
    } | null = null

    // Walk the chunk list that follows the 12-byte RIFF/WAVE preamble.
    let offset = 12
    while (offset + 8 <= data.length) {
      const chunkId = WavFileReader.readChunkId(data, offset)
      const chunkSize = view.getUint32(offset + 4, true)
      const bodyOffset = offset + 8

      if (chunkId === 'fmt ') {
        if (chunkSize < 16 || bodyOffset + 16 > data.length) {
          throw new Error('Invalid WAV file: Truncated fmt chunk')
        }
        if (view.getUint16(bodyOffset, true) !== 1) {
          throw new Error('Unsupported WAV format: Only PCM is supported')
        }
        format = {
          channels: view.getUint16(bodyOffset + 2, true),
          sampleRate: view.getUint32(bodyOffset + 4, true),
          bitsPerSample: view.getUint16(bodyOffset + 14, true),
        }
      } else if (chunkId === 'data') {
        if (!format) {
          throw new Error('Invalid WAV file: Missing fmt chunk')
        }
        // Never report more audio than the file actually contains (streamed
        // or truncated files may declare a larger size).
        const dataSize = Math.min(chunkSize, data.length - bodyOffset)
        const bytesPerSecond =
          format.sampleRate * format.channels * (format.bitsPerSample / 8)

        return {
          header: {
            sampleRate: format.sampleRate,
            channels: format.channels,
            bitsPerSample: format.bitsPerSample,
            dataSize,
            duration: bytesPerSecond > 0 ? dataSize / bytesPerSecond : 0,
          },
          dataOffset: bodyOffset,
        }
      }

      // RIFF chunks are word-aligned: an odd-sized body is followed by one
      // pad byte that is not counted in the chunk size.
      offset = bodyOffset + chunkSize + (chunkSize % 2)
    }

    throw new Error(
      format
        ? 'Invalid WAV file: Missing data chunk'
        : 'Invalid WAV file: Missing fmt chunk',
    )
  }

  /**
   * Bytes of audio per second for the loaded file, or 0 when no header is
   * loaded or the header describes a zero-sized frame.
   */
  private getBytesPerSecond(): number {
    if (!this.header) return 0

    return (
      this.header.sampleRate *
      this.header.channels *
      (this.header.bitsPerSample / 8)
    )
  }

  /**
   * Get audio data slice
   *
   * @param startByte - Offset of the first byte, relative to the audio data.
   * @param lengthBytes - Number of bytes requested.
   * @returns A copy of the requested range clamped to the available audio,
   *   or `null` when nothing is loaded or the clamped range is empty.
   */
  getAudioSlice(startByte: number, lengthBytes: number): Uint8Array | null {
    if (!this.audioData) {
      return null
    }

    const start = Math.max(0, startByte)
    const end = Math.min(this.audioData.length, startByte + lengthBytes)

    if (start >= end) {
      return null
    }

    return this.audioData.slice(start, end)
  }

  /**
   * Get the whole audio payload (not a copy), or `null` before a successful
   * `initialize()`.
   */
  getAudioData(): Uint8Array | null {
    return this.audioData
  }

  /**
   * Get WAV file header information
   */
  getHeader(): WavFileHeader | null {
    return this.header
  }

  /**
   * Get total audio data size
   */
  getTotalDataSize(): number {
    return this.header?.dataSize ?? 0
  }

  /**
   * Convert byte position to time in seconds
   */
  byteToTime(bytePosition: number): number {
    const bytesPerSecond = this.getBytesPerSecond()
    // Avoid Infinity/NaN when no usable header is loaded.
    return bytesPerSecond > 0 ? bytePosition / bytesPerSecond : 0
  }

  /**
   * Convert time in seconds to byte position
   */
  timeToByte(timeSeconds: number): number {
    return Math.floor(timeSeconds * this.getBytesPerSecond())
  }

  /**
   * Get file statistics
   */
  getStatistics() {
    return {
      filePath: this.filePath,
      header: this.header,
      totalDataSize: this.getTotalDataSize(),
      isInitialized: !!this.header,
    }
  }
}
@@ -0,0 +1,206 @@
1
+ import { base64ToUint8Array, uint8ArrayToBase64 } from './common'
2
+
3
/** PCM format parameters that WavFileWriter writes into the WAV header. */
+ export interface WavFileConfig {
4
/** Sample rate written to the header and used for the byte-rate field. */
+ sampleRate: number
5
/** Number of channels written to the header. */
+ channels: number
6
/** Bits per sample; the writer divides this by 8 to compute byte rate and block align. */
+ bitsPerSample: number
7
+ }
8
+
9
/** Promise-based filesystem functions WavFileWriter needs; it passes 'base64' as the encoding on every read and write. */
+ export interface WavFileWriterFs {
10
/** Called to write the initial header and to rewrite the finished file. */
+ writeFile: (filePath: string, data: string, encoding: string) => Promise<void>
11
/** Called to add encoded audio bytes to the end of the file. */
+ appendFile: (filePath: string, data: string, encoding: string) => Promise<void>
12
/** Called during finalize to read the whole file back as an encoded string. */
+ readFile: (filePath: string, encoding: string) => Promise<string>
13
/** Resolves to whether a file exists at `filePath`; checked before unlink in cancel. */
+ exists: (filePath: string) => Promise<boolean>
14
/** Called by cancel to remove the incomplete file. */
+ unlink: (filePath: string) => Promise<void>
15
+ }
16
+
17
/**
 * Streams PCM audio into a WAV file through an injected filesystem.
 *
 * Usage: `initialize()` writes a placeholder header, `appendAudioData()` adds
 * audio, and `finalize()` rewrites the header with the real sizes. `cancel()`
 * aborts and deletes the file.
 *
 * Appends may be issued without awaiting the previous one: writes are
 * serialized internally so every chunk reaches the file exactly once and in
 * call order.
 */
export class WavFileWriter {
  /** Size in bytes of the header produced by `createWavHeader`. */
  private static readonly HEADER_SIZE = 44

  private fs: WavFileWriterFs

  private filePath: string

  private config: WavFileConfig

  private dataSize = 0

  private isWriting = false

  private writeQueue: Uint8Array[] = []

  /** Tail of the serialized flush chain; it never rejects. */
  private writeChain: Promise<void> = Promise.resolve()

  /**
   * @param fs - Filesystem functions used for all file access.
   * @param filePath - Destination path of the WAV file.
   * @param config - PCM format written into the header.
   */
  constructor(fs: WavFileWriterFs, filePath: string, config: WavFileConfig) {
    this.fs = fs
    this.filePath = filePath
    this.config = config
  }

  /**
   * Initialize the WAV file with headers
   *
   * Does nothing when the writer is already active.
   *
   * @throws Error when the header cannot be written.
   */
  async initialize(): Promise<void> {
    if (this.isWriting) {
      return
    }

    try {
      // Create the initial WAV header (we'll update the size later)
      const header = this.createWavHeader(0)
      await this.fs.writeFile(this.filePath, uint8ArrayToBase64(header), 'base64')

      this.dataSize = 0
      this.isWriting = true
      this.writeQueue = []
    } catch (error) {
      throw new Error(`Failed to initialize WAV file: ${error}`)
    }
  }

  /**
   * Append PCM audio data to the WAV file
   *
   * Write failures are logged rather than thrown; the data stays queued and
   * is retried on the next append or on finalize.
   *
   * @throws Error when the writer has not been initialized.
   */
  async appendAudioData(audioData: Uint8Array): Promise<void> {
    if (!this.isWriting) {
      throw new Error('WAV file not initialized')
    }

    try {
      // Queue a private copy: the flush runs asynchronously, so the caller
      // must remain free to reuse its buffer as soon as this call returns.
      this.writeQueue.push(new Uint8Array(audioData))

      // Process the write queue
      await this.processWriteQueue()
    } catch (error) {
      console.warn(`Failed to append audio data to WAV file: ${error}`)
    }
  }

  /**
   * Schedule a flush of the write queue behind any flush already running.
   *
   * Serializing the flushes guarantees that a chunk is never written twice
   * and that chunks queued while a write is in flight are not discarded.
   *
   * @returns A promise that resolves (never rejects) once the scheduled
   *   flush has finished.
   */
  private processWriteQueue(): Promise<void> {
    const step = this.writeChain.then(() => this.flushQueuedChunks())
    this.writeChain = step
    return step
  }

  /**
   * Write everything currently queued as one append. Never rejects: on
   * failure the batch is put back at the front of the queue for a later
   * retry.
   */
  private async flushQueuedChunks(): Promise<void> {
    if (this.writeQueue.length === 0) {
      return
    }

    // Take ownership of the batch before awaiting so chunks that arrive
    // during the write land in a fresh queue.
    const batch = this.writeQueue
    this.writeQueue = []

    try {
      // Combine all queued data
      const totalLength = batch.reduce((sum, chunk) => sum + chunk.length, 0)
      const combinedData = new Uint8Array(totalLength)

      let offset = 0
      for (const chunk of batch) {
        combinedData.set(chunk, offset)
        offset += chunk.length
      }

      // Append to file
      await this.fs.appendFile(
        this.filePath,
        uint8ArrayToBase64(combinedData),
        'base64',
      )

      // Update data size
      this.dataSize += combinedData.length
    } catch (error) {
      // Keep the audio for a retry unless the recording was cancelled.
      if (this.isWriting) {
        this.writeQueue = batch.concat(this.writeQueue)
      }
      console.warn(`Failed to process WAV write queue: ${error}`)
      // Don't throw here to avoid breaking the recording
    }
  }

  /**
   * Finalize the WAV file by updating the header with correct sizes
   *
   * Failures are logged rather than thrown; the writer then stays active so
   * finalize can be retried.
   */
  async finalize(): Promise<void> {
    if (!this.isWriting) {
      return
    }

    try {
      // Process any remaining queued data
      await this.processWriteQueue()

      // Read the current file
      const currentData = await this.fs.readFile(this.filePath, 'base64')
      const currentBytes = base64ToUint8Array(currentData)

      // Size the header from the bytes actually on disk so the file stays
      // self-consistent even if the running counter drifted from it.
      const payload = currentBytes.subarray(WavFileWriter.HEADER_SIZE)
      const correctHeader = this.createWavHeader(payload.length)

      // Replace the placeholder header
      const finalData = new Uint8Array(correctHeader.length + payload.length)
      finalData.set(correctHeader, 0)
      finalData.set(payload, correctHeader.length)

      // Write the final file
      const finalBase64 = uint8ArrayToBase64(finalData)
      await this.fs.writeFile(this.filePath, finalBase64, 'base64')

      this.dataSize = payload.length
      this.isWriting = false
    } catch (error) {
      console.warn(`Failed to finalize WAV file: ${error}`)
    }
  }

  /**
   * Create WAV file header
   *
   * @param dataSize - Number of audio bytes the header should declare.
   * @returns The 44-byte RIFF/WAVE header for PCM audio.
   */
  private createWavHeader(dataSize: number): Uint8Array {
    const header = new ArrayBuffer(WavFileWriter.HEADER_SIZE)
    const view = new DataView(header)
    const { sampleRate, channels, bitsPerSample } = this.config
    const bytesPerSample = bitsPerSample / 8

    // RIFF header (tags are big-endian character codes, sizes little-endian)
    view.setUint32(0, 0x52494646, false) // "RIFF"
    view.setUint32(4, 36 + dataSize, true) // File size - 8
    view.setUint32(8, 0x57415645, false) // "WAVE"

    // Format chunk
    view.setUint32(12, 0x666d7420, false) // "fmt "
    view.setUint32(16, 16, true) // Chunk size
    view.setUint16(20, 1, true) // Audio format (PCM)
    view.setUint16(22, channels, true) // Number of channels
    view.setUint32(24, sampleRate, true) // Sample rate
    view.setUint32(28, sampleRate * channels * bytesPerSample, true) // Byte rate
    view.setUint16(32, channels * bytesPerSample, true) // Block align
    view.setUint16(34, bitsPerSample, true) // Bits per sample

    // Data chunk
    view.setUint32(36, 0x64617461, false) // "data"
    view.setUint32(40, dataSize, true) // Data size

    return new Uint8Array(header)
  }

  /**
   * Cancel writing and cleanup
   *
   * Drops queued audio and deletes the file. Failures are logged rather than
   * thrown.
   */
  async cancel(): Promise<void> {
    this.isWriting = false
    this.writeQueue = []

    try {
      // Let an in-flight append settle first so it cannot touch the file
      // after it has been removed.
      await this.writeChain

      // Delete the incomplete file
      const exists = await this.fs.exists(this.filePath)
      if (exists) {
        await this.fs.unlink(this.filePath)
      }
    } catch (error) {
      console.warn(`Failed to cleanup WAV file: ${error}`)
    }
  }

  /**
   * Get current file statistics
   */
  getStatistics() {
    const { sampleRate, channels, bitsPerSample } = this.config
    const durationSec = this.dataSize / (sampleRate * channels * (bitsPerSample / 8))

    return {
      filePath: this.filePath,
      dataSize: this.dataSize,
      durationSec,
      isWriting: this.isWriting,
      queuedChunks: this.writeQueue.length,
      estimatedFileSizeMB:
        (WavFileWriter.HEADER_SIZE + this.dataSize) / (1024 * 1024),
    }
  }
}
@@ -0,0 +1,17 @@
1
+ const Buffer: any = global.Buffer || require('safe-buffer').Buffer
2
+
3
/**
 * Decode a base64 string into its raw bytes.
 *
 * @param base64 - Base64-encoded text.
 * @returns A new plain Uint8Array holding the decoded bytes.
 */
export function base64ToUint8Array(base64: string): Uint8Array {
  const decoded = Buffer.from(base64, 'base64')
  // Copy into a plain Uint8Array so callers never receive a Buffer instance.
  const bytes = new Uint8Array(decoded.length)
  bytes.set(decoded)
  return bytes
}
10
+
11
/**
 * Encode raw bytes as a base64 string.
 *
 * @param buffer - Bytes to encode.
 * @returns The base64 text for exactly the bytes visible through `buffer`.
 */
export function uint8ArrayToBase64(buffer: Uint8Array): string {
  return Buffer.from(buffer).toString('base64')
}