@qvac/transcription-whispercpp 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/index.d.ts +38 -29
- package/index.js +219 -98
- package/lib/error.js +16 -1
- package/package.json +2 -2
- package/prebuilds/android-arm64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/darwin-arm64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/darwin-arm64/qvac__transcription-whispercpp.bare.exports +1 -1
- package/prebuilds/darwin-x64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/darwin-x64/qvac__transcription-whispercpp.bare.exports +1 -1
- package/prebuilds/ios-arm64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/ios-arm64/qvac__transcription-whispercpp.bare.exports +1 -1
- package/prebuilds/ios-arm64-simulator/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/ios-arm64-simulator/qvac__transcription-whispercpp.bare.exports +1 -1
- package/prebuilds/ios-x64-simulator/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/ios-x64-simulator/qvac__transcription-whispercpp.bare.exports +1 -1
- package/prebuilds/linux-arm64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/linux-x64/qvac__transcription-whispercpp.bare +0 -0
- package/prebuilds/win32-x64/qvac__transcription-whispercpp.bare +0 -0
package/README.md
CHANGED
|
@@ -237,7 +237,7 @@ The library provides a straightforward workflow for audio transcription:
|
|
|
237
237
|
|
|
238
238
|
Data loaders abstract the way model files are accessed, whether from the filesystem, a network drive, or any other storage mechanism. For more information about the model registry and model builds, see [resources](#resources).
|
|
239
239
|
|
|
240
|
-
- [Filesystem Data Loader](https://github.com/tetherto/qvac/tree/main/packages/
|
|
240
|
+
- [Filesystem Data Loader](https://github.com/tetherto/qvac/tree/main/packages/dl-filesystem)
|
|
241
241
|
|
|
242
242
|
First, select and instantiate a data loader that provides access to model files:
|
|
243
243
|
|
|
@@ -479,7 +479,7 @@ try {
|
|
|
479
479
|
|
|
480
480
|
### 1. Clone the repo & Install the dependencies
|
|
481
481
|
```bash
|
|
482
|
-
git clone
|
|
482
|
+
git clone https://github.com/tetherto/qvac-lib-infer-whispercpp.git
|
|
483
483
|
cd qvac-lib-infer-whispercpp
|
|
484
484
|
npm install
|
|
485
485
|
```
|
package/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import QvacResponse from "@qvac/infer-base/src/QvacResponse";
|
|
2
2
|
import type { LoggerInterface } from "@qvac/logging";
|
|
3
3
|
import { Readable } from "stream";
|
|
4
4
|
|
|
@@ -19,25 +19,19 @@ declare interface WhisperConfig {
|
|
|
19
19
|
[key: string]: unknown;
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
declare interface TranscriptionWhispercppFiles {
|
|
23
|
+
model: string;
|
|
24
|
+
vadModel?: string;
|
|
25
|
+
}
|
|
26
|
+
|
|
22
27
|
declare interface TranscriptionWhispercppArgs {
|
|
23
|
-
|
|
28
|
+
files: TranscriptionWhispercppFiles;
|
|
24
29
|
logger?: LoggerInterface;
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
diskPath?: string;
|
|
30
|
+
exclusiveRun?: boolean;
|
|
31
|
+
opts?: { stats?: boolean };
|
|
28
32
|
[args: string]: unknown;
|
|
29
33
|
}
|
|
30
34
|
|
|
31
|
-
declare interface ProgressData {
|
|
32
|
-
action: string;
|
|
33
|
-
totalSize: number;
|
|
34
|
-
totalFiles: number;
|
|
35
|
-
filesProcessed: number;
|
|
36
|
-
currentFile: string;
|
|
37
|
-
currentFileProgress: string;
|
|
38
|
-
overallProgress: string;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
35
|
declare interface TranscriptionWhispercppConfig {
|
|
42
36
|
path?: string;
|
|
43
37
|
enableStats?: boolean;
|
|
@@ -46,7 +40,11 @@ declare interface TranscriptionWhispercppConfig {
|
|
|
46
40
|
[args: string]: unknown;
|
|
47
41
|
}
|
|
48
42
|
|
|
49
|
-
declare
|
|
43
|
+
declare interface InferenceClientState {
|
|
44
|
+
configLoaded: boolean;
|
|
45
|
+
weightsLoaded: boolean;
|
|
46
|
+
destroyed: boolean;
|
|
47
|
+
}
|
|
50
48
|
|
|
51
49
|
/**
|
|
52
50
|
* A single transcription segment emitted by the Whisper addon in an output update.
|
|
@@ -59,7 +57,7 @@ declare interface WhisperTranscriptionSegment {
|
|
|
59
57
|
/**
|
|
60
58
|
* GGML client implementation for the Whisper transcription model
|
|
61
59
|
*/
|
|
62
|
-
declare class TranscriptionWhispercpp
|
|
60
|
+
declare class TranscriptionWhispercpp {
|
|
63
61
|
/**
|
|
64
62
|
* Creates an instance of WhisperClient.
|
|
65
63
|
* @constructor
|
|
@@ -71,16 +69,23 @@ declare class TranscriptionWhispercpp extends BaseInference {
|
|
|
71
69
|
config: TranscriptionWhispercppConfig
|
|
72
70
|
);
|
|
73
71
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
72
|
+
getState(): InferenceClientState;
|
|
73
|
+
|
|
74
|
+
load(...args: unknown[]): Promise<void>;
|
|
75
|
+
|
|
76
|
+
unload(): Promise<void>;
|
|
77
|
+
|
|
78
|
+
destroy(): Promise<void>;
|
|
79
|
+
|
|
80
|
+
pause(): Promise<void>;
|
|
81
|
+
|
|
82
|
+
unpause(): Promise<void>;
|
|
83
|
+
|
|
84
|
+
stop(): Promise<void>;
|
|
85
|
+
|
|
86
|
+
status(): Promise<string>;
|
|
87
|
+
|
|
88
|
+
cancel(): Promise<void>;
|
|
84
89
|
|
|
85
90
|
/**
|
|
86
91
|
* Reload the model with new configuration parameters.
|
|
@@ -100,6 +105,10 @@ declare class TranscriptionWhispercpp extends BaseInference {
|
|
|
100
105
|
run(
|
|
101
106
|
audioStream: Readable
|
|
102
107
|
): Promise<QvacResponse<TranscriptionWhispercpp.WhisperRunOutput>>;
|
|
108
|
+
|
|
109
|
+
runStreaming(
|
|
110
|
+
audioStream: Readable
|
|
111
|
+
): Promise<QvacResponse<TranscriptionWhispercpp.WhisperRunOutput>>;
|
|
103
112
|
}
|
|
104
113
|
|
|
105
114
|
declare namespace TranscriptionWhispercpp {
|
|
@@ -137,10 +146,10 @@ declare namespace TranscriptionWhispercpp {
|
|
|
137
146
|
VadParams,
|
|
138
147
|
WhisperConfig,
|
|
139
148
|
TranscriptionWhispercppArgs,
|
|
149
|
+
TranscriptionWhispercppFiles,
|
|
140
150
|
TranscriptionWhispercppConfig,
|
|
141
|
-
ProgressData,
|
|
142
|
-
ReportProgressCallback,
|
|
143
151
|
WhisperTranscriptionSegment,
|
|
152
|
+
InferenceClientState,
|
|
144
153
|
};
|
|
145
154
|
}
|
|
146
155
|
|
package/index.js
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
|
-
const path = require('bare-path')
|
|
4
3
|
const fs = require('bare-fs')
|
|
5
|
-
const
|
|
6
|
-
const
|
|
4
|
+
const QvacLogger = require('@qvac/logging')
|
|
5
|
+
const { createJobHandler, exclusiveRunQueue } = require('@qvac/infer-base')
|
|
7
6
|
|
|
8
7
|
const { WhisperInterface } = require('./whisper')
|
|
9
8
|
const { checkConfig } = require('./configChecker')
|
|
@@ -14,47 +13,134 @@ const END_OF_INPUT = 'end of job'
|
|
|
14
13
|
/**
|
|
15
14
|
* GGML client implementation for the Whisper transcription model
|
|
16
15
|
*/
|
|
17
|
-
class TranscriptionWhispercpp
|
|
16
|
+
class TranscriptionWhispercpp {
|
|
18
17
|
/**
|
|
19
18
|
* Creates an instance of WhisperClient.
|
|
20
19
|
* @constructor
|
|
21
|
-
* @param {Object} args - arguments for inference setup
|
|
20
|
+
* @param {Object} args - arguments for inference setup (`files`, `logger`, `exclusiveRun`, `opts`, …)
|
|
21
|
+
* @param {Object} args.files - local model file paths
|
|
22
|
+
* @param {string} args.files.model - path to the Whisper GGML model file
|
|
23
|
+
* @param {string} [args.files.vadModel] - optional path to the Silero VAD model
|
|
22
24
|
* @param {Object} config - environment-specific inference setup configuration
|
|
23
25
|
*/
|
|
24
26
|
constructor (
|
|
25
|
-
{
|
|
27
|
+
{ files, logger = null, exclusiveRun = true, ...args },
|
|
26
28
|
config
|
|
27
29
|
) {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
if (!files || typeof files.model !== 'string' || files.model.length === 0) {
|
|
31
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.MODEL_REQUIRED, adds: 'files.model is required' })
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const { opts = {}, ...passThrough } = { logger, exclusiveRun, ...args }
|
|
35
|
+
this.opts = opts
|
|
36
|
+
this.logger = new QvacLogger(passThrough.logger)
|
|
37
|
+
this.exclusiveRun = !!passThrough.exclusiveRun
|
|
38
|
+
this._withExclusiveRun = exclusiveRunQueue()
|
|
39
|
+
this.state = {
|
|
40
|
+
configLoaded: false,
|
|
41
|
+
weightsLoaded: false,
|
|
42
|
+
destroyed: false
|
|
43
|
+
}
|
|
30
44
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
45
|
+
const vadModel =
|
|
46
|
+
typeof files.vadModel === 'string' && files.vadModel.length > 0
|
|
47
|
+
? files.vadModel
|
|
48
|
+
: null
|
|
49
|
+
|
|
50
|
+
this._files = { model: files.model, vadModel }
|
|
34
51
|
this._config = config
|
|
35
|
-
this.weightsProvider = new WeightsProvider(loader, this.logger)
|
|
36
52
|
|
|
37
53
|
this.params = config.whisperConfig
|
|
38
|
-
|
|
54
|
+
/** Serializes inference runs; separate from `_withExclusiveRun` queue (reload / destroy / unload). */
|
|
55
|
+
this._inferenceQueueWaiter = Promise.resolve()
|
|
56
|
+
/** Batch append returns this id before `_activeJobId` is set; needed for `cancel(jobId)` during buffering. */
|
|
57
|
+
this._pendingWhisperJobId = null
|
|
58
|
+
this._job = createJobHandler({
|
|
59
|
+
cancel: () => {
|
|
60
|
+
const jobId = this._pendingWhisperJobId ?? this.addon?._activeJobId
|
|
61
|
+
return this.addon?.cancel?.(jobId)
|
|
62
|
+
}
|
|
63
|
+
})
|
|
39
64
|
|
|
40
65
|
this.logger.debug('TranscriptionWhispercpp constructor called', {
|
|
41
66
|
params: this.params,
|
|
42
67
|
config: this._config,
|
|
43
|
-
|
|
68
|
+
modelPath: this._files.model,
|
|
69
|
+
vadModelPath: this._files.vadModel
|
|
44
70
|
})
|
|
45
71
|
|
|
46
72
|
this.validateModelFiles()
|
|
47
73
|
}
|
|
48
74
|
|
|
75
|
+
getState () {
|
|
76
|
+
return this.state
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
async load (...loadArgs) {
|
|
80
|
+
if (this.state.destroyed) {
|
|
81
|
+
throw new QvacErrorAddonWhisper({
|
|
82
|
+
code: ERR_CODES.FAILED_TO_LOAD_WEIGHTS,
|
|
83
|
+
adds: 'instance was destroyed'
|
|
84
|
+
})
|
|
85
|
+
}
|
|
86
|
+
if (this.state.configLoaded || this.state.weightsLoaded) {
|
|
87
|
+
this.logger.info('Reload requested - unloading existing model first')
|
|
88
|
+
await this.unload()
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
await this._load(...loadArgs)
|
|
92
|
+
this.state.configLoaded = true
|
|
93
|
+
this.state.weightsLoaded = true
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
async pause () {
|
|
97
|
+
if (!this.addon?.pause) {
|
|
98
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.FAILED_TO_PAUSE, adds: 'pause not supported' })
|
|
99
|
+
}
|
|
100
|
+
await this.addon.pause()
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
async unpause () {
|
|
104
|
+
if (!this.addon?.activate) {
|
|
105
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.FAILED_TO_ACTIVATE, adds: 'activate not supported' })
|
|
106
|
+
}
|
|
107
|
+
await this.addon.activate()
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
async stop () {
|
|
111
|
+
if (!this.addon?.stop) {
|
|
112
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.FAILED_TO_STOP, adds: 'stop not supported' })
|
|
113
|
+
}
|
|
114
|
+
await this.addon.stop()
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
async status () {
|
|
118
|
+
if (!this.addon?.status) {
|
|
119
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.FAILED_TO_GET_STATUS, adds: 'status not supported' })
|
|
120
|
+
}
|
|
121
|
+
return await this.addon.status()
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
_resolveVadModelPath () {
|
|
125
|
+
if (this._config.vadModelPath) {
|
|
126
|
+
return this._config.vadModelPath
|
|
127
|
+
}
|
|
128
|
+
if (this._files.vadModel) {
|
|
129
|
+
return this._files.vadModel
|
|
130
|
+
}
|
|
131
|
+
if (this.params?.vad_model_path) {
|
|
132
|
+
return this.params.vad_model_path
|
|
133
|
+
}
|
|
134
|
+
return null
|
|
135
|
+
}
|
|
136
|
+
|
|
49
137
|
/**
|
|
50
|
-
* Load model
|
|
51
|
-
* @param {boolean} [
|
|
52
|
-
* @param {Function} [
|
|
138
|
+
* Load model and activate addon. Model files must already exist at `files.model` / optional `files.vadModel`.
|
|
139
|
+
* @param {boolean} [_closeLoader=false] - Unused; kept for `load(...args)` forwarding compatibility.
|
|
140
|
+
* @param {Function} [_reportProgressCallback] - Unused; kept for `load(...args)` forwarding compatibility.
|
|
53
141
|
*/
|
|
54
|
-
async _load (
|
|
55
|
-
this.logger.debug('
|
|
56
|
-
|
|
57
|
-
await this.downloadWeights(reportProgressCallback, { closeLoader })
|
|
142
|
+
async _load (_closeLoader = false, _reportProgressCallback) {
|
|
143
|
+
this.logger.debug('TranscriptionWhispercpp _load (local model files)')
|
|
58
144
|
|
|
59
145
|
const whisperConfig = {
|
|
60
146
|
...this.params,
|
|
@@ -73,7 +159,7 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
73
159
|
delete whisperConfig.vad_params
|
|
74
160
|
|
|
75
161
|
// VAD model is optional here: VAD settings are applied only when a model path resolves
|
|
76
|
-
const vadModelPath = this.
|
|
162
|
+
const vadModelPath = this._resolveVadModelPath()
|
|
77
163
|
if (vadModelPath) {
|
|
78
164
|
whisperConfig.vad_model_path = vadModelPath
|
|
79
165
|
whisperConfig.vadParams = this.params.vad_params || { threshold: 0.6 }
|
|
@@ -97,55 +183,81 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
97
183
|
_checkParamsExists(configurationParams)
|
|
98
184
|
this.addon = this._createAddon(configurationParams)
|
|
99
185
|
|
|
100
|
-
// For whisper.cpp, the model file contains everything - no separate weight loading needed
|
|
101
186
|
await this.addon.activate()
|
|
102
187
|
this.logger.debug('Addon activated')
|
|
103
188
|
}
|
|
104
189
|
|
|
105
190
|
_getModelFilePath () {
|
|
106
|
-
|
|
107
|
-
|
|
191
|
+
return this._files.model
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Serialize inference until the returned response settles (replaces `_hasActiveResponse`).
|
|
196
|
+
* Uses a dedicated waiter so `destroy` / `reload` (`_withExclusiveRun`) can still preempt.
|
|
197
|
+
*/
|
|
198
|
+
async _enqueueExclusiveRunResponse (runFn) {
|
|
199
|
+
const prev = this._inferenceQueueWaiter || Promise.resolve()
|
|
200
|
+
let releaseSlot
|
|
201
|
+
this._inferenceQueueWaiter = new Promise(resolve => { releaseSlot = resolve })
|
|
202
|
+
await prev
|
|
203
|
+
let response
|
|
204
|
+
try {
|
|
205
|
+
response = await runFn()
|
|
206
|
+
} catch (err) {
|
|
207
|
+
releaseSlot()
|
|
208
|
+
throw err
|
|
108
209
|
}
|
|
109
|
-
|
|
210
|
+
response.await().finally(() => { releaseSlot() }).catch(() => {})
|
|
211
|
+
return response
|
|
110
212
|
}
|
|
111
213
|
|
|
112
|
-
|
|
113
|
-
|
|
214
|
+
async run (input) {
|
|
215
|
+
if (this.exclusiveRun) {
|
|
216
|
+
return await this._enqueueExclusiveRunResponse(() => this._runInternal(input))
|
|
217
|
+
}
|
|
218
|
+
return await this._runInternal(input)
|
|
114
219
|
}
|
|
115
220
|
|
|
116
|
-
async
|
|
117
|
-
if (this.exclusiveRun
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
221
|
+
async runStreaming (audioStream) {
|
|
222
|
+
if (this.exclusiveRun) {
|
|
223
|
+
return await this._enqueueExclusiveRunResponse(() =>
|
|
224
|
+
this._runInternal(audioStream, { streaming: true })
|
|
225
|
+
)
|
|
121
226
|
}
|
|
227
|
+
return await this._runInternal(audioStream, { streaming: true })
|
|
228
|
+
}
|
|
122
229
|
|
|
230
|
+
async _runInternal (audioStream, opts = {}) {
|
|
123
231
|
const normalizedAudioStream = this._normalizeAudioStream(audioStream)
|
|
124
232
|
|
|
125
233
|
if (opts.streaming) {
|
|
126
234
|
return this._runStreaming(normalizedAudioStream)
|
|
127
235
|
}
|
|
128
236
|
|
|
129
|
-
|
|
237
|
+
return this._runBatchTranscription(normalizedAudioStream)
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
/** Batch runJob path: `_job` / response setup; audio via {@link #_handleAudioStream}. */
|
|
241
|
+
async _runBatchTranscription (normalizedAudioStream) {
|
|
242
|
+
this._pendingWhisperJobId = await this.addon.append({
|
|
130
243
|
type: 'audio',
|
|
131
244
|
input: new Uint8Array()
|
|
132
245
|
})
|
|
133
246
|
|
|
134
|
-
const response = this.
|
|
135
|
-
|
|
136
|
-
const finalized = response.await().finally(() => { this._hasActiveResponse = false })
|
|
247
|
+
const response = this._job.start()
|
|
248
|
+
const finalized = response.await()
|
|
137
249
|
finalized.catch(() => {})
|
|
138
250
|
response.await = () => finalized
|
|
139
251
|
|
|
140
252
|
this._handleAudioStream(normalizedAudioStream).catch((error) => {
|
|
141
|
-
|
|
142
|
-
this.
|
|
253
|
+
this._pendingWhisperJobId = null
|
|
254
|
+
this._job.fail(error)
|
|
143
255
|
})
|
|
144
256
|
return response
|
|
145
257
|
}
|
|
146
258
|
|
|
147
259
|
async _runStreaming (audioStream) {
|
|
148
|
-
const vadModelPath = this.
|
|
260
|
+
const vadModelPath = this._resolveVadModelPath()
|
|
149
261
|
if (!vadModelPath) {
|
|
150
262
|
throw new QvacErrorAddonWhisper({
|
|
151
263
|
code: ERR_CODES.VAD_MODEL_REQUIRED
|
|
@@ -164,11 +276,9 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
164
276
|
samplesOverlap: vadParams.samples_overlap || 0.1
|
|
165
277
|
})
|
|
166
278
|
|
|
167
|
-
|
|
168
|
-
const response = this.
|
|
169
|
-
this._hasActiveResponse = true
|
|
279
|
+
this._pendingWhisperJobId = null
|
|
280
|
+
const response = this._job.start()
|
|
170
281
|
const finalized = response.await().finally(() => {
|
|
171
|
-
this._hasActiveResponse = false
|
|
172
282
|
this.addon._activeJobId = null
|
|
173
283
|
this.addon._setState('listening')
|
|
174
284
|
})
|
|
@@ -176,14 +286,17 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
176
286
|
response.await = () => finalized
|
|
177
287
|
|
|
178
288
|
this._handleStreamingAudio(audioStream).catch((error) => {
|
|
179
|
-
|
|
180
|
-
this.
|
|
289
|
+
this._pendingWhisperJobId = null
|
|
290
|
+
this._job.fail(error)
|
|
181
291
|
})
|
|
182
292
|
return response
|
|
183
293
|
}
|
|
184
294
|
|
|
295
|
+
/** Append-only path to the native addon; job lifecycle lives in callers / `_outputCallback`. */
|
|
185
296
|
async _handleAudioStream (audioStream) {
|
|
186
|
-
this.logger.debug('Start handling audio stream', {
|
|
297
|
+
this.logger.debug('Start handling audio stream', {
|
|
298
|
+
modelPath: this._getModelFilePath()
|
|
299
|
+
})
|
|
187
300
|
for await (const chunk of audioStream) {
|
|
188
301
|
this.logger.debug('Appending audio chunk', { chunkLength: chunk.length })
|
|
189
302
|
await this.addon.append({
|
|
@@ -271,7 +384,7 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
271
384
|
delete whisperConfig.vad_params
|
|
272
385
|
|
|
273
386
|
// VAD model configuration
|
|
274
|
-
const vadModelPath = this.
|
|
387
|
+
const vadModelPath = this._resolveVadModelPath()
|
|
275
388
|
if (vadModelPath) {
|
|
276
389
|
whisperConfig.vad_model_path = vadModelPath
|
|
277
390
|
whisperConfig.vadParams = newConfig.whisperConfig?.vad_params || this.params.vad_params || { threshold: 0.6 }
|
|
@@ -289,34 +402,17 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
289
402
|
}
|
|
290
403
|
|
|
291
404
|
_checkParamsExists(configurationParams)
|
|
405
|
+
this._pendingWhisperJobId = null
|
|
406
|
+
if (this._job.active) {
|
|
407
|
+
this._job.fail(new Error('Model was reloaded'))
|
|
408
|
+
}
|
|
292
409
|
await this.cancel()
|
|
293
|
-
this._failAndClearActiveResponse('Model was reloaded')
|
|
294
410
|
await this.addon.reload(configurationParams)
|
|
295
411
|
await this.addon.activate()
|
|
296
412
|
this.logger.debug('Addon reloaded and activated successfully')
|
|
297
413
|
})
|
|
298
414
|
}
|
|
299
415
|
|
|
300
|
-
async _downloadWeights (reportProgressCallback, opts) {
|
|
301
|
-
const models = [this._modelName]
|
|
302
|
-
if (this._vadModelName) {
|
|
303
|
-
models.push(this._vadModelName)
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
this.logger.info('Loading weight files:', models)
|
|
307
|
-
|
|
308
|
-
const result = await this.weightsProvider.downloadFiles(
|
|
309
|
-
models,
|
|
310
|
-
this._diskPath,
|
|
311
|
-
{
|
|
312
|
-
closeLoader: opts.closeLoader,
|
|
313
|
-
onDownloadProgress: reportProgressCallback
|
|
314
|
-
}
|
|
315
|
-
)
|
|
316
|
-
this.logger.info('Weight files downloaded successfully', { models })
|
|
317
|
-
return result
|
|
318
|
-
}
|
|
319
|
-
|
|
320
416
|
/**
|
|
321
417
|
* Instantiate the native addon with the given parameters.
|
|
322
418
|
* @param {Object} configurationParams - Configuration parameters for the addon
|
|
@@ -338,14 +434,47 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
338
434
|
)
|
|
339
435
|
}
|
|
340
436
|
|
|
437
|
+
_outputCallback (addon, event, jobId, data, error) {
|
|
438
|
+
if (event === 'Error') {
|
|
439
|
+
this.logger.error(`Job failed with error: ${error}`)
|
|
440
|
+
this._pendingWhisperJobId = null
|
|
441
|
+
this._job.fail(error)
|
|
442
|
+
return
|
|
443
|
+
}
|
|
444
|
+
if (event === 'Output') {
|
|
445
|
+
try {
|
|
446
|
+
this.logger.debug(`Job produced output: ${dataAsStringWhisper(data)}`)
|
|
447
|
+
} catch (err) {
|
|
448
|
+
this.logger.error(`Failed to serialize output for logging: ${err.message}`)
|
|
449
|
+
this.logger.debug('Job produced output: [non-serializable data]')
|
|
450
|
+
}
|
|
451
|
+
this._job.output(data)
|
|
452
|
+
return
|
|
453
|
+
}
|
|
454
|
+
if (event === 'JobEnded') {
|
|
455
|
+
this.logger.info(`Job ${jobId} completed. Stats: ${JSON.stringify(data)}`)
|
|
456
|
+
this._pendingWhisperJobId = null
|
|
457
|
+
if (this.opts?.stats) {
|
|
458
|
+
this._job.end(data)
|
|
459
|
+
} else {
|
|
460
|
+
this._job.end()
|
|
461
|
+
}
|
|
462
|
+
return
|
|
463
|
+
}
|
|
464
|
+
this.logger.debug(`Received event for job ${jobId}: ${event}`)
|
|
465
|
+
}
|
|
466
|
+
|
|
341
467
|
/**
|
|
342
468
|
* Unload the model and also call destroyInstance for proper cleanup
|
|
343
469
|
* This ensures the process can exit cleanly by closing the uv_async handle
|
|
344
470
|
*/
|
|
345
471
|
async unload () {
|
|
346
472
|
return await this._withExclusiveRun(async () => {
|
|
473
|
+
this._pendingWhisperJobId = null
|
|
474
|
+
if (this._job.active) {
|
|
475
|
+
this._job.fail(new Error('Model was unloaded'))
|
|
476
|
+
}
|
|
347
477
|
await this.cancel()
|
|
348
|
-
this._failAndClearActiveResponse('Model was unloaded')
|
|
349
478
|
if (this.addon) {
|
|
350
479
|
await this.addon.destroyInstance()
|
|
351
480
|
}
|
|
@@ -354,25 +483,23 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
354
483
|
})
|
|
355
484
|
}
|
|
356
485
|
|
|
357
|
-
async runStreaming (audioStream) {
|
|
358
|
-
if (this.exclusiveRun) {
|
|
359
|
-
return await this._withExclusiveRun(() =>
|
|
360
|
-
this._runInternal(audioStream, { streaming: true })
|
|
361
|
-
)
|
|
362
|
-
}
|
|
363
|
-
return await this._runInternal(audioStream, { streaming: true })
|
|
364
|
-
}
|
|
365
|
-
|
|
366
486
|
async cancel () {
|
|
367
487
|
if (this.addon?.cancel) {
|
|
368
488
|
await this.addon.cancel()
|
|
369
489
|
}
|
|
490
|
+
this._pendingWhisperJobId = null
|
|
491
|
+
if (this._job.active) {
|
|
492
|
+
this._job.fail(new Error('Job cancelled'))
|
|
493
|
+
}
|
|
370
494
|
}
|
|
371
495
|
|
|
372
496
|
async destroy () {
|
|
373
497
|
return await this._withExclusiveRun(async () => {
|
|
498
|
+
this._pendingWhisperJobId = null
|
|
499
|
+
if (this._job.active) {
|
|
500
|
+
this._job.fail(new Error('Model was destroyed'))
|
|
501
|
+
}
|
|
374
502
|
await this.cancel()
|
|
375
|
-
this._failAndClearActiveResponse('Model was destroyed')
|
|
376
503
|
if (this.addon) {
|
|
377
504
|
await this.addon.destroyInstance()
|
|
378
505
|
}
|
|
@@ -382,14 +509,6 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
382
509
|
})
|
|
383
510
|
}
|
|
384
511
|
|
|
385
|
-
_failAndClearActiveResponse (reason) {
|
|
386
|
-
for (const [jobId, response] of this._jobToResponse.entries()) {
|
|
387
|
-
response.failed(new Error(reason))
|
|
388
|
-
this._deleteJobMapping(jobId)
|
|
389
|
-
}
|
|
390
|
-
this._hasActiveResponse = false
|
|
391
|
-
}
|
|
392
|
-
|
|
393
512
|
validateModelFiles () {
|
|
394
513
|
const modelPath = this._config.path || this._getModelFilePath()
|
|
395
514
|
if (!modelPath || !fs.existsSync(modelPath)) {
|
|
@@ -401,20 +520,22 @@ class TranscriptionWhispercpp extends BaseInference {
|
|
|
401
520
|
)
|
|
402
521
|
}
|
|
403
522
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
throw new Error(
|
|
409
|
-
vadModelPath
|
|
410
|
-
? `VAD model file doesn't exist: ${vadModelPath}`
|
|
411
|
-
: "VAD model file doesn't exist"
|
|
412
|
-
)
|
|
413
|
-
}
|
|
523
|
+
const vadModelPath = this._resolveVadModelPath()
|
|
524
|
+
if (vadModelPath && !fs.existsSync(vadModelPath)) {
|
|
525
|
+
this.logger.error('VAD model file not found', { path: vadModelPath })
|
|
526
|
+
throw new QvacErrorAddonWhisper({ code: ERR_CODES.VAD_MODEL_NOT_FOUND, adds: vadModelPath })
|
|
414
527
|
}
|
|
415
528
|
}
|
|
416
529
|
}
|
|
417
530
|
|
|
531
|
+
function dataAsStringWhisper (data) {
|
|
532
|
+
if (!data) return ''
|
|
533
|
+
if (typeof data === 'object') {
|
|
534
|
+
return JSON.stringify(data)
|
|
535
|
+
}
|
|
536
|
+
return data.toString()
|
|
537
|
+
}
|
|
538
|
+
|
|
418
539
|
function _checkParamsExists (params) {
|
|
419
540
|
// Use the centralized config validation from configChecker.js
|
|
420
541
|
checkConfig(params)
|
package/lib/error.js
CHANGED
|
@@ -22,7 +22,10 @@ const ERR_CODES = Object.freeze({
|
|
|
22
22
|
FAILED_TO_START_STREAMING: 6012,
|
|
23
23
|
FAILED_TO_APPEND_STREAMING: 6013,
|
|
24
24
|
FAILED_TO_END_STREAMING: 6014,
|
|
25
|
-
BUFFER_LIMIT_EXCEEDED: 6015
|
|
25
|
+
BUFFER_LIMIT_EXCEEDED: 6015,
|
|
26
|
+
FAILED_TO_STOP: 6016,
|
|
27
|
+
MODEL_REQUIRED: 6017,
|
|
28
|
+
VAD_MODEL_NOT_FOUND: 6018
|
|
26
29
|
})
|
|
27
30
|
|
|
28
31
|
addCodes({
|
|
@@ -85,6 +88,18 @@ addCodes({
|
|
|
85
88
|
[ERR_CODES.BUFFER_LIMIT_EXCEEDED]: {
|
|
86
89
|
name: 'BUFFER_LIMIT_EXCEEDED',
|
|
87
90
|
message: (message) => `Audio buffer size limit exceeded: ${message}`
|
|
91
|
+
},
|
|
92
|
+
[ERR_CODES.FAILED_TO_STOP]: {
|
|
93
|
+
name: 'FAILED_TO_STOP',
|
|
94
|
+
message: (message) => `Failed to stop addon, error: ${message}`
|
|
95
|
+
},
|
|
96
|
+
[ERR_CODES.MODEL_REQUIRED]: {
|
|
97
|
+
name: 'MODEL_REQUIRED',
|
|
98
|
+
message: (message) => `Model is required: ${message}`
|
|
99
|
+
},
|
|
100
|
+
[ERR_CODES.VAD_MODEL_NOT_FOUND]: {
|
|
101
|
+
name: 'VAD_MODEL_NOT_FOUND',
|
|
102
|
+
message: (message) => `VAD model file not found: ${message}`
|
|
88
103
|
}
|
|
89
104
|
}, {
|
|
90
105
|
name,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@qvac/transcription-whispercpp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "transcription addon for qvac",
|
|
5
5
|
"addon": true,
|
|
6
6
|
"engines": {
|
|
@@ -85,7 +85,7 @@
|
|
|
85
85
|
"dependencies": {
|
|
86
86
|
"@qvac/decoder-audio": "^0.3.3",
|
|
87
87
|
"@qvac/error": "^0.1.0",
|
|
88
|
-
"@qvac/infer-base": "^0.
|
|
88
|
+
"@qvac/infer-base": "^0.4.0",
|
|
89
89
|
"@qvac/logging": "^0.1.0",
|
|
90
90
|
"bare-channel": "^5.2.2",
|
|
91
91
|
"bare-ffmpeg": "^1.0.0-32",
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|