speechflow 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +43 -14
  3. package/etc/speechflow.yaml +20 -48
  4. package/etc/stx.conf +2 -2
  5. package/package.json +5 -5
  6. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +60 -0
  8. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js.map +1 -0
  9. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.d.ts +15 -0
  10. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +234 -0
  11. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
  13. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -0
  15. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +19 -11
  16. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.d.ts +16 -0
  18. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js +275 -0
  19. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +32 -15
  21. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +7 -6
  23. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +2 -4
  25. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +3 -3
  27. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +1 -1
  30. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +9 -8
  32. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +3 -3
  34. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +5 -5
  36. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
  37. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +26 -6
  38. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -1
  39. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -0
  41. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +72 -5
  42. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -1
  44. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  45. package/speechflow-cli/dst/speechflow-node-t2t-translate.js +50 -25
  46. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
  47. package/speechflow-cli/etc/oxlint.jsonc +9 -1
  48. package/speechflow-cli/etc/stx.conf +1 -1
  49. package/speechflow-cli/package.d/sherpa-onnx+1.12.23.patch +12 -0
  50. package/speechflow-cli/package.json +23 -19
  51. package/speechflow-cli/src/lib.d.ts +30 -4
  52. package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +68 -0
  53. package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +219 -0
  54. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  55. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +21 -12
  56. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +33 -15
  57. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +9 -8
  58. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +2 -4
  59. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +3 -3
  60. package/speechflow-cli/src/speechflow-node-t2a-google.ts +2 -2
  61. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +1 -1
  62. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +11 -10
  63. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +3 -3
  64. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +6 -6
  65. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +30 -11
  66. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
  67. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +86 -10
  68. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
  69. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
  70. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +54 -29
  71. package/speechflow-ui-db/dst/index.css +1 -1
  72. package/speechflow-ui-db/dst/index.js +13 -13
  73. package/speechflow-ui-db/package.json +16 -15
  74. package/speechflow-ui-db/src/app.vue +62 -17
  75. package/speechflow-ui-st/dst/index.css +1 -1
  76. package/speechflow-ui-st/dst/index.js +32 -32
  77. package/speechflow-ui-st/package.json +17 -16
  78. package/speechflow-ui-st/src/app.vue +9 -8
@@ -0,0 +1,68 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import { parentPort, workerData } from "node:worker_threads"
9
+
10
+ /* external dependencies */
11
+ import SherpaOnnx from "sherpa-onnx"
12
+ import type {
13
+ SherpaOnnxDenoiserConfig,
14
+ SherpaOnnxOfflineSpeechDenoiser
15
+ } from "sherpa-onnx"
16
+
17
+ /* receive model path from parent thread */
18
+ const modelPath: string = workerData.modelPath
19
+
20
+ /* GTCRN state */
21
+ let denoiser: SherpaOnnxOfflineSpeechDenoiser
22
+
23
+ /* helper: log message to parent */
24
+ const log = (level: string, message: string) => {
25
+ parentPort!.postMessage({ type: "log", level, message })
26
+ }
27
+
28
+ /* initialize globals */
29
+ ;(async () => {
30
+ try {
31
+ /* create denoiser */
32
+ const config: SherpaOnnxDenoiserConfig = {
33
+ model: {
34
+ gtcrn: {
35
+ model: modelPath
36
+ }
37
+ },
38
+ numThreads: 1
39
+ }
40
+ denoiser = SherpaOnnx.createOfflineSpeechDenoiser(config)
41
+ log("info", "GTCRN denoiser initialized")
42
+ parentPort!.postMessage({ type: "ready" })
43
+ }
44
+ catch (err) {
45
+ parentPort!.postMessage({ type: "failed", message: `failed to initialize GTCRN: ${err}` })
46
+ process.exit(1)
47
+ }
48
+ })()
49
+
50
+ /* receive messages */
51
+ parentPort!.on("message", (msg) => {
52
+ if (msg.type === "process") {
53
+ const { id, samples } = msg
54
+
55
+ /* process with GTCRN denoiser
56
+ NOTICE: GTCRN can also resample our input, but will always
57
+ produce 16KHz output, so we already fixate 16KHz input here! */
58
+ const result = denoiser.run(samples, 16000)
59
+
60
+ /* copy to transferable ArrayBuffer and send back to parent */
61
+ const samplesDenoised = new Float32Array(result.samples)
62
+ parentPort!.postMessage({ type: "process-done", id, data: samplesDenoised }, [ samplesDenoised.buffer ])
63
+ }
64
+ else if (msg.type === "close") {
65
+ /* shutdown this process */
66
+ process.exit(0)
67
+ }
68
+ })
@@ -0,0 +1,219 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import fs from "node:fs"
9
+ import path from "node:path"
10
+ import Stream from "node:stream"
11
+ import { Worker } from "node:worker_threads"
12
+
13
+ /* external dependencies */
14
+ import axios from "axios"
15
+ import SpeexResampler from "speex-resampler"
16
+
17
+ /* internal dependencies */
18
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
19
+ import * as util from "./speechflow-util"
20
+
21
+ /* SpeechFlow node for GTCRN based noise suppression in audio-to-audio passing */
22
+ export default class SpeechFlowNodeA2AGTCRN extends SpeechFlowNode {
23
+ /* declare official node name */
24
+ public static name = "a2a-gtcrn"
25
+
26
+ /* internal state */
27
+ private closing = false
28
+ private worker: Worker | null = null
29
+ private resamplerDown: SpeexResampler | null = null
30
+ private resamplerUp: SpeexResampler | null = null
31
+
32
+ /* construct node */
33
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
34
+ super(id, cfg, opts, args)
35
+
36
+ /* declare node configuration parameters */
37
+ this.configure({})
38
+
39
+ /* declare node input/output format */
40
+ this.input = "audio"
41
+ this.output = "audio"
42
+ }
43
+
44
+ /* open node */
45
+ async open () {
46
+ /* clear destruction flag */
47
+ this.closing = false
48
+
49
+ /* ensure GTCRN ONNX model is available */
50
+ const modelUrl = "https://github.com/k2-fsa/sherpa-onnx/" +
51
+ "releases/download/speech-enhancement-models/gtcrn_simple.onnx"
52
+ const modelDir = path.join(this.config.cacheDir, "gtcrn")
53
+ const modelPath = path.resolve(modelDir, "gtcrn_simple.onnx")
54
+ const stat = await fs.promises.stat(modelPath).catch(() => null)
55
+ if (stat === null) {
56
+ this.log("info", `GTCRN model downloading from "${modelUrl}"`)
57
+ await fs.promises.mkdir(modelDir, { recursive: true })
58
+ const response = await axios.get(modelUrl, {
59
+ responseType: "arraybuffer",
60
+ onDownloadProgress: (progressEvent) => {
61
+ if (progressEvent.total) {
62
+ const percent = (progressEvent.loaded / progressEvent.total) * 100
63
+ this.log("info", `GTCRN model download: ${percent.toFixed(1)}%`)
64
+ }
65
+ }
66
+ })
67
+ await fs.promises.writeFile(modelPath, Buffer.from(response.data))
68
+ this.log("info", `GTCRN model downloaded to "${modelPath}"`)
69
+ }
70
+
71
+ /* establish resamplers from SpeechFlow's internal 48KHz
72
+ to GTCRN's required 16KHz format and back */
73
+ this.resamplerDown = new SpeexResampler(1, this.config.audioSampleRate, 16000, 7)
74
+ this.resamplerUp = new SpeexResampler(1, 16000, this.config.audioSampleRate, 7)
75
+
76
+ /* initialize worker */
77
+ this.worker = new Worker(path.resolve(__dirname, "speechflow-node-a2a-gtcrn-wt.js"), {
78
+ workerData: { modelPath }
79
+ })
80
+ this.worker.on("error", (err) => {
81
+ this.log("error", `GTCRN worker thread error: ${err}`)
82
+ this.stream?.emit("error", err)
83
+ })
84
+ this.worker.on("exit", (code) => {
85
+ if (code !== 0)
86
+ this.log("error", `GTCRN worker thread exited with error code ${code}`)
87
+ else
88
+ this.log("info", `GTCRN worker thread exited with regular code ${code}`)
89
+ })
90
+
91
+ /* wait for worker to be ready */
92
+ await new Promise<void>((resolve, reject) => {
93
+ const timeout = setTimeout(() => {
94
+ reject(new Error("GTCRN worker thread initialization timeout"))
95
+ }, 60 * 1000)
96
+ const onMessage = (msg: any) => {
97
+ if (typeof msg === "object" && msg !== null && msg.type === "log")
98
+ this.log(msg.level, msg.message)
99
+ else if (typeof msg === "object" && msg !== null && msg.type === "ready") {
100
+ clearTimeout(timeout)
101
+ this.worker!.off("message", onMessage)
102
+ resolve()
103
+ }
104
+ else if (typeof msg === "object" && msg !== null && msg.type === "failed") {
105
+ clearTimeout(timeout)
106
+ this.worker!.off("message", onMessage)
107
+ reject(new Error(msg.message ?? "GTCRN worker thread initialization failed"))
108
+ }
109
+ }
110
+ this.worker!.on("message", onMessage)
111
+ this.worker!.once("error", (err) => {
112
+ clearTimeout(timeout)
113
+ reject(err)
114
+ })
115
+ })
116
+
117
+ /* receive message from worker */
118
+ const pending = new Map<string, (arr: Float32Array<ArrayBuffer>) => void>()
119
+ this.worker.on("exit", () => {
120
+ pending.clear()
121
+ })
122
+ this.worker.on("message", (msg: any) => {
123
+ if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
124
+ const cb = pending.get(msg.id)
125
+ pending.delete(msg.id)
126
+ if (cb)
127
+ cb(msg.data)
128
+ else
129
+ this.log("warning", `GTCRN worker thread sent back unexpected id: ${msg.id}`)
130
+ }
131
+ else if (typeof msg === "object" && msg !== null && msg.type === "log")
132
+ this.log(msg.level, msg.message)
133
+ else
134
+ this.log("warning", `GTCRN worker thread sent unexpected message: ${JSON.stringify(msg)}`)
135
+ })
136
+
137
+ /* send message to worker */
138
+ let seq = 0
139
+ const workerProcess = async (samples: Float32Array<ArrayBuffer>) => {
140
+ if (this.closing)
141
+ return samples
142
+ const id = `${seq++}`
143
+ return new Promise<Float32Array<ArrayBuffer>>((resolve) => {
144
+ pending.set(id, (result) => { resolve(result) })
145
+ this.worker!.postMessage({ type: "process", id, samples }, [ samples.buffer ])
146
+ })
147
+ }
148
+
149
+ /* establish a transform stream */
150
+ const self = this
151
+ this.stream = new Stream.Transform({
152
+ readableObjectMode: true,
153
+ writableObjectMode: true,
154
+ decodeStrings: false,
155
+ transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
156
+ if (self.closing) {
157
+ callback(new Error("stream already destroyed"))
158
+ return
159
+ }
160
+ if (!Buffer.isBuffer(chunk.payload))
161
+ callback(new Error("invalid chunk payload type"))
162
+ else {
163
+ /* resample Buffer from 48KHz (SpeechFlow) to 16KHz (GTCRN) */
164
+ const resampledDown = self.resamplerDown!.processChunk(chunk.payload)
165
+
166
+ /* convert Buffer into Float32Array */
167
+ const payload = util.convertBufToF32(resampledDown)
168
+
169
+ /* process with GTCRN */
170
+ workerProcess(payload).then((result: Float32Array<ArrayBuffer>) => {
171
+ /* convert Float32Array into Buffer */
172
+ const buf = util.convertF32ToBuf(result)
173
+
174
+ /* resample Buffer from 16KHz (GTCRN) back to 48KHz (SpeechFlow) */
175
+ const resampledUp = self.resamplerUp!.processChunk(buf)
176
+
177
+ /* update chunk */
178
+ chunk.payload = resampledUp
179
+
180
+ /* forward updated chunk */
181
+ this.push(chunk)
182
+ callback()
183
+ }).catch((err: unknown) => {
184
+ const error = util.ensureError(err)
185
+ self.log("warning", `processing of chunk failed: ${error.message}`)
186
+ callback(error)
187
+ })
188
+ }
189
+ },
190
+ final (callback) {
191
+ callback()
192
+ }
193
+ })
194
+ }
195
+
196
+ /* close node */
197
+ async close () {
198
+ /* indicate closing */
199
+ this.closing = true
200
+
201
+ /* shutdown worker */
202
+ if (this.worker !== null) {
203
+ this.worker.terminate()
204
+ this.worker = null
205
+ }
206
+
207
+ /* shutdown stream */
208
+ if (this.stream !== null) {
209
+ await util.destroyStream(this.stream)
210
+ this.stream = null
211
+ }
212
+
213
+ /* destroy resamplers */
214
+ if (this.resamplerDown !== null)
215
+ this.resamplerDown = null
216
+ if (this.resamplerUp !== null)
217
+ this.resamplerUp = null
218
+ }
219
+ }
@@ -81,11 +81,11 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
81
81
 
82
82
  /* grab the accumulated chunk data */
83
83
  const chunkData = this.chunkBuffer
84
- this.chunkBuffer = new Float32Array(0)
84
+ this.chunkBuffer = chunkData.subarray(samplesPerChunk)
85
85
 
86
86
  /* update internal audio sample sliding window for LUFS-M */
87
87
  if (chunkData.length > sampleWindow.length)
88
- sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
88
+ sampleWindow.set(chunkData.subarray(0, sampleWindow.length), 0)
89
89
  else {
90
90
  sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
91
91
  sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
@@ -29,7 +29,7 @@ class AsyncQueue<T> {
29
29
  const resolve = this.resolvers.shift()
30
30
  if (resolve) {
31
31
  if (v !== null)
32
- resolve({ value: v })
32
+ resolve({ value: v, done: false })
33
33
  else
34
34
  resolve({ value: null, done: true })
35
35
  }
@@ -70,6 +70,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
70
70
  /* internal state */
71
71
  private client: TranscribeStreamingClient | null = null
72
72
  private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
+ private audioQueue: AsyncQueue<Uint8Array> | null = null
73
74
  private closing = false
74
75
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
75
76
 
@@ -127,7 +128,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
127
128
  })
128
129
 
129
130
  /* create an AudioStream for Amazon Transcribe */
130
- const audioQueue = new AsyncQueue<Uint8Array>()
131
+ this.audioQueue = new AsyncQueue<Uint8Array>()
132
+ const audioQueue = this.audioQueue
131
133
  const audioStream = (async function * (q: AsyncQueue<Uint8Array>): AsyncIterable<AudioStream> {
132
134
  for await (const chunk of q) {
133
135
  yield { AudioEvent: { AudioChunk: chunk } }
@@ -173,11 +175,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
173
175
  return prev
174
176
  }, new Map<string, any>())
175
177
  if (this.params.interim) {
176
- const words = []
178
+ const words: { word: string, start: Duration, end: Duration }[] = []
177
179
  for (const item of alt.Items ?? []) {
178
180
  if (item.Type === "pronunciation") {
179
181
  words.push({
180
- word: item.Content,
182
+ word: item.Content ?? "",
181
183
  start: Duration.fromMillis((item.StartTime ?? 0) * 1000).plus(this.timeZeroOffset),
182
184
  end: Duration.fromMillis((item.EndTime ?? 0) * 1000).plus(this.timeZeroOffset)
183
185
  })
@@ -273,10 +275,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
273
275
  /* indicate closing first to stop all async operations */
274
276
  this.closing = true
275
277
 
276
- /* close queue */
277
- if (this.queue !== null) {
278
- this.queue.write(null)
279
- this.queue = null
278
+ /* shutdown stream */
279
+ if (this.stream !== null) {
280
+ await util.destroyStream(this.stream)
281
+ this.stream = null
280
282
  }
281
283
 
282
284
  /* close Amazon Transcribe connection */
@@ -285,10 +287,17 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
285
287
  this.client = null
286
288
  }
287
289
 
288
- /* shutdown stream */
289
- if (this.stream !== null) {
290
- await util.destroyStream(this.stream)
291
- this.stream = null
290
+ /* close audio queue */
291
+ if (this.audioQueue !== null) {
292
+ this.audioQueue.push(null)
293
+ this.audioQueue.destroy()
294
+ this.audioQueue = null
295
+ }
296
+
297
+ /* signal EOF to any pending read operations */
298
+ if (this.queue !== null) {
299
+ this.queue.write(null)
300
+ this.queue = null
292
301
  }
293
302
  }
294
303
  }
@@ -37,7 +37,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
37
37
  model: { type: "string", val: "nova-2", pos: 0 },
38
38
  version: { type: "string", val: "latest", pos: 1 },
39
39
  language: { type: "string", val: "multi", pos: 2 },
40
- interim: { type: "boolean", val: false, pos: 3 }
40
+ interim: { type: "boolean", val: false, pos: 3 },
41
+ keywords: { type: "string", val: "", pos: 4 }
41
42
  })
42
43
 
43
44
  /* sanity check parameters */
@@ -86,34 +87,51 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
86
87
  /* create a store for the meta information */
87
88
  const metastore = new util.TimeStore<Map<string, any>>()
88
89
 
89
- /* connect to Deepgram API */
90
- const deepgram = Deepgram.createClient(this.params.key)
91
- let language = "en"
92
- if (this.params.language !== "en") {
93
- if (this.params.model.match(/^nova-2/))
94
- language = this.params.language
95
- else if (this.params.model.match(/^nova-3/))
96
- language = "multi"
97
- }
98
- this.dg = deepgram.listen.live({
90
+ /* configure Deepgram connection options */
91
+ const options: Deepgram.LiveSchema = {
99
92
  mip_opt_out: true,
100
93
  model: this.params.model,
101
94
  version: this.params.version,
102
- language,
103
95
  channels: this.config.audioChannels,
104
96
  sample_rate: this.config.audioSampleRate,
105
97
  encoding: "linear16",
106
98
  multichannel: false,
107
99
  endpointing: false,
108
100
  interim_results: this.params.interim,
109
- smart_format: true,
101
+ smart_format: false,
110
102
  punctuate: true,
111
103
  filler_words: true,
112
- numerals: true,
104
+ numerals: false,
113
105
  diarize: false,
114
106
  profanity_filter: false,
115
107
  redact: false
116
- })
108
+ }
109
+ const model = this.params.model as string
110
+ const language = this.params.language as string
111
+ const keywords = this.params.keywords as string
112
+ if (model.match(/^nova-2/) && language !== "en")
113
+ options.language = this.params.language
114
+ else if (model.match(/^nova-3/) && language !== "en")
115
+ options.language = "multi"
116
+ else
117
+ options.language = "en"
118
+ if (keywords !== "") {
119
+ if (model.match(/^nova-2/))
120
+ options.keywords = keywords.split(/(?:\s+|\s*,\s*)/).map((kw) => {
121
+ let boost = 2
122
+ if (kw.startsWith("-")) {
123
+ kw = kw.slice(1)
124
+ boost = -4
125
+ }
126
+ return `${kw}:${boost}`
127
+ })
128
+ else if (model.match(/^nova-3/))
129
+ options.keyterm = keywords.split(/(?:\s+|\s*,\s*)/).join(" ")
130
+ }
131
+
132
+ /* connect to Deepgram API */
133
+ const deepgram = Deepgram.createClient(this.params.key)
134
+ this.dg = deepgram.listen.live(options)
117
135
 
118
136
  /* hook onto Deepgram API events */
119
137
  this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
@@ -170,9 +170,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
170
170
  /* track transcription text */
171
171
  let text = ""
172
172
  this.ws.on("message", (data) => {
173
- let ev: any
173
+ let ev: Record<string, unknown>
174
174
  try {
175
- ev = JSON.parse(data.toString())
175
+ ev = JSON.parse(data.toString()) as Record<string, unknown>
176
176
  }
177
177
  catch (err) {
178
178
  this.log("warning", `failed to parse WebSocket message: ${err}`)
@@ -194,8 +194,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
194
194
  if (this.params.interim && !this.closing && this.queue !== null) {
195
195
  const itemId = ev.item_id as string
196
196
  const timing = speechTiming.get(itemId)
197
- const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
198
- const end = timing ? Duration.fromMillis(timing.endMs) : start
197
+ const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
198
+ const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
199
199
  const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
200
200
  chunk.meta = aggregateMeta(start, end)
201
201
  this.queue.write(chunk)
@@ -207,8 +207,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
207
207
  text = ev.transcript as string
208
208
  const itemId = ev.item_id as string
209
209
  const timing = speechTiming.get(itemId)
210
- const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
211
- const end = timing ? Duration.fromMillis(timing.endMs) : start
210
+ const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
211
+ const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
212
212
  const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
213
213
  chunk.meta = aggregateMeta(start, end)
214
214
  metastore.prune(start)
@@ -230,7 +230,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
230
230
  const itemId = ev.item_id as string
231
231
  const audioEndMs = ev.audio_end_ms as number
232
232
  const timing = speechTiming.get(itemId)
233
- if (timing)
233
+ if (timing !== undefined)
234
234
  timing.endMs = audioEndMs
235
235
  break
236
236
  }
@@ -239,7 +239,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
239
239
  break
240
240
  }
241
241
  case "error": {
242
- this.log("error", `error: ${ev.error?.message}`)
242
+ const error = ev.error as { message?: string } | undefined
243
+ this.log("error", `error: ${error?.message ?? "unknown error"}`)
243
244
  break
244
245
  }
245
246
  default:
@@ -124,11 +124,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
124
124
  decodeStrings: false,
125
125
  highWaterMark: 1,
126
126
  transform (chunk: SpeechFlowChunk, encoding, callback) {
127
- if (self.closing) {
127
+ if (self.closing)
128
128
  callback(new Error("stream already destroyed"))
129
- return
130
- }
131
- if (Buffer.isBuffer(chunk.payload))
129
+ else if (Buffer.isBuffer(chunk.payload))
132
130
  callback(new Error("invalid chunk payload type"))
133
131
  else if (chunk.payload === "")
134
132
  callback()
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
98
98
  const voices = await this.elevenlabs.voices.getAll()
99
99
  let voice = voices.voices.find((v) => v.name === this.params.voice)
100
100
  if (voice === undefined) {
101
- voice = voices.voices.find((v) => (v.name ?? "").startsWith(this.params.voice))
101
+ voice = voices.voices.find((v) => v.name?.startsWith(this.params.voice))
102
102
  if (voice === undefined)
103
103
  throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
104
104
  }
@@ -108,7 +108,7 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
108
108
  ""
109
109
  this.log("info", `selected voice: name: "${voice.name}"${info}`)
110
110
 
111
- /* perform text-to-speech operation with Elevenlabs API */
111
+ /* perform text-to-speech operation with ElevenLabs API */
112
112
  const model = this.params.optimize === "quality" ?
113
113
  "eleven_turbo_v2_5" :
114
114
  "eleven_flash_v2_5"
@@ -131,7 +131,7 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
131
131
  })
132
132
  }
133
133
 
134
- /* establish resampler from ElevenLabs's tier-dependent
134
+ /* establish resampler from ElevenLabs tier-dependent
135
135
  output sample rate to our standard audio sample rate (48KHz) */
136
136
  this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
137
137
 
@@ -36,8 +36,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
36
36
  key: { type: "string", val: process.env.SPEECHFLOW_GOOGLE_KEY ?? "" },
37
37
  voice: { type: "string", pos: 0, val: "en-US-Neural2-J" },
38
38
  language: { type: "string", pos: 1, val: "en-US" },
39
- speed: { type: "number", pos: 2, val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
40
- pitch: { type: "number", pos: 3, val: 0.0, match: (n: number) => n >= -20.0 && n <= 20.0 }
39
+ speed: { type: "number", pos: 2, val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
40
+ pitch: { type: "number", pos: 3, val: 0.0, match: (n: number) => n >= -20.0 && n <= 20.0 }
41
41
  })
42
42
 
43
43
  /* validate API key */
@@ -139,7 +139,7 @@ export default class SpeechFlowNodeT2ASupertonic extends SpeechFlowNode {
139
139
  const samples = result.audio
140
140
  const outputSampleRate = result.sampling_rate
141
141
  if (outputSampleRate !== this.sampleRate)
142
- this.log("warn", `unexpected sample rate ${outputSampleRate}Hz (expected ${this.sampleRate}Hz)`)
142
+ this.log("warning", `unexpected sample rate ${outputSampleRate}Hz (expected ${this.sampleRate}Hz)`)
143
143
 
144
144
  /* calculate duration */
145
145
  const duration = samples.length / outputSampleRate
@@ -85,15 +85,16 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
85
85
  const out = await this.client!.send(cmd)
86
86
  return (out.TranslatedText ?? "").trim()
87
87
  }
88
- catch (e: any) {
88
+ catch (e: unknown) {
89
89
  lastError = e
90
90
  attempt += 1
91
91
 
92
92
  /* simple backoff for transient errors */
93
+ const err = e as { name?: string, $retryable?: boolean }
93
94
  const retriable =
94
- e?.name === "ThrottlingException"
95
- || e?.name === "ServiceUnavailableException"
96
- || e?.$retryable === true
95
+ err?.name === "ThrottlingException"
96
+ || err?.name === "ServiceUnavailableException"
97
+ || err?.$retryable === true
97
98
  if (!retriable || attempt >= maxRetries)
98
99
  break
99
100
  const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
@@ -135,17 +136,17 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
135
136
 
136
137
  /* close node */
137
138
  async close () {
138
- /* close Amazon Translate connection */
139
- if (this.client !== null) {
140
- this.client.destroy()
141
- this.client = null
142
- }
143
-
144
139
  /* shutdown stream */
145
140
  if (this.stream !== null) {
146
141
  await util.destroyStream(this.stream)
147
142
  this.stream = null
148
143
  }
144
+
145
+ /* close Amazon Translate connection */
146
+ if (this.client !== null) {
147
+ this.client.destroy()
148
+ this.client = null
149
+ }
149
150
  }
150
151
  }
151
152
 
@@ -53,7 +53,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
53
53
  const usage = await deepl.getUsage()
54
54
  const limit = usage?.character?.limit ?? 1
55
55
  const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
56
- return { usage: `${percent.toFixed(8)}%` }
56
+ return { usage: `${percent.toFixed(2)}%` }
57
57
  }
58
58
 
59
59
  /* open node */
@@ -63,7 +63,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
63
63
 
64
64
  /* provide text-to-text translation */
65
65
  const translate = async (text: string) => {
66
- const src = this.params.src === "en" ? "en-US" : this.params.src
66
+ const src = this.params.src
67
67
  const dst = this.params.dst === "en" ? "en-US" : this.params.dst
68
68
  const result = await this.deepl!.translateText(text, src, dst, {
69
69
  splitSentences: "off",
@@ -95,7 +95,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
95
95
  this.push(chunkNew)
96
96
  callback()
97
97
  }).catch((error: unknown) => {
98
- callback(util.ensureError(error))
98
+ callback(util.ensureError(error, "DeepL translation failed"))
99
99
  })
100
100
  }
101
101
  },