speechflow 1.3.1 → 1.4.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/etc/stx.conf +54 -58
- package/package.json +25 -106
- package/{etc → speechflow-cli/etc}/eslint.mjs +1 -2
- package/speechflow-cli/etc/stx.conf +77 -0
- package/speechflow-cli/package.json +116 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +148 -64
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +39 -11
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +384 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +27 -11
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +313 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +59 -12
- package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +11 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +9 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +2 -2
- package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +37 -20
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +4 -3
- package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +9 -5
- package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +16 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +12 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +9 -3
- package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +5 -2
- package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +12 -12
- package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
- package/{src → speechflow-cli/src}/speechflow-utils.ts +78 -44
- package/{src → speechflow-cli/src}/speechflow.ts +188 -53
- package/speechflow-ui-db/etc/eslint.mjs +106 -0
- package/speechflow-ui-db/etc/htmllint.json +55 -0
- package/speechflow-ui-db/etc/stx.conf +79 -0
- package/speechflow-ui-db/etc/stylelint.js +46 -0
- package/speechflow-ui-db/etc/stylelint.yaml +33 -0
- package/speechflow-ui-db/etc/tsc-client.json +30 -0
- package/speechflow-ui-db/etc/tsc.node.json +9 -0
- package/speechflow-ui-db/etc/vite-client.mts +63 -0
- package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-db/package.json +75 -0
- package/speechflow-ui-db/src/app-icon.ai +1989 -4
- package/speechflow-ui-db/src/app-icon.svg +26 -0
- package/speechflow-ui-db/src/app.styl +64 -0
- package/speechflow-ui-db/src/app.vue +221 -0
- package/speechflow-ui-db/src/index.html +23 -0
- package/speechflow-ui-db/src/index.ts +26 -0
- package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
- package/speechflow-ui-db/src/tsconfig.json +3 -0
- package/speechflow-ui-st/etc/eslint.mjs +106 -0
- package/speechflow-ui-st/etc/htmllint.json +55 -0
- package/speechflow-ui-st/etc/stx.conf +79 -0
- package/speechflow-ui-st/etc/stylelint.js +46 -0
- package/speechflow-ui-st/etc/stylelint.yaml +33 -0
- package/speechflow-ui-st/etc/tsc-client.json +30 -0
- package/speechflow-ui-st/etc/tsc.node.json +9 -0
- package/speechflow-ui-st/etc/vite-client.mts +63 -0
- package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-st/package.json +79 -0
- package/speechflow-ui-st/src/app-icon.ai +1989 -4
- package/speechflow-ui-st/src/app-icon.svg +26 -0
- package/speechflow-ui-st/src/app.styl +64 -0
- package/speechflow-ui-st/src/app.vue +142 -0
- package/speechflow-ui-st/src/index.html +23 -0
- package/speechflow-ui-st/src/index.ts +26 -0
- package/speechflow-ui-st/src/lib.d.ts +9 -0
- package/speechflow-ui-st/src/tsconfig.json +3 -0
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
- package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
- package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
- package/dst/speechflow-node-a2a-gender.d.ts +0 -18
- package/dst/speechflow-node-a2a-gender.js +0 -271
- package/dst/speechflow-node-a2a-gender.js.map +0 -1
- package/dst/speechflow-node-a2a-meter.d.ts +0 -12
- package/dst/speechflow-node-a2a-meter.js +0 -155
- package/dst/speechflow-node-a2a-meter.js.map +0 -1
- package/dst/speechflow-node-a2a-mute.d.ts +0 -16
- package/dst/speechflow-node-a2a-mute.js +0 -91
- package/dst/speechflow-node-a2a-mute.js.map +0 -1
- package/dst/speechflow-node-a2a-vad.d.ts +0 -16
- package/dst/speechflow-node-a2a-vad.js +0 -285
- package/dst/speechflow-node-a2a-vad.js.map +0 -1
- package/dst/speechflow-node-a2a-wav.d.ts +0 -11
- package/dst/speechflow-node-a2a-wav.js +0 -195
- package/dst/speechflow-node-a2a-wav.js.map +0 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +0 -15
- package/dst/speechflow-node-a2t-deepgram.js +0 -255
- package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -16
- package/dst/speechflow-node-t2a-elevenlabs.js +0 -195
- package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +0 -13
- package/dst/speechflow-node-t2a-kokoro.js +0 -149
- package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
- package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
- package/dst/speechflow-node-t2t-deepl.js +0 -142
- package/dst/speechflow-node-t2t-deepl.js.map +0 -1
- package/dst/speechflow-node-t2t-format.d.ts +0 -11
- package/dst/speechflow-node-t2t-format.js +0 -82
- package/dst/speechflow-node-t2t-format.js.map +0 -1
- package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
- package/dst/speechflow-node-t2t-ollama.js +0 -247
- package/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/dst/speechflow-node-t2t-openai.d.ts +0 -13
- package/dst/speechflow-node-t2t-openai.js +0 -227
- package/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
- package/dst/speechflow-node-t2t-sentence.js +0 -234
- package/dst/speechflow-node-t2t-sentence.js.map +0 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -13
- package/dst/speechflow-node-t2t-subtitle.js +0 -278
- package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
- package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
- package/dst/speechflow-node-t2t-transformers.js +0 -265
- package/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/dst/speechflow-node-x2x-filter.d.ts +0 -11
- package/dst/speechflow-node-x2x-filter.js +0 -117
- package/dst/speechflow-node-x2x-filter.js.map +0 -1
- package/dst/speechflow-node-x2x-trace.d.ts +0 -11
- package/dst/speechflow-node-x2x-trace.js +0 -111
- package/dst/speechflow-node-x2x-trace.js.map +0 -1
- package/dst/speechflow-node-xio-device.d.ts +0 -13
- package/dst/speechflow-node-xio-device.js +0 -226
- package/dst/speechflow-node-xio-device.js.map +0 -1
- package/dst/speechflow-node-xio-file.d.ts +0 -11
- package/dst/speechflow-node-xio-file.js +0 -210
- package/dst/speechflow-node-xio-file.js.map +0 -1
- package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
- package/dst/speechflow-node-xio-mqtt.js +0 -185
- package/dst/speechflow-node-xio-mqtt.js.map +0 -1
- package/dst/speechflow-node-xio-websocket.d.ts +0 -13
- package/dst/speechflow-node-xio-websocket.js +0 -278
- package/dst/speechflow-node-xio-websocket.js.map +0 -1
- package/dst/speechflow-node.d.ts +0 -65
- package/dst/speechflow-node.js +0 -180
- package/dst/speechflow-node.js.map +0 -1
- package/dst/speechflow-utils.d.ts +0 -69
- package/dst/speechflow-utils.js +0 -486
- package/dst/speechflow-utils.js.map +0 -1
- package/dst/speechflow.js +0 -768
- package/dst/speechflow.js.map +0 -1
- package/src/speechflow-node-a2a-meter.ts +0 -130
- package/src/speechflow-node-a2a-vad.ts +0 -285
- package/src/speechflow-node-a2t-deepgram.ts +0 -234
- package/src/speechflow-node-t2t-subtitle.ts +0 -149
- /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
- /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
- /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
- /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
- /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts (new file):

```diff
@@ -0,0 +1,313 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import * as Deepgram from "@deepgram/sdk"
+import { DateTime, Duration } from "luxon"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for Deepgram speech-to-text conversion */
+export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "deepgram"
+
+    /* internal state */
+    private dg: Deepgram.LiveClient | null = null
+    private destroyed = false
+    private initTimeout: ReturnType<typeof setTimeout> | null = null
+    private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+    private queue: utils.SingleQueue<SpeechFlowChunk | null> | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
+            keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
+            model: { type: "string", val: "nova-2", pos: 0 },
+            version: { type: "string", val: "latest", pos: 1 },
+            language: { type: "string", val: "multi", pos: 2 },
+            interim: { type: "boolean", val: false, pos: 3 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "text"
+    }
+
+    /* one-time status of node */
+    async status () {
+        let balance = 0
+        try {
+            const deepgram = Deepgram.createClient(this.params.keyAdm)
+            const response = await deepgram.manage.getProjects()
+            if (response !== null && response.error === null) {
+                for (const project of response.result.projects) {
+                    const response = await deepgram.manage.getProjectBalances(project.project_id)
+                    if (response !== null && response.error === null)
+                        balance += response.result.balances[0]?.amount ?? 0
+                }
+            }
+        }
+        catch (error) {
+            this.log("warning", `failed to fetch balance: ${error}`)
+        }
+        return { balance: balance.toFixed(2) }
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("Deepgram node currently supports PCM-S16LE audio only")
+
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* create queue for results */
+        this.queue = new utils.SingleQueue<SpeechFlowChunk | null>()
+
+        /* create a store for the meta information */
+        const metastore = new utils.TimeStore<Map<string, any>>()
+
+        /* connect to Deepgram API */
+        const deepgram = Deepgram.createClient(this.params.key)
+        let language = "en"
+        if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
+            language = this.params.language
+        else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
+            language = "multi"
+        this.dg = deepgram.listen.live({
+            mip_opt_out: true,
+            model: this.params.model,
+            version: this.params.version,
+            language,
+            channels: this.config.audioChannels,
+            sample_rate: this.config.audioSampleRate,
+            encoding: "linear16",
+            multichannel: false,
+            endpointing: false,
+            interim_results: this.params.interim,
+            smart_format: true,
+            punctuate: true,
+            filler_words: true,
+            numerals: true,
+            diarize: false,
+            profanity_filter: false,
+            redact: false
+        })
+
+        /* hook onto Deepgram API events */
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
+            if (this.destroyed || this.queue === null)
+                return
+            const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
+            const words = (data.channel?.alternatives[0]?.words ?? []) as
+                { word: string, punctuated_word?: string, start: number, end: number }[]
+            const isFinal = (data.is_final ?? false) as boolean
+            if (text === "")
+                this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
+            else {
+                this.log("info", `text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s): "${text}"`)
+                const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
+                const end = start.plus({ seconds: data.duration })
+                const metas = metastore.fetch(start, end)
+                const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                    curr.forEach((val, key) => { prev.set(key, val) })
+                    return prev
+                }, new Map<string, any>())
+                metastore.prune(start)
+                meta.set("words", words.map((word) => {
+                    const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
+                    const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
+                    return { word: word.punctuated_word ?? word.word, start, end }
+                }))
+                const chunk = new SpeechFlowChunk(start, end,
+                    isFinal ? "final" : "intermediate", "text", text, meta)
+                this.queue.write(chunk)
+            }
+        })
+        this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => {
+            this.log("info", "speech started", data)
+        })
+        this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => {
+            this.log("info", "utterance end received", data)
+        })
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
+            this.log("info", "metadata received")
+        })
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
+            this.log("info", "connection close")
+            if (!this.destroyed && this.queue !== null)
+                this.queue.write(null)
+        })
+        this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
+            this.log("error", `error: ${error.message}`)
+            if (!this.destroyed && this.queue !== null)
+                this.queue.write(null)
+            this.emit("error")
+        })
+
+        /* wait for Deepgram API to be available */
+        await new Promise((resolve, reject) => {
+            this.connectionTimeout = setTimeout(() => {
+                if (this.connectionTimeout !== null) {
+                    this.connectionTimeout = null
+                    reject(new Error("Deepgram: timeout waiting for connection open"))
+                }
+            }, 8000)
+            this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
+                this.log("info", "connection open")
+                if (this.connectionTimeout !== null) {
+                    clearTimeout(this.connectionTimeout)
+                    this.connectionTimeout = null
+                }
+                resolve(true)
+            })
+        })
+
+        /* remember opening time to receive time zero offset */
+        this.timeOpen = DateTime.now()
+
+        /* provide Duplex stream and internally attach to Deepgram API */
+        const self = this
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed || self.dg === null) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (chunk.type !== "audio")
+                    callback(new Error("expected audio input chunk"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected Buffer input chunk"))
+                else {
+                    if (chunk.payload.byteLength > 0) {
+                        self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
+                        if (chunk.meta.size > 0)
+                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
+                        try {
+                            self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
+                        }
+                        catch (error) {
+                            callback(error instanceof Error ? error : new Error("failed to send to Deepgram"))
+                            return
+                        }
+                    }
+                    callback()
+                }
+            },
+            read (size) {
+                if (self.destroyed || self.queue === null) {
+                    this.push(null)
+                    return
+                }
+                let readTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                    if (readTimeout !== null) {
+                        readTimeout = null
+                        if (!self.destroyed) {
+                            self.log("warning", "read timeout - pushing null to prevent hanging")
+                            this.push(null)
+                        }
+                    }
+                }, 30 * 1000)
+                self.queue.read().then((chunk) => {
+                    if (readTimeout !== null) {
+                        clearTimeout(readTimeout)
+                        readTimeout = null
+                    }
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk, self.config.textEncoding)
+                    }
+                }).catch((error) => {
+                    if (readTimeout !== null) {
+                        clearTimeout(readTimeout)
+                        readTimeout = null
+                    }
+                    if (!self.destroyed) {
+                        self.log("error", `queue read error: ${error.message}`)
+                        this.push(null)
+                    }
+                })
+            },
+            final (callback) {
+                if (self.destroyed || self.dg === null) {
+                    callback()
+                    return
+                }
+                try {
+                    self.dg.requestClose()
+                }
+                catch (error) {
+                    self.log("warning", `error closing Deepgram connection: ${error}`)
+                }
+                /* NOTICE: do not push null here -- let the Deepgram close event handle it */
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction first to stop all async operations */
+        this.destroyed = true
+
+        /* cleanup all timers */
+        if (this.initTimeout !== null) {
+            clearTimeout(this.initTimeout)
+            this.initTimeout = null
+        }
+        if (this.connectionTimeout !== null) {
+            clearTimeout(this.connectionTimeout)
+            this.connectionTimeout = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* close Deepgram connection and remove listeners */
+        if (this.dg !== null) {
+            try {
+                this.dg.removeAllListeners()
+                this.dg.requestClose()
+            }
+            catch (error) {
+                this.log("warning", `error during Deepgram cleanup: ${error}`)
+            }
+            this.dg = null
+        }
+
+        /* signal EOF to any pending read operations */
+        if (this.queue !== null) {
+            this.queue.write(null)
+            this.queue = null
+        }
+    }
+}
```
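The new Deepgram node bridges Deepgram's push-style transcript events to the pull-style `Duplex.read()` side through `utils.SingleQueue`: event handlers `write()` chunks (or a `null` EOF sentinel) into the queue, and `read()` awaits the next one. The queue itself lives in `speechflow-utils.ts`, which this diff does not show; the following is only a minimal sketch of a queue satisfying that `read()`/`write()` contract, not the package's actual implementation:

```ts
/* minimal sketch of a single-consumer queue with the read()/write()
   contract used above -- NOT the actual speechflow-utils implementation */
class SingleQueue<T> {
    private items: T[] = []
    private waiter: ((item: T) => void) | null = null

    /* enqueue an item, waking a pending read() if one is waiting */
    write (item: T): void {
        if (this.waiter !== null) {
            const resolve = this.waiter
            this.waiter = null
            resolve(item)
        }
        else
            this.items.push(item)
    }

    /* dequeue the next item, awaiting one if none is buffered
       (assumes a single pending reader at a time, hence "single") */
    read (): Promise<T> {
        if (this.items.length > 0)
            return Promise.resolve(this.items.shift()!)
        return new Promise<T>((resolve) => { this.waiter = resolve })
    }
}
```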
package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts (removed-line content below is partly truncated or elided by the registry's diff rendering):

```diff
@@ -23,6 +23,8 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
     /* internal state */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
     private static speexInitialized = false
+    private destroyed = false
+    private resampler: SpeexResampler | null = null
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -39,6 +41,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
             optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
         })
 
+        /* sanity check parameters */
+        if (!this.params.key)
+            throw new Error("ElevenLabs API key not configured")
+
         /* declare node input/output format */
         this.input = "text"
         this.output = "audio"
@@ -54,6 +60,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
     /* open node */
     async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
         /* establish ElevenLabs API connection */
         this.elevenlabs = new ElevenLabs.ElevenLabsClient({
             apiKey: this.params.key
@@ -120,37 +129,68 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
             await SpeexResampler.initPromise
             SpeechFlowNodeElevenlabs.speexInitialized = true
         }
-
+        this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
 
         /* create transform stream and connect it to the ElevenLabs API */
-        const
+        const self = this
         this.stream = new Stream.Transform({
             writableObjectMode: true,
             readableObjectMode: true,
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
-
-
-
-
+                    (async () => {
+                        let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+                            processTimeout = null
+                            callback(new Error("ElevenLabs API timeout"))
+                        }, 60 * 1000)
+                        const clearProcessTimeout = () => {
+                            if (processTimeout !== null) {
+                                clearTimeout(processTimeout)
+                                processTimeout = null
+                            }
+                        }
+                        try {
+                            const stream = await speechStream(chunk.payload as string)
+                            if (self.destroyed) {
+                                clearProcessTimeout()
+                                callback(new Error("stream destroyed during processing"))
+                                return
+                            }
+                            const buffer = await getStreamAsBuffer(stream)
+                            if (self.destroyed) {
+                                clearProcessTimeout()
+                                callback(new Error("stream destroyed during processing"))
+                                return
+                            }
+                            const bufferResampled = self.resampler!.processChunk(buffer)
+                            self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
                             const chunkNew = chunk.clone()
                             chunkNew.type = "audio"
                             chunkNew.payload = bufferResampled
+                            clearProcessTimeout()
                             this.push(chunkNew)
                             callback()
-                }
-
-
-
-
-                })
+                        }
+                        catch (error) {
+                            clearProcessTimeout()
+                            callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
+                        }
+                    })()
                 }
             },
             final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
                 this.push(null)
                 callback()
             }
@@ -159,12 +199,19 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
     /* close node */
     async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
         /* destroy stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }
 
+        /* destroy resampler */
+        if (this.resampler !== null)
+            this.resampler = null
+
         /* destroy ElevenLabs API */
         if (this.elevenlabs !== null)
             this.elevenlabs = null
```
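Both guards added to the ElevenLabs `transform()` follow one pattern: the async API round-trip is raced against a 60-second timer, and whichever side settles first must invalidate the other so the stream `callback` fires exactly once. A generic rendering of that pattern (a hypothetical helper, not part of speechflow) could look like:

```ts
/* hypothetical helper, not part of speechflow: race an async operation
   against a timer so the caller sees exactly one settlement, mirroring
   the processTimeout/clearProcessTimeout logic in the hunk above */
function withTimeout<T> (operation: Promise<T>, ms: number, what: string): Promise<T> {
    return new Promise<T>((resolve, reject) => {
        const timer = setTimeout(() => { reject(new Error(`${what}: timeout`)) }, ms)
        operation.then(
            (result) => { clearTimeout(timer); resolve(result) },
            (error)  => { clearTimeout(timer); reject(error) }
        )
    })
}
```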
package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts:

```diff
@@ -21,6 +21,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
 
     /* internal state */
     private kokoro: KokoroTTS | null = null
+    private resampler: SpeexResampler | null = null
     private static speexInitialized = false
 
     /* construct node */
@@ -59,9 +60,11 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
         const interval = setInterval(() => {
             for (const [ artifact, percent ] of progressState) {
                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
-                if (percent >=
+                if (percent >= 100.0)
                     progressState.delete(artifact)
             }
+            if (progressState.size === 0)
+                clearInterval(interval)
         }, 1000)
         this.kokoro = await KokoroTTS.from_pretrained(model, {
             dtype: "q4f16",
@@ -78,7 +81,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
             await SpeexResampler.initPromise
             SpeechFlowNodeKokoro.speexInitialized = true
         }
-
+        this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
 
         /* determine voice for text-to-speech operation */
         const voices = {
@@ -91,7 +94,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
         if (voice === undefined)
             throw new Error(`invalid Kokoro voice "${this.params.voice}"`)
 
-        /* perform text-to-speech operation with
+        /* perform text-to-speech operation with Kokoro API */
         const text2speech = async (text: string) => {
             this.log("info", `Kokoro: input: "${text}"`)
             const audio = await this.kokoro!.generate(text, {
@@ -110,7 +113,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
             }
 
             /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
-            const buffer2 = resampler
+            const buffer2 = this.resampler!.processChunk(buffer1)
 
             return buffer2
         }
@@ -153,6 +156,10 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
             this.stream = null
         }
 
+        /* destroy resampler */
+        if (this.resampler !== null)
+            this.resampler = null
+
         /* destroy Kokoro API */
         if (this.kokoro !== null)
             this.kokoro = null
```
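The Kokoro changes mirror the ElevenLabs ones: the `SpeexResampler` becomes an instance field, created once per `open()` after the one-time `SpeexResampler.initPromise` initialization, used via `processChunk()`, and dropped again in `close()`. Condensed into a sketch (the module name and the concrete 24000 Hz input rate are assumptions taken from the hunks above):

```ts
import SpeexResampler from "speex-resampler"  /* assumed module name */

let speexInitialized = false

/* sketch of the per-node resampler lifecycle used above: one-time
   initialization, then one instance per open(): mono, quality level 7 */
async function openResampler (fromRate: number, toRate: number): Promise<SpeexResampler> {
    if (!speexInitialized) {
        await SpeexResampler.initPromise
        speexInitialized = true
    }
    return new SpeexResampler(1, fromRate, toRate, 7)
}

/* e.g. Kokoro: PCM/I16 24 kHz output resampled to the configured rate */
// const resampler = await openResampler(24000, 48000)
// const buffer2 = resampler.processChunk(buffer1)
```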
package/speechflow-cli/src/speechflow-node-t2t-deepl.ts (removed-line content below is partly truncated by the registry's diff rendering):

```diff
@@ -27,12 +27,16 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY },
+            key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY ?? "" },
             src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
             dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
             optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
         })
 
+        /* validate API key */
+        if (this.params.key === "")
+            throw new Error("DeepL API key is required")
+
         /* sanity check situation */
         if (this.params.src === this.params.dst)
             throw new Error("source and destination languages cannot be the same")
@@ -44,9 +48,10 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
 
     /* one-time status of node */
     async status () {
-
-        const usage = await
-        const
+        const deepl = new DeepL.Translator(this.params.key)
+        const usage = await deepl.getUsage()
+        const limit = usage?.character?.limit ?? 1
+        const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
         return { usage: `${percent.toFixed(8)}%` }
     }
 
```
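The rewritten `status()` instantiates its own `DeepL.Translator` and guards the percentage computation against a missing or zero character limit before dividing. Isolated as a plain function (names hypothetical), the logic is:

```ts
/* hypothetical standalone version of the usage computation above:
   default a missing limit to 1 and never divide by zero */
function usagePercent (count?: number, limit?: number): number {
    const lim = limit ?? 1
    return lim > 0 ? ((count ?? 0) / lim) * 100 : 0
}

/* usagePercent(5000, 500000) -> 1, usagePercent(5000, 0) -> 0 */
```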
package/speechflow-cli/src/speechflow-node-t2t-format.ts:

```diff
@@ -41,7 +41,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
             return text
         }
 
-        /* establish a duplex stream and connect it to
+        /* establish a duplex stream and connect it to text formatting */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -74,7 +74,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
         })
     }
 
-    /*
+    /* close node */
     async close () {
         /* close stream */
         if (this.stream !== null) {
```
package/speechflow-cli/src/speechflow-node-t2t-ollama.ts:

```diff
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
             "Do NOT give any preamble.\n" +
             "Do NOT give any prolog.\n" +
             "Do NOT give any epilog.\n" +
-            "Do NOT change the
+            "Do NOT change the grammar.\n" +
             "Do NOT use synonyms for words.\n" +
             "Keep all words.\n" +
             "Fill in missing commas.\n" +
```
package/speechflow-cli/src/speechflow-node-t2t-openai.ts:

```diff
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
             "Do NOT give any preamble.\n" +
             "Do NOT give any prolog.\n" +
             "Do NOT give any epilog.\n" +
-            "Do NOT change the
+            "Do NOT change the grammar.\n" +
             "Do NOT use synonyms for words.\n" +
             "Keep all words.\n" +
             "Fill in missing commas.\n" +
```