speechflow 2.0.4 → 2.1.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +34 -5
- package/etc/speechflow.yaml +20 -48
- package/etc/stx.conf +2 -2
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +60 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +234 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js +275 -0
- package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +32 -15
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +26 -6
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +72 -5
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +1 -0
- package/speechflow-cli/package.d/sherpa-onnx+1.12.23.patch +12 -0
- package/speechflow-cli/package.json +20 -17
- package/speechflow-cli/src/lib.d.ts +30 -4
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +68 -0
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +219 -0
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +33 -15
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +30 -11
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +86 -10
- package/speechflow-ui-db/dst/index.css +1 -1
- package/speechflow-ui-db/dst/index.js +13 -13
- package/speechflow-ui-db/package.json +12 -11
- package/speechflow-ui-db/src/app.vue +62 -17
- package/speechflow-ui-st/dst/index.css +1 -1
- package/speechflow-ui-st/dst/index.js +32 -32
- package/speechflow-ui-st/package.json +13 -12
- package/speechflow-ui-st/src/app.vue +9 -8
|
@@ -81,11 +81,11 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
|
|
|
81
81
|
|
|
82
82
|
/* grab the accumulated chunk data */
|
|
83
83
|
const chunkData = this.chunkBuffer
|
|
84
|
-
this.chunkBuffer =
|
|
84
|
+
this.chunkBuffer = chunkData.subarray(samplesPerChunk)
|
|
85
85
|
|
|
86
86
|
/* update internal audio sample sliding window for LUFS-M */
|
|
87
87
|
if (chunkData.length > sampleWindow.length)
|
|
88
|
-
sampleWindow.set(chunkData.subarray(
|
|
88
|
+
sampleWindow.set(chunkData.subarray(0, sampleWindow.length), 0)
|
|
89
89
|
else {
|
|
90
90
|
sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
|
|
91
91
|
sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
|
|
@@ -37,7 +37,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
37
37
|
model: { type: "string", val: "nova-2", pos: 0 },
|
|
38
38
|
version: { type: "string", val: "latest", pos: 1 },
|
|
39
39
|
language: { type: "string", val: "multi", pos: 2 },
|
|
40
|
-
interim: { type: "boolean", val: false, pos: 3 }
|
|
40
|
+
interim: { type: "boolean", val: false, pos: 3 },
|
|
41
|
+
keywords: { type: "string", val: "", pos: 4 }
|
|
41
42
|
})
|
|
42
43
|
|
|
43
44
|
/* sanity check parameters */
|
|
@@ -86,34 +87,51 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
86
87
|
/* create a store for the meta information */
|
|
87
88
|
const metastore = new util.TimeStore<Map<string, any>>()
|
|
88
89
|
|
|
89
|
-
/*
|
|
90
|
-
const
|
|
91
|
-
let language = "en"
|
|
92
|
-
if (this.params.language !== "en") {
|
|
93
|
-
if (this.params.model.match(/^nova-2/))
|
|
94
|
-
language = this.params.language
|
|
95
|
-
else if (this.params.model.match(/^nova-3/))
|
|
96
|
-
language = "multi"
|
|
97
|
-
}
|
|
98
|
-
this.dg = deepgram.listen.live({
|
|
90
|
+
/* configure Deepgram connection options */
|
|
91
|
+
const options: Deepgram.LiveSchema = {
|
|
99
92
|
mip_opt_out: true,
|
|
100
93
|
model: this.params.model,
|
|
101
94
|
version: this.params.version,
|
|
102
|
-
language,
|
|
103
95
|
channels: this.config.audioChannels,
|
|
104
96
|
sample_rate: this.config.audioSampleRate,
|
|
105
97
|
encoding: "linear16",
|
|
106
98
|
multichannel: false,
|
|
107
99
|
endpointing: false,
|
|
108
100
|
interim_results: this.params.interim,
|
|
109
|
-
smart_format:
|
|
101
|
+
smart_format: false,
|
|
110
102
|
punctuate: true,
|
|
111
103
|
filler_words: true,
|
|
112
|
-
numerals:
|
|
104
|
+
numerals: false,
|
|
113
105
|
diarize: false,
|
|
114
106
|
profanity_filter: false,
|
|
115
107
|
redact: false
|
|
116
|
-
}
|
|
108
|
+
}
|
|
109
|
+
const model = this.params.model as string
|
|
110
|
+
const language = this.params.language as string
|
|
111
|
+
const keywords = this.params.keywords as string
|
|
112
|
+
if (model.match(/^nova-2/) && language !== "en")
|
|
113
|
+
options.language = this.params.language
|
|
114
|
+
else if (model.match(/^nova-3/) && language !== "en")
|
|
115
|
+
options.language = "multi"
|
|
116
|
+
else
|
|
117
|
+
options.language = "en"
|
|
118
|
+
if (keywords !== "") {
|
|
119
|
+
if (model.match(/^nova-2/))
|
|
120
|
+
options.keywords = keywords.split(/(?:\s+|\s*,\s*)/).map((kw) => {
|
|
121
|
+
let boost = 2
|
|
122
|
+
if (kw.startsWith("-")) {
|
|
123
|
+
kw = kw.slice(1)
|
|
124
|
+
boost = -4
|
|
125
|
+
}
|
|
126
|
+
return `${kw}:${boost}`
|
|
127
|
+
})
|
|
128
|
+
else if (model.match(/^nova-3/))
|
|
129
|
+
options.keyterm = keywords.split(/(?:\s+|\s*,\s*)/).join(" ")
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/* connect to Deepgram API */
|
|
133
|
+
const deepgram = Deepgram.createClient(this.params.key)
|
|
134
|
+
this.dg = deepgram.listen.live(options)
|
|
117
135
|
|
|
118
136
|
/* hook onto Deepgram API events */
|
|
119
137
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
@@ -63,7 +63,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
|
|
|
63
63
|
|
|
64
64
|
/* provide text-to-text translation */
|
|
65
65
|
const translate = async (text: string) => {
|
|
66
|
-
const src = this.params.src
|
|
66
|
+
const src = this.params.src
|
|
67
67
|
const dst = this.params.dst === "en" ? "en-US" : this.params.dst
|
|
68
68
|
const result = await this.deepl!.translateText(text, src, dst, {
|
|
69
69
|
splitSentences: "off",
|
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
/* standard dependencies */
|
|
8
|
-
import Stream
|
|
8
|
+
import Stream from "node:stream"
|
|
9
9
|
|
|
10
10
|
/* external dependencies */
|
|
11
|
-
import BadWordsNext
|
|
12
|
-
import en
|
|
13
|
-
import de
|
|
11
|
+
import BadWordsNext from "bad-words-next"
|
|
12
|
+
import en from "bad-words-next/lib/en"
|
|
13
|
+
import de from "bad-words-next/lib/de"
|
|
14
|
+
import { Profanity, CensorType } from "@2toad/profanity"
|
|
14
15
|
|
|
15
16
|
/* internal dependencies */
|
|
16
17
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
@@ -31,8 +32,7 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
|
|
|
31
32
|
/* declare node configuration parameters */
|
|
32
33
|
this.configure({
|
|
33
34
|
lang: { type: "string", val: "en", match: /^(?:en|de)$/ },
|
|
34
|
-
placeholder: { type: "string", val: "***" }
|
|
35
|
-
mode: { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
|
|
35
|
+
placeholder: { type: "string", val: "***" }
|
|
36
36
|
})
|
|
37
37
|
|
|
38
38
|
/* declare node input/output format */
|
|
@@ -42,18 +42,37 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
|
|
|
42
42
|
|
|
43
43
|
/* open node */
|
|
44
44
|
async open () {
|
|
45
|
-
/* create profanity filter
|
|
46
|
-
const
|
|
45
|
+
/* create profanity filter instances */
|
|
46
|
+
const filter1 = util.run("creating profanity filter 1", () =>
|
|
47
47
|
new BadWordsNext({
|
|
48
48
|
data: langData[this.params.lang],
|
|
49
49
|
placeholder: this.params.placeholder,
|
|
50
|
-
placeholderMode:
|
|
50
|
+
placeholderMode: "repeat" as "replace" | "repeat"
|
|
51
51
|
})
|
|
52
52
|
)
|
|
53
|
+
const filter2 = util.run("creating profanity filter 2", () => {
|
|
54
|
+
const profanity = new Profanity({
|
|
55
|
+
languages: [ this.params.lang ],
|
|
56
|
+
grawlix: this.params.placeholder,
|
|
57
|
+
wholeWord: true
|
|
58
|
+
})
|
|
59
|
+
if (this.params.lang === "de") {
|
|
60
|
+
/* improve word-list for german language */
|
|
61
|
+
profanity.addWords([ "sex" ])
|
|
62
|
+
profanity.removeWords([
|
|
63
|
+
"verdammt", "glocke", "wahnsinn", "knochen", "fehler", "mist", "phantasievoll",
|
|
64
|
+
"huhn", "ziegen", "geil", "lustig", "verzögert", "schrauben", "geschlecht"
|
|
65
|
+
])
|
|
66
|
+
}
|
|
67
|
+
return profanity
|
|
68
|
+
})
|
|
53
69
|
|
|
54
70
|
/* apply profanity filtering */
|
|
55
|
-
const censor = (text: string): string =>
|
|
56
|
-
|
|
71
|
+
const censor = (text: string): string => {
|
|
72
|
+
text = filter1.filter(text)
|
|
73
|
+
text = filter2.censor(text, CensorType.Word)
|
|
74
|
+
return text
|
|
75
|
+
}
|
|
57
76
|
|
|
58
77
|
/* establish a transform stream and connect it to profanity filtering */
|
|
59
78
|
this.stream = new Stream.Transform({
|
|
@@ -14,13 +14,14 @@ import { Duration } from "luxon"
|
|
|
14
14
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
15
15
|
import * as util from "./speechflow-util"
|
|
16
16
|
|
|
17
|
-
/* text stream queue element
|
|
17
|
+
/* text stream queue element */
|
|
18
18
|
type TextQueueElement = {
|
|
19
|
-
type:
|
|
20
|
-
chunk:
|
|
21
|
-
|
|
19
|
+
type: "text-frame",
|
|
20
|
+
chunk: SpeechFlowChunk,
|
|
21
|
+
preview?: "pending" | "sent",
|
|
22
|
+
complete?: boolean
|
|
22
23
|
} | {
|
|
23
|
-
type:
|
|
24
|
+
type: "text-eof"
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
/* SpeechFlow node for sentence splitting */
|
|
@@ -35,13 +36,16 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
35
36
|
private queueSend = this.queue.pointerUse("send")
|
|
36
37
|
private closing = false
|
|
37
38
|
private workingOffTimer: ReturnType<typeof setTimeout> | null = null
|
|
39
|
+
private previewTimer: ReturnType<typeof setTimeout> | null = null
|
|
38
40
|
|
|
39
41
|
/* construct node */
|
|
40
42
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
41
43
|
super(id, cfg, opts, args)
|
|
42
44
|
|
|
43
45
|
/* declare node configuration parameters */
|
|
44
|
-
this.configure({
|
|
46
|
+
this.configure({
|
|
47
|
+
timeout: { type: "number", pos: 0, val: 3 * 1000 }
|
|
48
|
+
})
|
|
45
49
|
|
|
46
50
|
/* declare node input/output format */
|
|
47
51
|
this.input = "text"
|
|
@@ -78,6 +82,8 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
78
82
|
this.queueSplit.walk(+1)
|
|
79
83
|
break
|
|
80
84
|
}
|
|
85
|
+
|
|
86
|
+
/* perform sentence splitting on input chunk */
|
|
81
87
|
const chunk = element.chunk
|
|
82
88
|
const payload = chunk.payload as string
|
|
83
89
|
const m = payload.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
|
|
@@ -115,20 +121,33 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
115
121
|
if (element2 === undefined)
|
|
116
122
|
break
|
|
117
123
|
if (element2.type === "text-eof") {
|
|
124
|
+
/* no more chunks: output as final
|
|
125
|
+
(perhaps incomplete sentence at end of stream) */
|
|
118
126
|
element.complete = true
|
|
119
127
|
this.queueSplit.touch()
|
|
120
128
|
this.queueSplit.walk(+1)
|
|
121
129
|
break
|
|
122
130
|
}
|
|
131
|
+
|
|
132
|
+
/* merge into following chunk */
|
|
123
133
|
element2.chunk.timestampStart = element.chunk.timestampStart
|
|
124
134
|
element2.chunk.payload =
|
|
125
135
|
(element.chunk.payload as string) + " " +
|
|
126
136
|
(element2.chunk.payload as string)
|
|
137
|
+
|
|
138
|
+
/* reset preview state (merged content needs new preview) */
|
|
139
|
+
element2.preview = undefined
|
|
127
140
|
this.queueSplit.delete()
|
|
128
141
|
this.queueSplit.touch()
|
|
129
142
|
}
|
|
130
|
-
else
|
|
143
|
+
else {
|
|
144
|
+
/* no following chunk yet: mark for intermediate preview output */
|
|
145
|
+
if (element.preview !== "sent") {
|
|
146
|
+
element.preview = "pending"
|
|
147
|
+
this.queueSplit.touch()
|
|
148
|
+
}
|
|
131
149
|
break
|
|
150
|
+
}
|
|
132
151
|
}
|
|
133
152
|
}
|
|
134
153
|
|
|
@@ -157,8 +176,23 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
157
176
|
callback(new Error("expected text input as string chunks"))
|
|
158
177
|
else if (chunk.payload.length === 0)
|
|
159
178
|
callback()
|
|
179
|
+
else if (chunk.kind === "intermediate") {
|
|
180
|
+
/* intermediate chunks: pass through immediately (bypass queue) */
|
|
181
|
+
self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
|
|
182
|
+
self.log("info", `send text (intermediate pass-through): ${JSON.stringify(chunk.payload)}`)
|
|
183
|
+
this.push(chunk)
|
|
184
|
+
callback()
|
|
185
|
+
}
|
|
160
186
|
else {
|
|
161
|
-
|
|
187
|
+
/* final chunks: queue for sentence splitting */
|
|
188
|
+
self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
|
|
189
|
+
|
|
190
|
+
/* cancel any pending preview timeout */
|
|
191
|
+
if (self.previewTimer !== null) {
|
|
192
|
+
clearTimeout(self.previewTimer)
|
|
193
|
+
self.previewTimer = null
|
|
194
|
+
}
|
|
195
|
+
|
|
162
196
|
self.queueRecv.append({ type: "text-frame", chunk })
|
|
163
197
|
callback()
|
|
164
198
|
}
|
|
@@ -192,6 +226,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
192
226
|
else if (element !== undefined
|
|
193
227
|
&& element.type === "text-frame"
|
|
194
228
|
&& element.complete === true) {
|
|
229
|
+
/* send all consecutive complete chunks */
|
|
195
230
|
while (true) {
|
|
196
231
|
const nextElement = self.queueSend.peek()
|
|
197
232
|
if (nextElement === undefined)
|
|
@@ -204,12 +239,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
204
239
|
else if (nextElement.type === "text-frame"
|
|
205
240
|
&& nextElement.complete !== true)
|
|
206
241
|
break
|
|
207
|
-
self.log("info", `send text: ${JSON.stringify(nextElement.chunk.payload)}`)
|
|
242
|
+
self.log("info", `send text (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)}`)
|
|
208
243
|
this.push(nextElement.chunk)
|
|
209
244
|
self.queueSend.walk(+1)
|
|
210
245
|
self.queue.trim()
|
|
211
246
|
}
|
|
212
247
|
}
|
|
248
|
+
else if (element !== undefined
|
|
249
|
+
&& element.type === "text-frame"
|
|
250
|
+
&& element.preview === "pending") {
|
|
251
|
+
/* send intermediate preview (without advancing pointer) */
|
|
252
|
+
const previewChunk = element.chunk.clone()
|
|
253
|
+
previewChunk.kind = "intermediate"
|
|
254
|
+
self.log("info", `send text (intermediate preview): ${JSON.stringify(previewChunk.payload)}`)
|
|
255
|
+
this.push(previewChunk)
|
|
256
|
+
element.preview = "sent"
|
|
257
|
+
self.queueSend.touch()
|
|
258
|
+
|
|
259
|
+
/* start preview timeout (if configured) */
|
|
260
|
+
const timeout = self.params.timeout as number
|
|
261
|
+
if (timeout > 0 && self.previewTimer === null) {
|
|
262
|
+
self.previewTimer = setTimeout(() => {
|
|
263
|
+
self.previewTimer = null
|
|
264
|
+
if (self.closing)
|
|
265
|
+
return
|
|
266
|
+
|
|
267
|
+
/* promote preview to final chunk */
|
|
268
|
+
const el = self.queueSend.peek()
|
|
269
|
+
if (el !== undefined
|
|
270
|
+
&& el.type === "text-frame"
|
|
271
|
+
&& el.preview === "sent"
|
|
272
|
+
&& el.complete !== true) {
|
|
273
|
+
self.log("info", `timeout: promoting intermediate to final: ${JSON.stringify(el.chunk.payload)}`)
|
|
274
|
+
el.complete = true
|
|
275
|
+
self.queueSend.touch()
|
|
276
|
+
self.queue.emit("write")
|
|
277
|
+
}
|
|
278
|
+
}, timeout)
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/* wait for more data */
|
|
282
|
+
if (!self.closing)
|
|
283
|
+
self.queue.once("write", flushPendingChunks)
|
|
284
|
+
}
|
|
213
285
|
else if (!self.closing)
|
|
214
286
|
self.queue.once("write", flushPendingChunks)
|
|
215
287
|
}
|
|
@@ -223,11 +295,15 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
223
295
|
/* indicate closing */
|
|
224
296
|
this.closing = true
|
|
225
297
|
|
|
226
|
-
/* clean up
|
|
298
|
+
/* clean up timers */
|
|
227
299
|
if (this.workingOffTimer !== null) {
|
|
228
300
|
clearTimeout(this.workingOffTimer)
|
|
229
301
|
this.workingOffTimer = null
|
|
230
302
|
}
|
|
303
|
+
if (this.previewTimer !== null) {
|
|
304
|
+
clearTimeout(this.previewTimer)
|
|
305
|
+
this.previewTimer = null
|
|
306
|
+
}
|
|
231
307
|
|
|
232
308
|
/* remove any pending event listeners */
|
|
233
309
|
this.queue.removeAllListeners("write")
|