speechflow 2.0.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +34 -5
  3. package/etc/speechflow.yaml +20 -48
  4. package/etc/stx.conf +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +60 -0
  8. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js.map +1 -0
  9. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.d.ts +15 -0
  10. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +234 -0
  11. package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
  13. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.d.ts +16 -0
  15. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js +275 -0
  16. package/speechflow-cli/dst/speechflow-node-a2t-assemblyai.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +32 -15
  18. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
  20. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +26 -6
  22. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -0
  24. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +72 -5
  25. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  26. package/speechflow-cli/etc/oxlint.jsonc +1 -0
  27. package/speechflow-cli/package.d/sherpa-onnx+1.12.23.patch +12 -0
  28. package/speechflow-cli/package.json +20 -17
  29. package/speechflow-cli/src/lib.d.ts +30 -4
  30. package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +68 -0
  31. package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +219 -0
  32. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  33. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +33 -15
  34. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
  35. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +30 -11
  36. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +86 -10
  37. package/speechflow-ui-db/dst/index.css +1 -1
  38. package/speechflow-ui-db/dst/index.js +13 -13
  39. package/speechflow-ui-db/package.json +12 -11
  40. package/speechflow-ui-db/src/app.vue +62 -17
  41. package/speechflow-ui-st/dst/index.css +1 -1
  42. package/speechflow-ui-st/dst/index.js +32 -32
  43. package/speechflow-ui-st/package.json +13 -12
  44. package/speechflow-ui-st/src/app.vue +9 -8
@@ -81,11 +81,11 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
81
81
 
82
82
  /* grab the accumulated chunk data */
83
83
  const chunkData = this.chunkBuffer
84
- this.chunkBuffer = new Float32Array(0)
84
+ this.chunkBuffer = chunkData.subarray(samplesPerChunk)
85
85
 
86
86
  /* update internal audio sample sliding window for LUFS-M */
87
87
  if (chunkData.length > sampleWindow.length)
88
- sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
88
+ sampleWindow.set(chunkData.subarray(0, sampleWindow.length), 0)
89
89
  else {
90
90
  sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
91
91
  sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
@@ -37,7 +37,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
37
37
  model: { type: "string", val: "nova-2", pos: 0 },
38
38
  version: { type: "string", val: "latest", pos: 1 },
39
39
  language: { type: "string", val: "multi", pos: 2 },
40
- interim: { type: "boolean", val: false, pos: 3 }
40
+ interim: { type: "boolean", val: false, pos: 3 },
41
+ keywords: { type: "string", val: "", pos: 4 }
41
42
  })
42
43
 
43
44
  /* sanity check parameters */
@@ -86,34 +87,51 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
86
87
  /* create a store for the meta information */
87
88
  const metastore = new util.TimeStore<Map<string, any>>()
88
89
 
89
- /* connect to Deepgram API */
90
- const deepgram = Deepgram.createClient(this.params.key)
91
- let language = "en"
92
- if (this.params.language !== "en") {
93
- if (this.params.model.match(/^nova-2/))
94
- language = this.params.language
95
- else if (this.params.model.match(/^nova-3/))
96
- language = "multi"
97
- }
98
- this.dg = deepgram.listen.live({
90
+ /* configure Deepgram connection options */
91
+ const options: Deepgram.LiveSchema = {
99
92
  mip_opt_out: true,
100
93
  model: this.params.model,
101
94
  version: this.params.version,
102
- language,
103
95
  channels: this.config.audioChannels,
104
96
  sample_rate: this.config.audioSampleRate,
105
97
  encoding: "linear16",
106
98
  multichannel: false,
107
99
  endpointing: false,
108
100
  interim_results: this.params.interim,
109
- smart_format: true,
101
+ smart_format: false,
110
102
  punctuate: true,
111
103
  filler_words: true,
112
- numerals: true,
104
+ numerals: false,
113
105
  diarize: false,
114
106
  profanity_filter: false,
115
107
  redact: false
116
- })
108
+ }
109
+ const model = this.params.model as string
110
+ const language = this.params.language as string
111
+ const keywords = this.params.keywords as string
112
+ if (model.match(/^nova-2/) && language !== "en")
113
+ options.language = this.params.language
114
+ else if (model.match(/^nova-3/) && language !== "en")
115
+ options.language = "multi"
116
+ else
117
+ options.language = "en"
118
+ if (keywords !== "") {
119
+ if (model.match(/^nova-2/))
120
+ options.keywords = keywords.split(/(?:\s+|\s*,\s*)/).map((kw) => {
121
+ let boost = 2
122
+ if (kw.startsWith("-")) {
123
+ kw = kw.slice(1)
124
+ boost = -4
125
+ }
126
+ return `${kw}:${boost}`
127
+ })
128
+ else if (model.match(/^nova-3/))
129
+ options.keyterm = keywords.split(/(?:\s+|\s*,\s*)/).join(" ")
130
+ }
131
+
132
+ /* connect to Deepgram API */
133
+ const deepgram = Deepgram.createClient(this.params.key)
134
+ this.dg = deepgram.listen.live(options)
117
135
 
118
136
  /* hook onto Deepgram API events */
119
137
  this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
@@ -63,7 +63,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
63
63
 
64
64
  /* provide text-to-text translation */
65
65
  const translate = async (text: string) => {
66
- const src = this.params.src === "en" ? "en-US" : this.params.src
66
+ const src = this.params.src
67
67
  const dst = this.params.dst === "en" ? "en-US" : this.params.dst
68
68
  const result = await this.deepl!.translateText(text, src, dst, {
69
69
  splitSentences: "off",
@@ -5,12 +5,13 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import BadWordsNext from "bad-words-next"
12
- import en from "bad-words-next/lib/en"
13
- import de from "bad-words-next/lib/de"
11
+ import BadWordsNext from "bad-words-next"
12
+ import en from "bad-words-next/lib/en"
13
+ import de from "bad-words-next/lib/de"
14
+ import { Profanity, CensorType } from "@2toad/profanity"
14
15
 
15
16
  /* internal dependencies */
16
17
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -31,8 +32,7 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
31
32
  /* declare node configuration parameters */
32
33
  this.configure({
33
34
  lang: { type: "string", val: "en", match: /^(?:en|de)$/ },
34
- placeholder: { type: "string", val: "***" },
35
- mode: { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
35
+ placeholder: { type: "string", val: "***" }
36
36
  })
37
37
 
38
38
  /* declare node input/output format */
@@ -42,18 +42,37 @@ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
42
42
 
43
43
  /* open node */
44
44
  async open () {
45
- /* create profanity filter instance */
46
- const filter = util.run("creating profanity filter", () =>
45
+ /* create profanity filter instances */
46
+ const filter1 = util.run("creating profanity filter 1", () =>
47
47
  new BadWordsNext({
48
48
  data: langData[this.params.lang],
49
49
  placeholder: this.params.placeholder,
50
- placeholderMode: this.params.mode as "replace" | "repeat"
50
+ placeholderMode: "repeat" as "replace" | "repeat"
51
51
  })
52
52
  )
53
+ const filter2 = util.run("creating profanity filter 2", () => {
54
+ const profanity = new Profanity({
55
+ languages: [ this.params.lang ],
56
+ grawlix: this.params.placeholder,
57
+ wholeWord: true
58
+ })
59
+ if (this.params.lang === "de") {
60
+ /* improve word-list for german language */
61
+ profanity.addWords([ "sex" ])
62
+ profanity.removeWords([
63
+ "verdammt", "glocke", "wahnsinn", "knochen", "fehler", "mist", "phantasievoll",
64
+ "huhn", "ziegen", "geil", "lustig", "verzögert", "schrauben", "geschlecht"
65
+ ])
66
+ }
67
+ return profanity
68
+ })
53
69
 
54
70
  /* apply profanity filtering */
55
- const censor = (text: string): string =>
56
- filter.filter(text)
71
+ const censor = (text: string): string => {
72
+ text = filter1.filter(text)
73
+ text = filter2.censor(text, CensorType.Word)
74
+ return text
75
+ }
57
76
 
58
77
  /* establish a transform stream and connect it to profanity filtering */
59
78
  this.stream = new Stream.Transform({
@@ -14,13 +14,14 @@ import { Duration } from "luxon"
14
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
15
  import * as util from "./speechflow-util"
16
16
 
17
- /* text stream queue element */
17
+ /* text stream queue element */
18
18
  type TextQueueElement = {
19
- type: "text-frame",
20
- chunk: SpeechFlowChunk,
21
- complete?: boolean
19
+ type: "text-frame",
20
+ chunk: SpeechFlowChunk,
21
+ preview?: "pending" | "sent",
22
+ complete?: boolean
22
23
  } | {
23
- type: "text-eof"
24
+ type: "text-eof"
24
25
  }
25
26
 
26
27
  /* SpeechFlow node for sentence splitting */
@@ -35,13 +36,16 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
35
36
  private queueSend = this.queue.pointerUse("send")
36
37
  private closing = false
37
38
  private workingOffTimer: ReturnType<typeof setTimeout> | null = null
39
+ private previewTimer: ReturnType<typeof setTimeout> | null = null
38
40
 
39
41
  /* construct node */
40
42
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
41
43
  super(id, cfg, opts, args)
42
44
 
43
45
  /* declare node configuration parameters */
44
- this.configure({})
46
+ this.configure({
47
+ timeout: { type: "number", pos: 0, val: 3 * 1000 }
48
+ })
45
49
 
46
50
  /* declare node input/output format */
47
51
  this.input = "text"
@@ -78,6 +82,8 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
78
82
  this.queueSplit.walk(+1)
79
83
  break
80
84
  }
85
+
86
+ /* perform sentence splitting on input chunk */
81
87
  const chunk = element.chunk
82
88
  const payload = chunk.payload as string
83
89
  const m = payload.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
@@ -115,20 +121,33 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
115
121
  if (element2 === undefined)
116
122
  break
117
123
  if (element2.type === "text-eof") {
124
+ /* no more chunks: output as final
125
+ (perhaps incomplete sentence at end of stream) */
118
126
  element.complete = true
119
127
  this.queueSplit.touch()
120
128
  this.queueSplit.walk(+1)
121
129
  break
122
130
  }
131
+
132
+ /* merge into following chunk */
123
133
  element2.chunk.timestampStart = element.chunk.timestampStart
124
134
  element2.chunk.payload =
125
135
  (element.chunk.payload as string) + " " +
126
136
  (element2.chunk.payload as string)
137
+
138
+ /* reset preview state (merged content needs new preview) */
139
+ element2.preview = undefined
127
140
  this.queueSplit.delete()
128
141
  this.queueSplit.touch()
129
142
  }
130
- else
143
+ else {
144
+ /* no following chunk yet: mark for intermediate preview output */
145
+ if (element.preview !== "sent") {
146
+ element.preview = "pending"
147
+ this.queueSplit.touch()
148
+ }
131
149
  break
150
+ }
132
151
  }
133
152
  }
134
153
 
@@ -157,8 +176,23 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
157
176
  callback(new Error("expected text input as string chunks"))
158
177
  else if (chunk.payload.length === 0)
159
178
  callback()
179
+ else if (chunk.kind === "intermediate") {
180
+ /* intermediate chunks: pass through immediately (bypass queue) */
181
+ self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
182
+ self.log("info", `send text (intermediate pass-through): ${JSON.stringify(chunk.payload)}`)
183
+ this.push(chunk)
184
+ callback()
185
+ }
160
186
  else {
161
- self.log("info", `received text: ${JSON.stringify(chunk.payload)}`)
187
+ /* final chunks: queue for sentence splitting */
188
+ self.log("info", `received text (${chunk.kind}): ${JSON.stringify(chunk.payload)}`)
189
+
190
+ /* cancel any pending preview timeout */
191
+ if (self.previewTimer !== null) {
192
+ clearTimeout(self.previewTimer)
193
+ self.previewTimer = null
194
+ }
195
+
162
196
  self.queueRecv.append({ type: "text-frame", chunk })
163
197
  callback()
164
198
  }
@@ -192,6 +226,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
192
226
  else if (element !== undefined
193
227
  && element.type === "text-frame"
194
228
  && element.complete === true) {
229
+ /* send all consecutive complete chunks */
195
230
  while (true) {
196
231
  const nextElement = self.queueSend.peek()
197
232
  if (nextElement === undefined)
@@ -204,12 +239,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
204
239
  else if (nextElement.type === "text-frame"
205
240
  && nextElement.complete !== true)
206
241
  break
207
- self.log("info", `send text: ${JSON.stringify(nextElement.chunk.payload)}`)
242
+ self.log("info", `send text (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)}`)
208
243
  this.push(nextElement.chunk)
209
244
  self.queueSend.walk(+1)
210
245
  self.queue.trim()
211
246
  }
212
247
  }
248
+ else if (element !== undefined
249
+ && element.type === "text-frame"
250
+ && element.preview === "pending") {
251
+ /* send intermediate preview (without advancing pointer) */
252
+ const previewChunk = element.chunk.clone()
253
+ previewChunk.kind = "intermediate"
254
+ self.log("info", `send text (intermediate preview): ${JSON.stringify(previewChunk.payload)}`)
255
+ this.push(previewChunk)
256
+ element.preview = "sent"
257
+ self.queueSend.touch()
258
+
259
+ /* start preview timeout (if configured) */
260
+ const timeout = self.params.timeout as number
261
+ if (timeout > 0 && self.previewTimer === null) {
262
+ self.previewTimer = setTimeout(() => {
263
+ self.previewTimer = null
264
+ if (self.closing)
265
+ return
266
+
267
+ /* promote preview to final chunk */
268
+ const el = self.queueSend.peek()
269
+ if (el !== undefined
270
+ && el.type === "text-frame"
271
+ && el.preview === "sent"
272
+ && el.complete !== true) {
273
+ self.log("info", `timeout: promoting intermediate to final: ${JSON.stringify(el.chunk.payload)}`)
274
+ el.complete = true
275
+ self.queueSend.touch()
276
+ self.queue.emit("write")
277
+ }
278
+ }, timeout)
279
+ }
280
+
281
+ /* wait for more data */
282
+ if (!self.closing)
283
+ self.queue.once("write", flushPendingChunks)
284
+ }
213
285
  else if (!self.closing)
214
286
  self.queue.once("write", flushPendingChunks)
215
287
  }
@@ -223,11 +295,15 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
223
295
  /* indicate closing */
224
296
  this.closing = true
225
297
 
226
- /* clean up timer */
298
+ /* clean up timers */
227
299
  if (this.workingOffTimer !== null) {
228
300
  clearTimeout(this.workingOffTimer)
229
301
  this.workingOffTimer = null
230
302
  }
303
+ if (this.previewTimer !== null) {
304
+ clearTimeout(this.previewTimer)
305
+ this.previewTimer = null
306
+ }
231
307
 
232
308
  /* remove any pending event listeners */
233
309
  this.queue.removeAllListeners("write")