speechflow 1.2.8 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +201 -43
  3. package/dst/speechflow-node-a2a-gender.d.ts +1 -0
  4. package/dst/speechflow-node-a2a-gender.js +7 -2
  5. package/dst/speechflow-node-a2a-gender.js.map +1 -1
  6. package/dst/speechflow-node-a2a-meter.js +5 -2
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  8. package/dst/speechflow-node-a2t-deepgram.js +22 -14
  9. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  10. package/dst/speechflow-node-t2t-sentence.d.ts +17 -0
  11. package/dst/speechflow-node-t2t-sentence.js +234 -0
  12. package/dst/speechflow-node-t2t-sentence.js.map +1 -0
  13. package/dst/speechflow-node-t2t-subtitle.d.ts +1 -0
  14. package/dst/speechflow-node-t2t-subtitle.js +231 -51
  15. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  16. package/dst/speechflow-node-x2x-trace.js +14 -7
  17. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  18. package/dst/speechflow-node-xio-device.js +10 -2
  19. package/dst/speechflow-node-xio-device.js.map +1 -1
  20. package/dst/speechflow-node.d.ts +2 -0
  21. package/dst/speechflow-node.js +3 -0
  22. package/dst/speechflow-node.js.map +1 -1
  23. package/dst/speechflow-utils.js +1 -1
  24. package/dst/speechflow-utils.js.map +1 -1
  25. package/dst/speechflow.js +64 -2
  26. package/dst/speechflow.js.map +1 -1
  27. package/etc/speechflow.yaml +39 -26
  28. package/package.json +18 -17
  29. package/src/speechflow-node-a2a-gender.ts +8 -2
  30. package/src/speechflow-node-a2a-meter.ts +1 -1
  31. package/src/speechflow-node-a2t-deepgram.ts +18 -11
  32. package/src/speechflow-node-t2t-sentence.ts +224 -0
  33. package/src/speechflow-node-t2t-subtitle.ts +62 -15
  34. package/src/speechflow-node-xio-device.ts +10 -2
  35. package/src/speechflow-utils.ts +1 -1
  36. package/src/speechflow.ts +15 -2
@@ -0,0 +1,224 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { Duration } from "luxon"
12
+
13
+ /* internal dependencies */
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
+ import * as utils from "./speechflow-utils"
16
+
17
/* element of the text stream queue: either a text frame carrying a chunk
   (optionally flagged as complete) or the end-of-file marker */
type TextQueueElement =
    | { type: "text-frame", chunk: SpeechFlowChunk, complete?: boolean }
    | { type: "text-eof" }
25
+
26
/**
 * SpeechFlow node for sentence splitting.
 *
 * Text chunks written to the node are appended to an internal queue.
 * A background "work off" round inspects the queue and
 *  - splits a chunk that contains more than one sentence (text ending in
 *    ".", ";", "?" or "!") into two chunks, interpolating the split
 *    timestamp proportionally to the text length,
 *  - merges a chunk that contains less than a sentence into the chunk
 *    following it (joined by a single space), and
 *  - marks chunks holding a sentence (or the last chunk before
 *    end-of-file) as complete.
 * Only chunks marked complete are pushed to the readable side.
 */
export default class SpeechFlowNodeSentence extends SpeechFlowNode {
    /* declare official node name */
    public static name = "sentence"

    /* pattern: shortest leading text ending in ".", ";", "?" or "!" (group 1),
       optional whitespace, and the remaining text (group 2) */
    private static readonly sentencePattern = /^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/

    /* internal state */
    private queue      = new utils.Queue<TextQueueElement>()
    private queueRecv  = this.queue.pointerUse("recv")
    private queueSplit = this.queue.pointerUse("split")
    private queueSend  = this.queue.pointerUse("send")
    private destroyed  = false

    /* handles of background activities, tracked so close() can cancel them */
    private workingOffTimer: ReturnType<typeof setTimeout> | null = null
    private workOffHandler: (() => Promise<void>) | null = null
    private flushHandler: (() => void) | null = null

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({})

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /* open node */
    async open () {
        /* clear destruction flag */
        this.destroyed = false

        /* pass-through logging */
        const log = (level: string, msg: string) => { this.log(level, msg) }

        /* work off queued text frames */
        let workingOff = false
        const workOffQueue = async () => {
            if (this.destroyed)
                return

            /* control working off round (no re-entrance, single pending trigger) */
            if (workingOff)
                return
            workingOff = true
            if (this.workingOffTimer !== null) {
                clearTimeout(this.workingOffTimer)
                this.workingOffTimer = null
            }
            this.queue.off("write", workOffQueue)

            /* try to work off one or more chunks */
            while (true) {
                const element = this.queueSplit.peek()
                if (element === undefined)
                    break
                if (element.type === "text-eof") {
                    this.queueSplit.walk(+1)
                    break
                }
                const chunk = element.chunk
                const payload = chunk.payload as string
                const m = payload.match(SpeechFlowNodeSentence.sentencePattern)
                if (m !== null) {
                    /* contains a sentence */
                    const [ , sentence, rest ] = m
                    if (rest !== "") {
                        /* contains more than a sentence: keep the sentence in this
                           chunk and move the rest into a cloned follow-up chunk,
                           splitting the time span proportionally to the text length */
                        const chunk2 = chunk.clone()
                        const duration = Duration.fromMillis(
                            chunk.timestampEnd.minus(chunk.timestampStart).toMillis() *
                            (sentence.length / payload.length))
                        chunk2.timestampStart = chunk.timestampStart.plus(duration)
                        chunk.timestampEnd    = chunk2.timestampStart
                        chunk.payload         = sentence
                        chunk2.payload        = rest
                        element.complete = true
                        this.queueSplit.touch()
                        this.queueSplit.walk(+1)
                        this.queueSplit.insert({ type: "text-frame", chunk: chunk2 })
                    }
                    else {
                        /* contains just the sentence */
                        element.complete = true
                        this.queueSplit.touch()
                        this.queueSplit.walk(+1)
                    }
                }
                else {
                    /* contains less than a sentence */
                    const position = this.queueSplit.position()
                    if (position < this.queueSplit.maxPosition() - 1) {
                        /* merge into following chunk */
                        const element2 = this.queueSplit.peek(position + 1)
                        if (element2 === undefined)
                            break
                        if (element2.type === "text-eof") {
                            /* nothing follows anymore: release the fragment as-is */
                            element.complete = true
                            this.queueSplit.touch()
                            this.queueSplit.walk(+1)
                            break
                        }
                        element2.chunk.timestampStart = element.chunk.timestampStart
                        element2.chunk.payload =
                            (element.chunk.payload as string) + " " +
                            (element2.chunk.payload as string)
                        this.queueSplit.delete()
                        this.queueSplit.touch()
                    }
                    else
                        break
                }
            }

            /* re-initiate working off round */
            workingOff = false
            this.workingOffTimer = setTimeout(workOffQueue, 100)
            this.queue.once("write", workOffQueue)
        }
        this.workOffHandler = workOffQueue
        this.queue.once("write", workOffQueue)

        /* provide Duplex stream backed by the internal queue */
        const self = this
        this.stream = new Stream.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,

            /* receive text chunk (writable side of stream) */
            write (chunk: SpeechFlowChunk, encoding, callback) {
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("expected text input as string chunks"))
                else if (chunk.payload.length === 0)
                    callback()
                else {
                    log("info", `received text: ${JSON.stringify(chunk.payload)}`)
                    self.queueRecv.append({ type: "text-frame", chunk })
                    callback()
                }
            },

            /* receive no more text chunks (writable side of stream) */
            final (callback) {
                /* signal end of file */
                self.queueRecv.append({ type: "text-eof" })
                callback()
            },

            /* send text chunk(s) (readable side of stream) */
            read (_size) {
                /* flush pending text chunks */
                const flushPendingChunks = () => {
                    /* this trigger is consumed now */
                    if (self.flushHandler === flushPendingChunks)
                        self.flushHandler = null

                    /* never touch the queue or the stream after the node was closed */
                    if (self.destroyed)
                        return

                    const element = self.queueSend.peek()
                    if (element !== undefined
                        && element.type === "text-eof") {
                        this.push(null)
                        self.queueSend.walk(+1)
                    }
                    else if (element !== undefined
                        && element.type === "text-frame"
                        && element.complete === true) {
                        /* send all consecutive complete chunks */
                        while (true) {
                            const element = self.queueSend.peek()
                            if (element === undefined)
                                break
                            else if (element.type === "text-eof") {
                                this.push(null)
                                self.queueSend.walk(+1)
                                break
                            }
                            else if (element.type === "text-frame"
                                && element.complete !== true)
                                break
                            log("info", `send text: ${JSON.stringify(element.chunk.payload)}`)
                            this.push(element.chunk)
                            self.queueSend.walk(+1)
                            self.queue.trim()
                        }
                    }
                    else {
                        /* nothing to send yet: retry on next queue modification */
                        self.flushHandler = flushPendingChunks
                        self.queue.once("write", flushPendingChunks)
                    }
                }
                flushPendingChunks()
            }
        })
    }

    /* close node */
    async close () {
        /* indicate destruction */
        this.destroyed = true

        /* cancel pending working off round */
        if (this.workingOffTimer !== null) {
            clearTimeout(this.workingOffTimer)
            this.workingOffTimer = null
        }

        /* detach queue listeners, so they cannot fire after the node was closed */
        if (this.workOffHandler !== null) {
            this.queue.off("write", this.workOffHandler)
            this.workOffHandler = null
        }
        if (this.flushHandler !== null) {
            this.queue.off("write", this.flushHandler)
            this.flushHandler = null
        }

        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
@@ -7,6 +7,9 @@
7
7
  /* standard dependencies */
8
8
  import Stream from "node:stream"
9
9
 
10
+ /* external dependencies */
11
+ import { Duration } from "luxon"
12
+
10
13
  /* internal dependencies */
11
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
15
 
@@ -24,7 +27,8 @@ export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
24
27
 
25
28
  /* declare node configuration parameters */
26
29
  this.configure({
27
- format: { type: "string", pos: 0, val: "srt", match: /^(?:srt|vtt)$/ }
30
+ format: { type: "string", pos: 0, val: "srt", match: /^(?:srt|vtt)$/ },
31
+ words: { type: "boolean", val: false }
28
32
  })
29
33
 
30
34
  /* declare node input/output format */
@@ -40,31 +44,74 @@ export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
40
44
  const convert = async (chunk: SpeechFlowChunk) => {
41
45
  if (typeof chunk.payload !== "string")
42
46
  throw new Error("chunk payload type must be string")
43
- let text = chunk.payload
44
- if (this.params.format === "srt") {
45
- const start = chunk.timestampStart.toFormat("hh:mm:ss,SSS")
46
- const end = chunk.timestampEnd.toFormat("hh:mm:ss,SSS")
47
- text = `${this.sequenceNo++}\n` +
48
- `${start} --> ${end}\n` +
49
- `${text}\n\n`
47
+ const convertSingle = (
48
+ start: Duration,
49
+ end: Duration,
50
+ text: string,
51
+ word?: string,
52
+ occurence?: number
53
+ ) => {
54
+ if (word) {
55
+ occurence ??= 1
56
+ let match = 1
57
+ word = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
58
+ text = text.replaceAll(new RegExp(`\\b${word}\\b`, "g"), (m) => {
59
+ if (match++ === occurence)
60
+ return `<b>${m}</b>`
61
+ else
62
+ return m
63
+ })
64
+ }
65
+ if (this.params.format === "srt") {
66
+ const startFmt = start.toFormat("hh:mm:ss,SSS")
67
+ const endFmt = end.toFormat("hh:mm:ss,SSS")
68
+ text = `${this.sequenceNo++}\n` +
69
+ `${startFmt} --> ${endFmt}\n` +
70
+ `${text}\n\n`
71
+ }
72
+ else if (this.params.format === "vtt") {
73
+ const startFmt = start.toFormat("hh:mm:ss.SSS")
74
+ const endFmt = end.toFormat("hh:mm:ss.SSS")
75
+ text = `${startFmt} --> ${endFmt}\n` +
76
+ `${text}\n\n`
77
+ }
78
+ return text
50
79
  }
51
- else if (this.params.format === "vtt") {
52
- const start = chunk.timestampStart.toFormat("hh:mm:ss.SSS")
53
- const end = chunk.timestampEnd.toFormat("hh:mm:ss.SSS")
54
- text = `${this.sequenceNo++}\n` +
55
- `${start} --> ${end}\n` +
56
- `${text}\n\n`
80
+ let output = ""
81
+ if (this.params.words) {
82
+ output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
83
+ const words = (chunk.meta.get("words") ?? []) as
84
+ { word: string, start: Duration, end: Duration }[]
85
+ const occurences = new Map<string, number>()
86
+ for (const word of words) {
87
+ let occurence = occurences.get(word.word) ?? 0
88
+ occurence++
89
+ occurences.set(word.word, occurence)
90
+ output += convertSingle(word.start, word.end, chunk.payload, word.word, occurence)
91
+ }
57
92
  }
58
- return text
93
+ else
94
+ output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
95
+ return output
59
96
  }
60
97
 
61
98
  /* establish a duplex stream */
99
+ const self = this
100
+ let firstChunk = true
62
101
  this.stream = new Stream.Transform({
63
102
  readableObjectMode: true,
64
103
  writableObjectMode: true,
65
104
  decodeStrings: false,
66
105
  highWaterMark: 1,
67
106
  transform (chunk: SpeechFlowChunk, encoding, callback) {
107
+ if (firstChunk && self.params.format === "vtt") {
108
+ this.push(new SpeechFlowChunk(
109
+ Duration.fromMillis(0), Duration.fromMillis(0),
110
+ "final", "text",
111
+ "WEBVTT\n\n"
112
+ ))
113
+ firstChunk = false
114
+ }
68
115
  if (Buffer.isBuffer(chunk.payload))
69
116
  callback(new Error("invalid chunk payload type"))
70
117
  else {
@@ -199,8 +199,16 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
199
199
  async close () {
200
200
  /* shutdown PortAudio */
201
201
  if (this.io !== null) {
202
- this.io.abort()
203
- this.io.quit()
202
+ await new Promise<void>((resolve, reject) => {
203
+ this.io!.abort(() => {
204
+ resolve()
205
+ })
206
+ })
207
+ await new Promise<void>((resolve, reject) => {
208
+ this.io!.quit(() => {
209
+ resolve()
210
+ })
211
+ })
204
212
  this.io = null
205
213
  }
206
214
  }
@@ -378,7 +378,7 @@ export class QueuePointer<T extends QueueElement> extends EventEmitter {
378
378
  this.queue.emit("write", { start: this.index - 1, end: this.index - 1 })
379
379
  }
380
380
  insert (element: T) {
381
- this.queue.elements.splice(this.index++, 0, element)
381
+ this.queue.elements.splice(this.index, 0, element)
382
382
  this.queue.emit("write", { start: this.index - 1, end: this.index })
383
383
  }
384
384
  delete () {
package/src/speechflow.ts CHANGED
@@ -244,6 +244,7 @@ type wsPeerInfo = {
244
244
  "./speechflow-node-t2t-format.js",
245
245
  "./speechflow-node-t2t-ollama.js",
246
246
  "./speechflow-node-t2t-openai.js",
247
+ "./speechflow-node-t2t-sentence.js",
247
248
  "./speechflow-node-t2t-subtitle.js",
248
249
  "./speechflow-node-t2t-transformers.js",
249
250
  "./speechflow-node-x2x-filter.js",
@@ -506,8 +507,10 @@ type wsPeerInfo = {
506
507
  const name = req.node as string
507
508
  const args = req.args as any[]
508
509
  const foundNode = Array.from(graphNodes).find((node) => node.id === name)
509
- if (foundNode === undefined)
510
+ if (foundNode === undefined) {
510
511
  cli!.log("warning", `external request failed: no such node <${name}>`)
512
+ throw new Error(`external request failed: no such node <${name}>`)
513
+ }
511
514
  else {
512
515
  await foundNode.receiveRequest(args).catch((err: Error) => {
513
516
  cli!.log("warning", `external request to node <${name}> failed: ${err}`)
@@ -644,12 +647,14 @@ type wsPeerInfo = {
644
647
  shuttingDown = true
645
648
  if (signal === "finished")
646
649
  cli!.log("info", "**** streams of all nodes finished -- shutting down service ****")
650
+ else if (signal === "exception")
651
+ cli!.log("warning", "**** exception occurred -- shutting down service ****")
647
652
  else
648
653
  cli!.log("warning", `**** received signal ${signal} -- shutting down service ****`)
649
654
 
650
655
  /* shutdown HAPI service */
651
656
  cli!.log("info", `HAPI: stopping REST/WebSocket network service: http://${args.address}:${args.port}`)
652
- await hapi.stop()
657
+ await hapi.stop({ timeout: 2000 })
653
658
 
654
659
  /* graph processing: PASS 1: disconnect node streams */
655
660
  for (const node of graphNodes) {
@@ -715,6 +720,14 @@ type wsPeerInfo = {
715
720
  process.on("SIGUSR1", () => { shutdown("SIGUSR1") })
716
721
  process.on("SIGUSR2", () => { shutdown("SIGUSR2") })
717
722
  process.on("SIGTERM", () => { shutdown("SIGTERM") })
723
+ process.on("uncaughtException", (err) => {
724
+ cli!.log("error", `uncaught exception: ${err}`)
725
+ shutdown("exception")
726
+ })
727
+ process.on("unhandledRejection", (reason) => {
728
+ cli!.log("error", `unhandled rejection: ${reason}`)
729
+ shutdown("exception")
730
+ })
718
731
  })().catch((err: Error) => {
719
732
  if (cli !== null)
720
733
  cli.log("error", err.message)