speechflow 1.2.8 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +201 -43
- package/dst/speechflow-node-a2a-gender.d.ts +1 -0
- package/dst/speechflow-node-a2a-gender.js +7 -2
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.js +5 -2
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.js +22 -14
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +17 -0
- package/dst/speechflow-node-t2t-sentence.js +234 -0
- package/dst/speechflow-node-t2t-sentence.js.map +1 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +1 -0
- package/dst/speechflow-node-t2t-subtitle.js +231 -51
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-x2x-trace.js +14 -7
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +10 -2
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node.d.ts +2 -0
- package/dst/speechflow-node.js +3 -0
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.js +1 -1
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +64 -2
- package/dst/speechflow.js.map +1 -1
- package/etc/speechflow.yaml +39 -26
- package/package.json +18 -17
- package/src/speechflow-node-a2a-gender.ts +8 -2
- package/src/speechflow-node-a2a-meter.ts +1 -1
- package/src/speechflow-node-a2t-deepgram.ts +18 -11
- package/src/speechflow-node-t2t-sentence.ts +224 -0
- package/src/speechflow-node-t2t-subtitle.ts +62 -15
- package/src/speechflow-node-xio-device.ts +10 -2
- package/src/speechflow-utils.ts +1 -1
- package/src/speechflow.ts +15 -2
package/src/speechflow-node-t2t-sentence.ts
ADDED
@@ -0,0 +1,224 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import { Duration } from "luxon"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* text stream queue element */
+type TextQueueElement = {
+    type: "text-frame",
+    chunk: SpeechFlowChunk,
+    complete?: boolean
+} | {
+    type: "text-eof"
+}
+
+/* SpeechFlow node for sentence splitting */
+export default class SpeechFlowNodeSentence extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "sentence"
+
+    /* internal state */
+    private static speexInitialized = false
+    private queue = new utils.Queue<TextQueueElement>()
+    private queueRecv = this.queue.pointerUse("recv")
+    private queueSplit = this.queue.pointerUse("split")
+    private queueSend = this.queue.pointerUse("send")
+    private destroyed = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({})
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* pass-through logging */
+        const log = (level: string, msg: string) => { this.log(level, msg) }
+
+        /* work off queued audio frames */
+        let workingOffTimer: ReturnType<typeof setTimeout> | null = null
+        let workingOff = false
+        const workOffQueue = async () => {
+            if (this.destroyed)
+                return
+
+            /* control working off round */
+            if (workingOff)
+                return
+            workingOff = true
+            if (workingOffTimer !== null) {
+                clearTimeout(workingOffTimer)
+                workingOffTimer = null
+            }
+            this.queue.off("write", workOffQueue)
+
+            /* try to work off one or more chunks */
+            while (true) {
+                const element = this.queueSplit.peek()
+                if (element === undefined)
+                    break
+                if (element.type === "text-eof") {
+                    this.queueSplit.walk(+1)
+                    break
+                }
+                const chunk = element.chunk
+                const payload = chunk.payload as string
+                const m = payload.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
+                if (m !== null) {
+                    /* contains a sentence */
+                    const [ , sentence, rest ] = m
+                    if (rest !== "") {
+                        /* contains more than a sentence */
+                        const chunk2 = chunk.clone()
+                        const duration = Duration.fromMillis(
+                            chunk.timestampEnd.minus(chunk.timestampStart).toMillis() *
+                            (sentence.length / payload.length))
+                        chunk2.timestampStart = chunk.timestampStart.plus(duration)
+                        chunk.timestampEnd = chunk2.timestampStart
+                        chunk.payload = sentence
+                        chunk2.payload = rest
+                        element.complete = true
+                        this.queueSplit.touch()
+                        this.queueSplit.walk(+1)
+                        this.queueSplit.insert({ type: "text-frame", chunk: chunk2 })
+                    }
+                    else {
+                        /* contains just the sentence */
+                        element.complete = true
+                        this.queueSplit.touch()
+                        this.queueSplit.walk(+1)
+                    }
+                }
+                else {
+                    /* contains less than a sentence */
+                    const position = this.queueSplit.position()
+                    if (position < this.queueSplit.maxPosition() - 1) {
+                        /* merge into following chunk */
+                        const element2 = this.queueSplit.peek(position + 1)
+                        if (element2 === undefined)
+                            break
+                        if (element2.type === "text-eof") {
+                            element.complete = true
+                            this.queueSplit.touch()
+                            this.queueSplit.walk(+1)
+                            break
+                        }
+                        element2.chunk.timestampStart = element.chunk.timestampStart
+                        element2.chunk.payload =
+                            element.chunk.payload as string + " " +
+                            element2.chunk.payload as string
+                        this.queueSplit.delete()
+                        this.queueSplit.touch()
+                    }
+                    else
+                        break
+                }
+            }
+
+            /* re-initiate working off round */
+            workingOff = false
+            workingOffTimer = setTimeout(workOffQueue, 100)
+            this.queue.once("write", workOffQueue)
+        }
+        this.queue.once("write", workOffQueue)
+
+        /* provide Duplex stream and internally attach to classifier */
+        const self = this
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+
+            /* receive text chunk (writable side of stream) */
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected text input as string chunks"))
+                else if (chunk.payload.length === 0)
+                    callback()
+                else {
+                    log("info", `received text: ${JSON.stringify(chunk.payload)}`)
+                    self.queueRecv.append({ type: "text-frame", chunk })
+                    callback()
+                }
+            },
+
+            /* receive no more text chunks (writable side of stream) */
+            final (callback) {
+                /* signal end of file */
+                self.queueRecv.append({ type: "text-eof" })
+                callback()
+            },
+
+            /* send text chunk(s) (readable side of stream) */
+            read (_size) {
+                /* flush pending audio chunks */
+                const flushPendingChunks = () => {
+                    const element = self.queueSend.peek()
+                    if (element !== undefined
+                        && element.type === "text-eof") {
+                        this.push(null)
+                        self.queueSend.walk(+1)
+                    }
+                    else if (element !== undefined
+                        && element.type === "text-frame"
+                        && element.complete === true) {
+                        while (true) {
+                            const element = self.queueSend.peek()
+                            if (element === undefined)
+                                break
+                            else if (element.type === "text-eof") {
+                                this.push(null)
+                                self.queueSend.walk(+1)
+                                break
+                            }
+                            else if (element.type === "text-frame"
+                                && element.complete !== true)
+                                break
+                            log("info", `send text: ${JSON.stringify(element.chunk.payload)}`)
+                            this.push(element.chunk)
+                            self.queueSend.walk(+1)
+                            self.queue.trim()
+                        }
+                    }
+                    else
+                        self.queue.once("write", flushPendingChunks)
+                }
+                flushPendingChunks()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+
+        /* indicate destruction */
+        this.destroyed = true
+    }
+}
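The new `sentence` node buffers incoming text chunks, cuts them at sentence boundaries (`.`, `;`, `?`, `!`) with the regular expression shown above, and prorates a chunk's time range by character count whenever one chunk has to be split in two. The following standalone sketch (not part of the package; `splitSentence` is a hypothetical helper, only `Duration` from luxon matches the actual code) isolates that regex and the proration arithmetic:

```ts
import { Duration } from "luxon"

/* split off the first complete sentence of a timed text chunk and prorate
   its time range by the sentence's share of the characters (sketch only) */
function splitSentence (text: string, start: Duration, end: Duration) {
    const m = text.match(/^((?:.|\r?\n)+?[.;?!])\s*((?:.|\r?\n)*)$/)
    if (m === null)
        return null /* no complete sentence yet -- keep buffering */
    const [ , sentence, rest ] = m
    const total = end.minus(start).toMillis()
    const cut   = start.plus(Duration.fromMillis(total * (sentence.length / text.length)))
    return {
        sentence: { text: sentence, start, end: cut },
        rest:     rest !== "" ? { text: rest, start: cut, end } : null
    }
}

/* example: a 4s chunk carrying one full sentence plus the start of the next */
const r = splitSentence("Hello world. How are", Duration.fromMillis(0), Duration.fromMillis(4000))
console.log(r?.sentence) /* "Hello world." spanning 0ms..2400ms */
console.log(r?.rest)     /* "How are" spanning 2400ms..4000ms */
```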
package/src/speechflow-node-t2t-subtitle.ts
CHANGED
@@ -7,6 +7,9 @@
 /* standard dependencies */
 import Stream from "node:stream"
 
+/* external dependencies */
+import { Duration } from "luxon"
+
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 
@@ -24,7 +27,8 @@ export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            format:
+            format: { type: "string", pos: 0, val: "srt", match: /^(?:srt|vtt)$/ },
+            words: { type: "boolean", val: false }
         })
 
         /* declare node input/output format */
@@ -40,31 +44,74 @@ export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
         const convert = async (chunk: SpeechFlowChunk) => {
             if (typeof chunk.payload !== "string")
                 throw new Error("chunk payload type must be string")
-
-
-
-
-
-
-
+            const convertSingle = (
+                start: Duration,
+                end: Duration,
+                text: string,
+                word?: string,
+                occurence?: number
+            ) => {
+                if (word) {
+                    occurence ??= 1
+                    let match = 1
+                    word = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
+                    text = text.replaceAll(new RegExp(`\\b${word}\\b`, "g"), (m) => {
+                        if (match++ === occurence)
+                            return `<b>${m}</b>`
+                        else
+                            return m
+                    })
+                }
+                if (this.params.format === "srt") {
+                    const startFmt = start.toFormat("hh:mm:ss,SSS")
+                    const endFmt = end.toFormat("hh:mm:ss,SSS")
+                    text = `${this.sequenceNo++}\n` +
+                        `${startFmt} --> ${endFmt}\n` +
+                        `${text}\n\n`
+                }
+                else if (this.params.format === "vtt") {
+                    const startFmt = start.toFormat("hh:mm:ss.SSS")
+                    const endFmt = end.toFormat("hh:mm:ss.SSS")
+                    text = `${startFmt} --> ${endFmt}\n` +
+                        `${text}\n\n`
+                }
+                return text
             }
-
-
-
-
-
-
+            let output = ""
+            if (this.params.words) {
+                output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
+                const words = (chunk.meta.get("words") ?? []) as
+                    { word: string, start: Duration, end: Duration }[]
+                const occurences = new Map<string, number>()
+                for (const word of words) {
+                    let occurence = occurences.get(word.word) ?? 0
+                    occurence++
+                    occurences.set(word.word, occurence)
+                    output += convertSingle(word.start, word.end, chunk.payload, word.word, occurence)
+                }
            }
-
+            else
+                output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
+            return output
         }
 
         /* establish a duplex stream */
+        const self = this
+        let firstChunk = true
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (firstChunk && self.params.format === "vtt") {
+                    this.push(new SpeechFlowChunk(
+                        Duration.fromMillis(0), Duration.fromMillis(0),
+                        "final", "text",
+                        "WEBVTT\n\n"
+                    ))
+                    firstChunk = false
+                }
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
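The reworked `subtitle` node now emits either SRT or WebVTT cues and, via the new `words` parameter, can additionally repeat a cue per word with the spoken word wrapped in `<b>…</b>`. The sketch below (not part of the package; `cue` is a hypothetical helper) shows the only formatting differences the code relies on: SRT prefixes each cue with a sequence number and uses a comma before the milliseconds, while WebVTT uses a dot and needs a one-time `WEBVTT` file header:

```ts
import { Duration } from "luxon"

/* render a single subtitle cue in SRT or WebVTT notation (sketch only) */
function cue (format: "srt" | "vtt", seq: number, start: Duration, end: Duration, text: string) {
    const fmt    = format === "srt" ? "hh:mm:ss,SSS" : "hh:mm:ss.SSS"
    const header = format === "srt" ? `${seq}\n` : ""
    return `${header}${start.toFormat(fmt)} --> ${end.toFormat(fmt)}\n${text}\n\n`
}

let output = "WEBVTT\n\n" /* WebVTT streams start with this header, emitted once */
output += cue("vtt", 1, Duration.fromMillis(1000), Duration.fromMillis(2500), "Hello <b>world</b>")
console.log(output)
/* WEBVTT
 *
 * 00:00:01.000 --> 00:00:02.500
 * Hello <b>world</b>
 */
```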
package/src/speechflow-node-xio-device.ts
CHANGED
@@ -199,8 +199,16 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
     async close () {
         /* shutdown PortAudio */
         if (this.io !== null) {
-
-
+            await new Promise<void>((resolve, reject) => {
+                this.io!.abort(() => {
+                    resolve()
+                })
+            })
+            await new Promise<void>((resolve, reject) => {
+                this.io!.quit(() => {
+                    resolve()
+                })
+            })
             this.io = null
         }
     }
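The `close()` fix wraps the callback-only PortAudio `abort()` and `quit()` calls in Promises, so the node actually waits for the device to shut down before clearing `this.io`. A minimal sketch of that pattern, where `AudioIO` and `awaitCallback` are stand-ins rather than the package's or the audio library's real types:

```ts
/* stand-in for the callback-style audio handle used by the node */
type AudioIO = {
    abort (cb: () => void): void
    quit  (cb: () => void): void
}

/* turn a "call me back when done" API into an awaitable Promise */
const awaitCallback = (fn: (cb: () => void) => void) =>
    new Promise<void>((resolve) => fn(() => resolve()))

async function closeAudio (io: AudioIO) {
    await awaitCallback((cb) => io.abort(cb)) /* stop the running stream first... */
    await awaitCallback((cb) => io.quit(cb))  /* ...then tear down the audio backend */
}
```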
package/src/speechflow-utils.ts
CHANGED
@@ -378,7 +378,7 @@ export class QueuePointer<T extends QueueElement> extends EventEmitter {
         this.queue.emit("write", { start: this.index - 1, end: this.index - 1 })
     }
     insert (element: T) {
-        this.queue.elements.splice(this.index
+        this.queue.elements.splice(this.index, 0, element)
         this.queue.emit("write", { start: this.index - 1, end: this.index })
     }
     delete () {
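The one-line fix in `QueuePointer.insert()` relies on `Array.prototype.splice` with a delete count of 0, which inserts the element at the pointer's index without removing anything, as this trivial standalone illustration shows:

```ts
/* splice(index, 0, element) inserts without deleting (standalone illustration) */
const elements = [ "a", "b", "d" ]
elements.splice(2, 0, "c")
console.log(elements) /* [ "a", "b", "c", "d" ] */
```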
package/src/speechflow.ts
CHANGED
@@ -244,6 +244,7 @@ type wsPeerInfo = {
         "./speechflow-node-t2t-format.js",
         "./speechflow-node-t2t-ollama.js",
         "./speechflow-node-t2t-openai.js",
+        "./speechflow-node-t2t-sentence.js",
         "./speechflow-node-t2t-subtitle.js",
         "./speechflow-node-t2t-transformers.js",
         "./speechflow-node-x2x-filter.js",
@@ -506,8 +507,10 @@ type wsPeerInfo = {
                 const name = req.node as string
                 const args = req.args as any[]
                 const foundNode = Array.from(graphNodes).find((node) => node.id === name)
-                if (foundNode === undefined)
+                if (foundNode === undefined) {
                     cli!.log("warning", `external request failed: no such node <${name}>`)
+                    throw new Error(`external request failed: no such node <${name}>`)
+                }
                 else {
                     await foundNode.receiveRequest(args).catch((err: Error) => {
                         cli!.log("warning", `external request to node <${name}> failed: ${err}`)
@@ -644,12 +647,14 @@ type wsPeerInfo = {
         shuttingDown = true
         if (signal === "finished")
             cli!.log("info", "**** streams of all nodes finished -- shutting down service ****")
+        else if (signal === "exception")
+            cli!.log("warning", "**** exception occurred -- shutting down service ****")
         else
             cli!.log("warning", `**** received signal ${signal} -- shutting down service ****`)
 
         /* shutdown HAPI service */
         cli!.log("info", `HAPI: stopping REST/WebSocket network service: http://${args.address}:${args.port}`)
-        await hapi.stop()
+        await hapi.stop({ timeout: 2000 })
 
         /* graph processing: PASS 1: disconnect node streams */
         for (const node of graphNodes) {
@@ -715,6 +720,14 @@ type wsPeerInfo = {
     process.on("SIGUSR1", () => { shutdown("SIGUSR1") })
     process.on("SIGUSR2", () => { shutdown("SIGUSR2") })
     process.on("SIGTERM", () => { shutdown("SIGTERM") })
+    process.on("uncaughtException", (err) => {
+        cli!.log("error", `uncaught exception: ${err}`)
+        shutdown("exception")
+    })
+    process.on("unhandledRejection", (reason) => {
+        cli!.log("error", `unhandled rejection: ${reason}`)
+        shutdown("exception")
+    })
 })().catch((err: Error) => {
     if (cli !== null)
         cli.log("error", err.message)