speechflow 1.6.7 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +77 -52
- package/etc/secretlint.json +7 -0
- package/etc/speechflow.yaml +13 -4
- package/etc/stx.conf +3 -2
- package/package.json +8 -6
- package/speechflow-cli/dst/speechflow-main-api.js +9 -8
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +13 -14
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +38 -8
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +3 -4
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +8 -2
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +17 -19
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +30 -25
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +79 -48
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +45 -44
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -7
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +5 -5
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +4 -4
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +9 -3
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +16 -5
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-util-error.js +0 -7
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
- package/speechflow-cli/dst/speechflow-util-queue.js +36 -15
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
- package/speechflow-cli/dst/speechflow-util-stream.js +17 -19
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +6 -1
- package/speechflow-cli/etc/stx.conf +1 -0
- package/speechflow-cli/package.json +28 -27
- package/speechflow-cli/src/speechflow-main-api.ts +9 -11
- package/speechflow-cli/src/speechflow-main-graph.ts +15 -16
- package/speechflow-cli/src/speechflow-main-status.ts +6 -10
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -4
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +9 -3
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +27 -27
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +37 -28
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +92 -56
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +47 -43
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -4
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -8
- package/speechflow-cli/src/speechflow-node-xio-device.ts +6 -9
- package/speechflow-cli/src/speechflow-node-xio-file.ts +4 -4
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +10 -4
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +16 -5
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
- package/speechflow-cli/src/speechflow-util-audio.ts +7 -7
- package/speechflow-cli/src/speechflow-util-error.ts +0 -7
- package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +40 -20
- package/speechflow-cli/src/speechflow-util-stream.ts +29 -24
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/dst/index.css +1 -5
- package/speechflow-ui-db/dst/index.js +14 -58
- package/speechflow-ui-db/etc/stx.conf +5 -16
- package/speechflow-ui-db/package.json +16 -15
- package/speechflow-ui-st/dst/index.css +1 -5
- package/speechflow-ui-st/dst/index.js +31 -160
- package/speechflow-ui-st/etc/stx.conf +5 -16
- package/speechflow-ui-st/package.json +17 -16
|
@@ -20,10 +20,16 @@ import HAPIWebSocket from "hapi-plugin-websocket"
|
|
|
20
20
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
21
21
|
import * as util from "./speechflow-util"
|
|
22
22
|
|
|
23
|
+
/* internal helper types */
|
|
23
24
|
type WSPeerInfo = {
|
|
24
|
-
ctx:
|
|
25
|
-
ws:
|
|
26
|
-
req:
|
|
25
|
+
ctx: Record<string, any>
|
|
26
|
+
ws: WebSocket
|
|
27
|
+
req: http.IncomingMessage
|
|
28
|
+
}
|
|
29
|
+
type TextChunk = {
|
|
30
|
+
start: Duration
|
|
31
|
+
end: Duration
|
|
32
|
+
text: string
|
|
27
33
|
}
|
|
28
34
|
|
|
29
35
|
/* SpeechFlow node for subtitle (text-to-text) "translations" */
|
|
@@ -43,14 +49,14 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
43
49
|
this.configure({
|
|
44
50
|
format: { type: "string", pos: 0, val: "srt", match: /^(?:srt|vtt)$/ },
|
|
45
51
|
words: { type: "boolean", val: false },
|
|
46
|
-
mode: { type: "string", val: "export", match: /^(?:export|render)$/ },
|
|
52
|
+
mode: { type: "string", val: "export", match: /^(?:export|import|render)$/ },
|
|
47
53
|
addr: { type: "string", val: "127.0.0.1" },
|
|
48
54
|
port: { type: "number", val: 8585 }
|
|
49
55
|
})
|
|
50
56
|
|
|
51
57
|
/* declare node input/output format */
|
|
52
58
|
this.input = "text"
|
|
53
|
-
this.output = this.params.mode === "export" ? "text" : "none"
|
|
59
|
+
this.output = (this.params.mode === "export" || this.params.mode === "import") ? "text" : "none"
|
|
54
60
|
}
|
|
55
61
|
|
|
56
62
|
/* open node */
|
|
@@ -95,11 +101,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
95
101
|
}
|
|
96
102
|
return text
|
|
97
103
|
}
|
|
98
|
-
|
|
104
|
+
|
|
105
|
+
/* determine start and end timestamp,
|
|
106
|
+
by using first word's start time and last word's end time (if available),
|
|
107
|
+
to exclude leading and trailing silence parts */
|
|
108
|
+
const words: { word: string, start: Duration, end: Duration }[] = chunk.meta.get("words") ?? []
|
|
109
|
+
const timestampStart = words.length > 0 ? words[0].start : chunk.timestampStart
|
|
110
|
+
const timestampEnd = words.length > 0 ? words[words.length - 1].end : chunk.timestampEnd
|
|
111
|
+
|
|
112
|
+
/* produce SRT/VTT blocks */
|
|
113
|
+
let output = convertSingle(timestampStart, timestampEnd, chunk.payload)
|
|
99
114
|
if (this.params.words) {
|
|
100
|
-
|
|
101
|
-
const words = (chunk.meta.get("words") ?? []) as
|
|
102
|
-
{ word: string, start: Duration, end: Duration }[]
|
|
115
|
+
/* produce additional SRT/VTT blocks with each word highlighted */
|
|
103
116
|
const occurrences = new Map<string, number>()
|
|
104
117
|
for (const word of words) {
|
|
105
118
|
let occurrence = occurrences.get(word.word) ?? 0
|
|
@@ -108,49 +121,210 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
108
121
|
output += convertSingle(word.start, word.end, chunk.payload, word.word, occurrence)
|
|
109
122
|
}
|
|
110
123
|
}
|
|
111
|
-
else
|
|
112
|
-
output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
|
|
113
124
|
return output
|
|
114
125
|
}
|
|
115
126
|
|
|
116
127
|
/* establish a duplex stream */
|
|
117
128
|
const self = this
|
|
118
|
-
let
|
|
129
|
+
let headerEmitted = false
|
|
119
130
|
this.stream = new Stream.Transform({
|
|
120
131
|
readableObjectMode: true,
|
|
121
132
|
writableObjectMode: true,
|
|
122
133
|
decodeStrings: false,
|
|
123
134
|
highWaterMark: 1,
|
|
124
135
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
125
|
-
if (
|
|
136
|
+
if (!headerEmitted && self.params.format === "vtt") {
|
|
126
137
|
this.push(new SpeechFlowChunk(
|
|
127
138
|
Duration.fromMillis(0), Duration.fromMillis(0),
|
|
128
139
|
"final", "text",
|
|
129
140
|
"WEBVTT\n\n"
|
|
130
141
|
))
|
|
131
|
-
|
|
142
|
+
headerEmitted = true
|
|
132
143
|
}
|
|
133
144
|
if (Buffer.isBuffer(chunk.payload))
|
|
134
145
|
callback(new Error("invalid chunk payload type"))
|
|
146
|
+
else if (chunk.payload === "") {
|
|
147
|
+
this.push(chunk)
|
|
148
|
+
callback()
|
|
149
|
+
}
|
|
135
150
|
else {
|
|
136
|
-
|
|
137
|
-
|
|
151
|
+
convert(chunk).then((payload) => {
|
|
152
|
+
const chunkNew = chunk.clone()
|
|
153
|
+
chunkNew.payload = payload
|
|
154
|
+
this.push(chunkNew)
|
|
138
155
|
callback()
|
|
156
|
+
}).catch((error: unknown) => {
|
|
157
|
+
callback(util.ensureError(error))
|
|
158
|
+
})
|
|
159
|
+
}
|
|
160
|
+
},
|
|
161
|
+
final (callback) {
|
|
162
|
+
callback()
|
|
163
|
+
}
|
|
164
|
+
})
|
|
165
|
+
}
|
|
166
|
+
else if (this.params.mode === "import") {
|
|
167
|
+
/* parse timestamp in SRT format ("HH:MM:SS,mmm") or VTT format ("HH:MM:SS.mmm") */
|
|
168
|
+
const parseTimestamp = (ts: string): Duration => {
|
|
169
|
+
const match = ts.match(/^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/)
|
|
170
|
+
if (!match)
|
|
171
|
+
throw new Error(`invalid timestamp format: "${ts}"`)
|
|
172
|
+
const hours = Number.parseInt(match[1], 10)
|
|
173
|
+
const minutes = Number.parseInt(match[2], 10)
|
|
174
|
+
const seconds = Number.parseInt(match[3], 10)
|
|
175
|
+
const milliseconds = Number.parseInt(match[4], 10)
|
|
176
|
+
if (minutes > 59 || seconds > 59)
|
|
177
|
+
throw new Error(`invalid timestamp value "${ts}"`)
|
|
178
|
+
return Duration.fromObject({ hours, minutes, seconds, milliseconds })
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/* strip arbitrary HTML tags */
|
|
182
|
+
const stripHtmlTags = (text: string): string =>
|
|
183
|
+
text.replace(/<\/?[a-zA-Z][^>]*>/g, "")
|
|
184
|
+
|
|
185
|
+
/* parse SRT format */
|
|
186
|
+
const parseSRT = (input: string): TextChunk[] => {
|
|
187
|
+
const results: TextChunk[] = []
|
|
188
|
+
|
|
189
|
+
/* iterate over all blocks */
|
|
190
|
+
const blocks = input.trim().split(/\r?\n\r?\n+/)
|
|
191
|
+
for (const block of blocks) {
|
|
192
|
+
const lines = block.trim().split(/\r?\n/)
|
|
193
|
+
if (lines.length < 2) {
|
|
194
|
+
this.log("warning", "SRT block contains less than 2 lines")
|
|
195
|
+
continue
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/* skip optional sequence number line (first line) */
|
|
199
|
+
let lineIdx = 0
|
|
200
|
+
if (/^\d+$/.test(lines[0].trim()))
|
|
201
|
+
lineIdx = 1
|
|
202
|
+
|
|
203
|
+
/* parse timestamp line */
|
|
204
|
+
const timeLine = lines[lineIdx]
|
|
205
|
+
const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})/)
|
|
206
|
+
if (!timeMatch) {
|
|
207
|
+
this.log("warning", "SRT contains invalid timestamp line")
|
|
208
|
+
continue
|
|
209
|
+
}
|
|
210
|
+
const start = parseTimestamp(timeMatch[1])
|
|
211
|
+
const end = parseTimestamp(timeMatch[2])
|
|
212
|
+
|
|
213
|
+
/* collect text lines */
|
|
214
|
+
const textLines = lines.slice(lineIdx + 1).join("\n")
|
|
215
|
+
const text = stripHtmlTags(textLines).trim()
|
|
216
|
+
if (text !== "")
|
|
217
|
+
results.push({ start, end, text })
|
|
218
|
+
}
|
|
219
|
+
return results
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/* parse VTT format */
|
|
223
|
+
const parseVTT = (input: string): TextChunk[] => {
|
|
224
|
+
const results: TextChunk[] = []
|
|
225
|
+
|
|
226
|
+
/* remove VTT header and any metadata */
|
|
227
|
+
const content = input.trim().replace(/^WEBVTT[^\r\n]*\r?\n*/, "")
|
|
228
|
+
|
|
229
|
+
/* iterate over all blocks */
|
|
230
|
+
const blocks = content.trim().split(/\r?\n\r?\n+/)
|
|
231
|
+
for (const block of blocks) {
|
|
232
|
+
const lines = block.trim().split(/\r?\n/)
|
|
233
|
+
if (lines.length < 1) {
|
|
234
|
+
this.log("warning", "VTT block contains less than 1 line")
|
|
235
|
+
continue
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/* skip optional cue identifier lines */
|
|
239
|
+
let lineIdx = 0
|
|
240
|
+
while (lineIdx < lines.length && !lines[lineIdx].includes("-->"))
|
|
241
|
+
lineIdx++
|
|
242
|
+
if (lineIdx >= lines.length)
|
|
243
|
+
continue
|
|
244
|
+
|
|
245
|
+
/* parse timestamp line */
|
|
246
|
+
const timeLine = lines[lineIdx]
|
|
247
|
+
const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})/)
|
|
248
|
+
if (!timeMatch) {
|
|
249
|
+
this.log("warning", "VTT contains invalid timestamp line")
|
|
250
|
+
continue
|
|
251
|
+
}
|
|
252
|
+
const start = parseTimestamp(timeMatch[1])
|
|
253
|
+
const end = parseTimestamp(timeMatch[2])
|
|
254
|
+
|
|
255
|
+
/* collect text lines */
|
|
256
|
+
const textLines = lines.slice(lineIdx + 1).join("\n")
|
|
257
|
+
const text = stripHtmlTags(textLines).trim()
|
|
258
|
+
if (text !== "")
|
|
259
|
+
results.push({ start, end, text })
|
|
260
|
+
}
|
|
261
|
+
return results
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/* buffer for accumulating input */
|
|
265
|
+
let buffer = ""
|
|
266
|
+
|
|
267
|
+
/* establish a duplex stream */
|
|
268
|
+
const self = this
|
|
269
|
+
this.stream = new Stream.Transform({
|
|
270
|
+
readableObjectMode: true,
|
|
271
|
+
writableObjectMode: true,
|
|
272
|
+
decodeStrings: false,
|
|
273
|
+
highWaterMark: 1,
|
|
274
|
+
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
275
|
+
/* sanity check text chunks */
|
|
276
|
+
if (Buffer.isBuffer(chunk.payload)) {
|
|
277
|
+
callback(new Error("invalid chunk payload type"))
|
|
278
|
+
return
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/* short-circuit processing in case of empty payloads */
|
|
282
|
+
if (chunk.payload === "") {
|
|
283
|
+
this.push(chunk)
|
|
284
|
+
callback()
|
|
285
|
+
return
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/* accumulate input */
|
|
289
|
+
buffer += chunk.payload
|
|
290
|
+
|
|
291
|
+
/* parse accumulated input */
|
|
292
|
+
try {
|
|
293
|
+
/* parse entries */
|
|
294
|
+
const entries = (self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer))
|
|
295
|
+
|
|
296
|
+
/* emit parsed entries as individual chunks */
|
|
297
|
+
for (const entry of entries) {
|
|
298
|
+
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
299
|
+
this.push(chunkNew)
|
|
139
300
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
})
|
|
149
|
-
}
|
|
301
|
+
|
|
302
|
+
/* clear buffer after successful parse */
|
|
303
|
+
buffer = ""
|
|
304
|
+
callback()
|
|
305
|
+
}
|
|
306
|
+
catch (error: unknown) {
|
|
307
|
+
buffer = ""
|
|
308
|
+
callback(util.ensureError(error))
|
|
150
309
|
}
|
|
151
310
|
},
|
|
152
311
|
final (callback) {
|
|
153
|
-
|
|
312
|
+
/* process any remaining buffer content */
|
|
313
|
+
if (buffer.trim() !== "") {
|
|
314
|
+
try {
|
|
315
|
+
/* parse entries */
|
|
316
|
+
const entries = self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer)
|
|
317
|
+
|
|
318
|
+
/* emit parsed entries as individual chunks */
|
|
319
|
+
for (const entry of entries) {
|
|
320
|
+
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
321
|
+
this.push(chunkNew)
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
catch (_error: unknown) {
|
|
325
|
+
/* ignore parse errors on final flush */
|
|
326
|
+
}
|
|
327
|
+
}
|
|
154
328
|
callback()
|
|
155
329
|
}
|
|
156
330
|
})
|
|
@@ -239,13 +413,11 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
239
413
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
240
414
|
if (Buffer.isBuffer(chunk.payload))
|
|
241
415
|
callback(new Error("invalid chunk payload type"))
|
|
416
|
+
else if (chunk.payload === "")
|
|
417
|
+
callback()
|
|
242
418
|
else {
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
else {
|
|
246
|
-
emit(chunk)
|
|
247
|
-
callback()
|
|
248
|
-
}
|
|
419
|
+
emit(chunk)
|
|
420
|
+
callback()
|
|
249
421
|
}
|
|
250
422
|
},
|
|
251
423
|
final (callback) {
|
|
@@ -19,6 +19,9 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
19
19
|
/* cached regular expression instance */
|
|
20
20
|
private cachedRegExp = new util.CachedRegExp()
|
|
21
21
|
|
|
22
|
+
/* internal state */
|
|
23
|
+
private closing = false
|
|
24
|
+
|
|
22
25
|
/* construct node */
|
|
23
26
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
24
27
|
super(id, cfg, opts, args)
|
|
@@ -39,17 +42,20 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
39
42
|
|
|
40
43
|
/* open node */
|
|
41
44
|
async open () {
|
|
45
|
+
/* clear destruction flag */
|
|
46
|
+
this.closing = false
|
|
47
|
+
|
|
42
48
|
/* helper function for comparing two values */
|
|
43
49
|
const comparison = (val1: any, op: string, val2: any) => {
|
|
44
50
|
if (op === "==" || op === "!=") {
|
|
45
51
|
/* equal comparison */
|
|
46
|
-
const str1 = (typeof val1 === "string" ? val1 : val1.toString())
|
|
47
|
-
const str2 = (typeof val2 === "string" ? val2 : val2.toString())
|
|
52
|
+
const str1 = (typeof val1 === "string" ? val1 : val1.toString())
|
|
53
|
+
const str2 = (typeof val2 === "string" ? val2 : val2.toString())
|
|
48
54
|
return (op === "==" ? (str1 === str2) : (str1 !== str2))
|
|
49
55
|
}
|
|
50
56
|
else if (op === "~~" || op === "!~") {
|
|
51
57
|
/* regular expression comparison */
|
|
52
|
-
const str = (typeof val1 === "string" ? val1 : val1.toString())
|
|
58
|
+
const str = (typeof val1 === "string" ? val1 : val1.toString())
|
|
53
59
|
const regexp = (
|
|
54
60
|
val2 instanceof RegExp ?
|
|
55
61
|
val2 :
|
|
@@ -93,6 +99,10 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
93
99
|
decodeStrings: false,
|
|
94
100
|
highWaterMark: 1,
|
|
95
101
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
102
|
+
if (self.closing) {
|
|
103
|
+
callback(new Error("stream already destroyed"))
|
|
104
|
+
return
|
|
105
|
+
}
|
|
96
106
|
let val1: any
|
|
97
107
|
const val2: any = self.params.val
|
|
98
108
|
const m = self.params.var.match(/^meta:(.+)$/)
|
|
@@ -117,7 +127,6 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
117
127
|
callback()
|
|
118
128
|
},
|
|
119
129
|
final (callback) {
|
|
120
|
-
this.push(null)
|
|
121
130
|
callback()
|
|
122
131
|
}
|
|
123
132
|
})
|
|
@@ -125,6 +134,9 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
125
134
|
|
|
126
135
|
/* close node */
|
|
127
136
|
async close () {
|
|
137
|
+
/* indicate closing */
|
|
138
|
+
this.closing = true
|
|
139
|
+
|
|
128
140
|
/* shutdown stream */
|
|
129
141
|
if (this.stream !== null) {
|
|
130
142
|
await util.destroyStream(this.stream)
|
|
@@ -28,10 +28,10 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
|
|
|
28
28
|
|
|
29
29
|
/* declare node configuration parameters */
|
|
30
30
|
this.configure({
|
|
31
|
-
type: { type: "string", pos: 0, val: "audio",
|
|
32
|
-
name: { type: "string", pos: 1, val: "trace"
|
|
31
|
+
type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
|
|
32
|
+
name: { type: "string", pos: 1, val: "trace" },
|
|
33
33
|
mode: { type: "string", pos: 2, val: "filter", match: /^(?:filter|sink)$/ },
|
|
34
|
-
dashboard: { type: "string", val: ""
|
|
34
|
+
dashboard: { type: "string", val: "" }
|
|
35
35
|
})
|
|
36
36
|
|
|
37
37
|
/* sanity check parameters */
|
|
@@ -119,11 +119,6 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
|
|
|
119
119
|
}
|
|
120
120
|
},
|
|
121
121
|
final (callback) {
|
|
122
|
-
if (self.closing || self.params.mode === "sink") {
|
|
123
|
-
callback()
|
|
124
|
-
return
|
|
125
|
-
}
|
|
126
|
-
this.push(null)
|
|
127
122
|
callback()
|
|
128
123
|
}
|
|
129
124
|
})
|
|
@@ -20,10 +20,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
20
20
|
public static name = "xio-device"
|
|
21
21
|
|
|
22
22
|
/* internal state */
|
|
23
|
-
private io: PortAudio.IoStreamRead
|
|
24
|
-
| PortAudio.IoStreamWrite
|
|
25
|
-
| PortAudio.IoStreamDuplex
|
|
26
|
-
| null = null
|
|
23
|
+
private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null
|
|
27
24
|
|
|
28
25
|
/* construct node */
|
|
29
26
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -87,7 +84,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
87
84
|
return device
|
|
88
85
|
}
|
|
89
86
|
|
|
90
|
-
/* NOTICE: "
|
|
87
|
+
/* NOTICE: "naudiodon" actually implements Stream.{Readable,Writable,Duplex}, but
|
|
91
88
|
declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
|
|
92
89
|
so it is correct to cast it back to Stream.{Readable,Writable,Duplex}
|
|
93
90
|
in the following device stream setup functions! */
|
|
@@ -118,7 +115,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
118
115
|
this.stream = this.io as unknown as Stream.Duplex
|
|
119
116
|
|
|
120
117
|
/* convert regular stream into object-mode stream */
|
|
121
|
-
const wrapper1 = util.createTransformStreamForWritableSide()
|
|
118
|
+
const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
|
|
122
119
|
const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
|
|
123
120
|
this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
|
|
124
121
|
}
|
|
@@ -161,7 +158,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
161
158
|
this.stream = this.io as unknown as Stream.Writable
|
|
162
159
|
|
|
163
160
|
/* convert regular stream into object-mode stream */
|
|
164
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
161
|
+
const wrapper = util.createTransformStreamForWritableSide("audio", 1)
|
|
165
162
|
this.stream = Stream.compose(wrapper, this.stream)
|
|
166
163
|
}
|
|
167
164
|
|
|
@@ -211,7 +208,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
211
208
|
throw error
|
|
212
209
|
}
|
|
213
210
|
await Promise.race([
|
|
214
|
-
util.
|
|
211
|
+
util.timeout(2 * 1000, "PortAudio abort timeout"),
|
|
215
212
|
new Promise<void>((resolve) => {
|
|
216
213
|
this.io!.abort(() => {
|
|
217
214
|
resolve()
|
|
@@ -219,7 +216,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
219
216
|
}).catch(catchHandler)
|
|
220
217
|
])
|
|
221
218
|
await Promise.race([
|
|
222
|
-
util.
|
|
219
|
+
util.timeout(2 * 1000, "PortAudio quit timeout"),
|
|
223
220
|
new Promise<void>((resolve) => {
|
|
224
221
|
this.io!.quit(() => {
|
|
225
222
|
resolve()
|
|
@@ -128,7 +128,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
128
128
|
}
|
|
129
129
|
|
|
130
130
|
/* convert regular stream into object-mode stream */
|
|
131
|
-
const wrapper1 = util.createTransformStreamForWritableSide()
|
|
131
|
+
const wrapper1 = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
132
132
|
const wrapper2 = util.createTransformStreamForReadableSide(
|
|
133
133
|
this.params.type, () => this.timeZero)
|
|
134
134
|
this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
|
|
@@ -171,7 +171,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
171
171
|
else
|
|
172
172
|
process.stdout.setEncoding(this.config.textEncoding)
|
|
173
173
|
const chunker = createStdoutChunker()
|
|
174
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
174
|
+
const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
175
175
|
this.stream = Stream.compose(wrapper, chunker)
|
|
176
176
|
}
|
|
177
177
|
else {
|
|
@@ -183,7 +183,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
183
183
|
else
|
|
184
184
|
writable = fs.createWriteStream(this.params.path,
|
|
185
185
|
{ highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
|
|
186
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
186
|
+
const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
187
187
|
this.stream = Stream.compose(wrapper, writable)
|
|
188
188
|
}
|
|
189
189
|
}
|
|
@@ -210,7 +210,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
210
210
|
else resolve()
|
|
211
211
|
})
|
|
212
212
|
}),
|
|
213
|
-
util.
|
|
213
|
+
util.timeout(5000)
|
|
214
214
|
])
|
|
215
215
|
}
|
|
216
216
|
}
|
|
@@ -97,7 +97,8 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
|
|
|
97
97
|
this.log("info", `connection re-opened to MQTT ${this.params.url}`)
|
|
98
98
|
})
|
|
99
99
|
this.broker.on("disconnect", (packet: MQTT.IDisconnectPacket) => {
|
|
100
|
-
|
|
100
|
+
const reasonCode = packet.reasonCode ?? 0
|
|
101
|
+
this.log("info", `connection closed to MQTT ${this.params.url} (reason code: ${reasonCode})`)
|
|
101
102
|
})
|
|
102
103
|
this.chunkQueue = new util.SingleQueue<SpeechFlowChunk>()
|
|
103
104
|
this.broker.on("message", (topic: string, payload: Buffer, packet: MQTT.IPublishPacket) => {
|
|
@@ -107,11 +108,12 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
|
|
|
107
108
|
const chunk = util.streamChunkDecode(payload)
|
|
108
109
|
this.chunkQueue!.write(chunk)
|
|
109
110
|
}
|
|
110
|
-
catch (_err:
|
|
111
|
+
catch (_err: unknown) {
|
|
111
112
|
this.log("warning", `received invalid CBOR chunk from MQTT ${this.params.url}`)
|
|
112
113
|
}
|
|
113
114
|
})
|
|
114
115
|
const self = this
|
|
116
|
+
const reads = new util.PromiseSet<void>()
|
|
115
117
|
this.stream = new Stream.Duplex({
|
|
116
118
|
writableObjectMode: true,
|
|
117
119
|
readableObjectMode: true,
|
|
@@ -134,14 +136,18 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
|
|
|
134
136
|
})
|
|
135
137
|
}
|
|
136
138
|
},
|
|
139
|
+
async final (callback) {
|
|
140
|
+
await reads.awaitAll()
|
|
141
|
+
callback()
|
|
142
|
+
},
|
|
137
143
|
read (size: number) {
|
|
138
144
|
if (self.params.mode === "w")
|
|
139
145
|
throw new Error("read operation on write-only node")
|
|
140
|
-
self.chunkQueue!.read().then((chunk) => {
|
|
146
|
+
reads.add(self.chunkQueue!.read().then((chunk) => {
|
|
141
147
|
this.push(chunk, "binary")
|
|
142
148
|
}).catch((err: Error) => {
|
|
143
149
|
self.log("warning", `read on chunk queue operation failed: ${err}`)
|
|
144
|
-
})
|
|
150
|
+
}))
|
|
145
151
|
}
|
|
146
152
|
})
|
|
147
153
|
}
|
|
@@ -109,6 +109,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
109
109
|
this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`)
|
|
110
110
|
})
|
|
111
111
|
const self = this
|
|
112
|
+
const reads = new util.PromiseSet<void>()
|
|
112
113
|
this.stream = new Stream.Duplex({
|
|
113
114
|
writableObjectMode: true,
|
|
114
115
|
readableObjectMode: true,
|
|
@@ -141,14 +142,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
141
142
|
})
|
|
142
143
|
}
|
|
143
144
|
},
|
|
145
|
+
async final (callback) {
|
|
146
|
+
await reads.awaitAll()
|
|
147
|
+
callback()
|
|
148
|
+
},
|
|
144
149
|
read (size: number) {
|
|
145
150
|
if (self.params.mode === "w")
|
|
146
151
|
throw new Error("read operation on write-only node")
|
|
147
|
-
chunkQueue.read().then((chunk) => {
|
|
152
|
+
reads.add(chunkQueue.read().then((chunk) => {
|
|
148
153
|
this.push(chunk, "binary")
|
|
149
154
|
}).catch((err: Error) => {
|
|
150
155
|
self.log("warning", `read on chunk queue operation failed: ${err}`)
|
|
151
|
-
})
|
|
156
|
+
}))
|
|
152
157
|
}
|
|
153
158
|
})
|
|
154
159
|
}
|
|
@@ -170,7 +175,8 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
170
175
|
this.log("info", `connection closed to URL ${this.params.connect}`)
|
|
171
176
|
})
|
|
172
177
|
this.client.addEventListener("error", (ev: ErrorEvent) => {
|
|
173
|
-
|
|
178
|
+
const error = util.ensureError(ev.error)
|
|
179
|
+
this.log("error", `error of connection on URL ${this.params.connect}: ${error.message}`)
|
|
174
180
|
})
|
|
175
181
|
const chunkQueue = new util.SingleQueue<SpeechFlowChunk>()
|
|
176
182
|
this.client.addEventListener("message", (ev: MessageEvent) => {
|
|
@@ -190,6 +196,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
190
196
|
})
|
|
191
197
|
this.client.binaryType = "arraybuffer"
|
|
192
198
|
const self = this
|
|
199
|
+
const reads = new util.PromiseSet<void>()
|
|
193
200
|
this.stream = new Stream.Duplex({
|
|
194
201
|
writableObjectMode: true,
|
|
195
202
|
readableObjectMode: true,
|
|
@@ -208,14 +215,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
208
215
|
callback()
|
|
209
216
|
}
|
|
210
217
|
},
|
|
218
|
+
async final (callback) {
|
|
219
|
+
await reads.awaitAll()
|
|
220
|
+
callback()
|
|
221
|
+
},
|
|
211
222
|
read (size: number) {
|
|
212
223
|
if (self.params.mode === "w")
|
|
213
224
|
throw new Error("read operation on write-only node")
|
|
214
|
-
chunkQueue.read().then((chunk) => {
|
|
225
|
+
reads.add(chunkQueue.read().then((chunk) => {
|
|
215
226
|
this.push(chunk, "binary")
|
|
216
227
|
}).catch((err: Error) => {
|
|
217
228
|
self.log("warning", `read on chunk queue operation failed: ${err}`)
|
|
218
|
-
})
|
|
229
|
+
}))
|
|
219
230
|
}
|
|
220
231
|
})
|
|
221
232
|
}
|
|
@@ -38,7 +38,7 @@ class AudioSourceProcessor extends AudioWorkletProcessor {
|
|
|
38
38
|
private currentOffset = 0
|
|
39
39
|
|
|
40
40
|
/* node construction */
|
|
41
|
-
constructor() {
|
|
41
|
+
constructor () {
|
|
42
42
|
super()
|
|
43
43
|
|
|
44
44
|
/* receive input chunks */
|
|
@@ -50,7 +50,7 @@ class AudioSourceProcessor extends AudioWorkletProcessor {
|
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
/* process audio frame */
|
|
53
|
-
process(
|
|
53
|
+
process (
|
|
54
54
|
inputs: Float32Array[][], /* unused */
|
|
55
55
|
outputs: Float32Array[][],
|
|
56
56
|
parameters: Record<string, Float32Array> /* unused */
|
|
@@ -117,7 +117,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
|
117
117
|
private activeCaptures = new Map<string, { data: number[], expectedSamples: number }>()
|
|
118
118
|
|
|
119
119
|
/* node construction */
|
|
120
|
-
constructor() {
|
|
120
|
+
constructor () {
|
|
121
121
|
super()
|
|
122
122
|
|
|
123
123
|
/* receive start of capturing command */
|
|
@@ -133,7 +133,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
|
133
133
|
}
|
|
134
134
|
|
|
135
135
|
/* process audio frame */
|
|
136
|
-
process(
|
|
136
|
+
process (
|
|
137
137
|
inputs: Float32Array[][],
|
|
138
138
|
outputs: Float32Array[][], /* unused */
|
|
139
139
|
parameters: Record<string, Float32Array> /* unused */
|