speechflow 1.6.7 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +28 -14
- package/etc/secretlint.json +7 -0
- package/etc/speechflow.yaml +13 -4
- package/etc/stx.conf +3 -2
- package/package.json +8 -6
- package/speechflow-cli/dst/speechflow-main-api.js +3 -3
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +6 -6
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +35 -1
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -2
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -13
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +21 -16
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +75 -46
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +39 -39
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +0 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +2 -2
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -3
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +7 -2
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +14 -4
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +2 -2
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
- package/speechflow-cli/dst/speechflow-util-queue.js +33 -12
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
- package/speechflow-cli/dst/speechflow-util-stream.js +13 -17
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +6 -1
- package/speechflow-cli/etc/stx.conf +1 -0
- package/speechflow-cli/package.json +28 -27
- package/speechflow-cli/src/speechflow-main-api.ts +3 -6
- package/speechflow-cli/src/speechflow-main-graph.ts +6 -8
- package/speechflow-cli/src/speechflow-main-status.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +1 -2
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +0 -1
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +21 -16
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +24 -16
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +86 -54
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +41 -38
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +0 -1
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +0 -5
- package/speechflow-cli/src/speechflow-node-xio-device.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -3
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +7 -2
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +14 -4
- package/speechflow-cli/src/speechflow-util-audio.ts +2 -2
- package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +36 -16
- package/speechflow-cli/src/speechflow-util-stream.ts +24 -21
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/dst/index.css +1 -5
- package/speechflow-ui-db/dst/index.js +14 -58
- package/speechflow-ui-db/etc/stx.conf +5 -16
- package/speechflow-ui-db/package.json +16 -15
- package/speechflow-ui-st/dst/index.css +1 -5
- package/speechflow-ui-st/dst/index.js +31 -160
- package/speechflow-ui-st/etc/stx.conf +5 -16
- package/speechflow-ui-st/package.json +17 -16
|
@@ -20,10 +20,16 @@ import HAPIWebSocket from "hapi-plugin-websocket"
|
|
|
20
20
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
21
21
|
import * as util from "./speechflow-util"
|
|
22
22
|
|
|
23
|
+
/* internal helper types */
|
|
23
24
|
type WSPeerInfo = {
|
|
24
|
-
ctx:
|
|
25
|
-
ws:
|
|
26
|
-
req:
|
|
25
|
+
ctx: Record<string, any>
|
|
26
|
+
ws: WebSocket
|
|
27
|
+
req: http.IncomingMessage
|
|
28
|
+
}
|
|
29
|
+
type TextChunk = {
|
|
30
|
+
start: Duration
|
|
31
|
+
end: Duration
|
|
32
|
+
text: string
|
|
27
33
|
}
|
|
28
34
|
|
|
29
35
|
/* SpeechFlow node for subtitle (text-to-text) "translations" */
|
|
@@ -43,14 +49,14 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
43
49
|
this.configure({
|
|
44
50
|
format: { type: "string", pos: 0, val: "srt", match: /^(?:srt|vtt)$/ },
|
|
45
51
|
words: { type: "boolean", val: false },
|
|
46
|
-
mode: { type: "string", val: "export", match: /^(?:export|render)$/ },
|
|
52
|
+
mode: { type: "string", val: "export", match: /^(?:export|import|render)$/ },
|
|
47
53
|
addr: { type: "string", val: "127.0.0.1" },
|
|
48
54
|
port: { type: "number", val: 8585 }
|
|
49
55
|
})
|
|
50
56
|
|
|
51
57
|
/* declare node input/output format */
|
|
52
58
|
this.input = "text"
|
|
53
|
-
this.output = this.params.mode === "export" ? "text" : "none"
|
|
59
|
+
this.output = (this.params.mode === "export" || this.params.mode === "import") ? "text" : "none"
|
|
54
60
|
}
|
|
55
61
|
|
|
56
62
|
/* open node */
|
|
@@ -95,11 +101,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
95
101
|
}
|
|
96
102
|
return text
|
|
97
103
|
}
|
|
98
|
-
|
|
104
|
+
|
|
105
|
+
/* determine start and end timestamp,
|
|
106
|
+
by using first word's start time and last word's end time (if available),
|
|
107
|
+
to exclude leading and trailing silence parts */
|
|
108
|
+
const words: { word: string, start: Duration, end: Duration }[] = chunk.meta.get("words") ?? []
|
|
109
|
+
const timestampStart = words.length > 0 ? words[0].start : chunk.timestampStart
|
|
110
|
+
const timestampEnd = words.length > 0 ? words[words.length - 1].end : chunk.timestampEnd
|
|
111
|
+
|
|
112
|
+
/* produce SRT/VTT blocks */
|
|
113
|
+
let output = convertSingle(timestampStart, timestampEnd, chunk.payload)
|
|
99
114
|
if (this.params.words) {
|
|
100
|
-
|
|
101
|
-
const words = (chunk.meta.get("words") ?? []) as
|
|
102
|
-
{ word: string, start: Duration, end: Duration }[]
|
|
115
|
+
/* produce additional SRT/VTT blocks with each word highlighted */
|
|
103
116
|
const occurrences = new Map<string, number>()
|
|
104
117
|
for (const word of words) {
|
|
105
118
|
let occurrence = occurrences.get(word.word) ?? 0
|
|
@@ -108,49 +121,210 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
108
121
|
output += convertSingle(word.start, word.end, chunk.payload, word.word, occurrence)
|
|
109
122
|
}
|
|
110
123
|
}
|
|
111
|
-
else
|
|
112
|
-
output += convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
|
|
113
124
|
return output
|
|
114
125
|
}
|
|
115
126
|
|
|
116
127
|
/* establish a duplex stream */
|
|
117
128
|
const self = this
|
|
118
|
-
let
|
|
129
|
+
let headerEmitted = false
|
|
119
130
|
this.stream = new Stream.Transform({
|
|
120
131
|
readableObjectMode: true,
|
|
121
132
|
writableObjectMode: true,
|
|
122
133
|
decodeStrings: false,
|
|
123
134
|
highWaterMark: 1,
|
|
124
135
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
125
|
-
if (
|
|
136
|
+
if (!headerEmitted && self.params.format === "vtt") {
|
|
126
137
|
this.push(new SpeechFlowChunk(
|
|
127
138
|
Duration.fromMillis(0), Duration.fromMillis(0),
|
|
128
139
|
"final", "text",
|
|
129
140
|
"WEBVTT\n\n"
|
|
130
141
|
))
|
|
131
|
-
|
|
142
|
+
headerEmitted = true
|
|
132
143
|
}
|
|
133
144
|
if (Buffer.isBuffer(chunk.payload))
|
|
134
145
|
callback(new Error("invalid chunk payload type"))
|
|
146
|
+
else if (chunk.payload === "") {
|
|
147
|
+
this.push(chunk)
|
|
148
|
+
callback()
|
|
149
|
+
}
|
|
135
150
|
else {
|
|
136
|
-
|
|
137
|
-
|
|
151
|
+
convert(chunk).then((payload) => {
|
|
152
|
+
const chunkNew = chunk.clone()
|
|
153
|
+
chunkNew.payload = payload
|
|
154
|
+
this.push(chunkNew)
|
|
138
155
|
callback()
|
|
156
|
+
}).catch((error: unknown) => {
|
|
157
|
+
callback(util.ensureError(error))
|
|
158
|
+
})
|
|
159
|
+
}
|
|
160
|
+
},
|
|
161
|
+
final (callback) {
|
|
162
|
+
callback()
|
|
163
|
+
}
|
|
164
|
+
})
|
|
165
|
+
}
|
|
166
|
+
else if (this.params.mode === "import") {
|
|
167
|
+
/* convert a single timestamp into a Duration; accepted are the
   SRT notation ("HH:MM:SS,mmm") and the VTT notation ("HH:MM:SS.mmm");
   throws an Error if the text has a different shape or if the
   minutes or seconds are larger than 59 */
const parseTimestamp = (ts: string): Duration => {
    const parts = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/.exec(ts)
    if (parts === null)
        throw new Error(`invalid timestamp format: "${ts}"`)

    /* the four capture groups are plain decimal digits */
    const [ hours, minutes, seconds, milliseconds ] =
        parts.slice(1).map((digits) => Number.parseInt(digits, 10))
    if (minutes > 59 || seconds > 59)
        throw new Error(`invalid timestamp value "${ts}"`)
    return Duration.fromObject({ hours, minutes, seconds, milliseconds })
}
|
|
180
|
+
|
|
181
|
+
/* strip arbitrary HTML-style tags (like <b>, </i> or <v Speaker>)
   and WebVTT cue timestamp tags (like <00:00:01.000> or <00:01.000>);
   a "<" which is not directly followed by a tag name or a timestamp
   (like in "a < b") is left untouched */
const stripHtmlTags = (text: string): string =>
    text.replace(/<(?:\/?[a-zA-Z][^>]*|(?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>/g, "")
|
|
184
|
+
|
|
185
|
+
/* split SRT input into blocks (separated by blank lines) and turn each
   block into a TextChunk; blocks which are too short or which have no
   valid timing line are reported as a warning and skipped, blocks
   without any text (after tag stripping) are skipped silently */
const parseSRT = (input: string): TextChunk[] => {
    const timingRE = /^(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})/
    const results: TextChunk[] = []
    for (const block of input.trim().split(/\r?\n\r?\n+/)) {
        const lines = block.trim().split(/\r?\n/)
        if (lines.length < 2) {
            this.log("warning", "SRT block contains less than 2 lines")
            continue
        }

        /* the timing line is the second line if the block
           starts with the (optional) sequence number line */
        const timingIdx = /^\d+$/.test(lines[0].trim()) ? 1 : 0

        /* decode the timing line */
        const timing = timingRE.exec(lines[timingIdx])
        if (timing === null) {
            this.log("warning", "SRT contains invalid timestamp line")
            continue
        }
        const [ , startText, endText ] = timing
        const start = parseTimestamp(startText)
        const end   = parseTimestamp(endText)

        /* everything after the timing line is the subtitle text */
        const text = stripHtmlTags(lines.slice(timingIdx + 1).join("\n")).trim()
        if (text === "")
            continue
        results.push({ start, end, text })
    }
    return results
}
|
|
221
|
+
|
|
222
|
+
/* parse VTT format: split the input into blocks (separated by blank
   lines) and turn each cue block into a TextChunk; blocks without any
   timing line (e.g. NOTE blocks) and cues without any text (after tag
   stripping) are skipped silently, blocks with a malformed timing
   line are reported as a warning and skipped */
const parseVTT = (input: string): TextChunk[] => {
    const results: TextChunk[] = []

    /* remove VTT header line (plus the newlines directly following it) */
    const content = input.trim().replace(/^WEBVTT[^\r\n]*\r?\n*/, "")

    /* cue timing line: WebVTT allows the hours part to be omitted
       ("MM:SS.mmm"), while parseTimestamp expects the full
       "HH:MM:SS.mmm" form, so short timestamps get "00:" prepended */
    const timingRE = /^((?:\d{2}:)?\d{2}:\d{2}\.\d{3})\s*-->\s*((?:\d{2}:)?\d{2}:\d{2}\.\d{3})/
    const withHours = (ts: string): string =>
        (/^\d{2}:\d{2}\.\d{3}$/.test(ts) ? `00:${ts}` : ts)

    /* iterate over all blocks */
    const blocks = content.trim().split(/\r?\n\r?\n+/)
    for (const block of blocks) {
        /* (splitting always yields at least one line, even for empty input) */
        const lines = block.trim().split(/\r?\n/)

        /* skip optional cue identifier lines */
        let lineIdx = 0
        while (lineIdx < lines.length && !lines[lineIdx].includes("-->"))
            lineIdx++
        if (lineIdx >= lines.length)
            continue

        /* parse timestamp line */
        const timeMatch = timingRE.exec(lines[lineIdx])
        if (!timeMatch) {
            this.log("warning", "VTT contains invalid timestamp line")
            continue
        }
        const start = parseTimestamp(withHours(timeMatch[1]))
        const end   = parseTimestamp(withHours(timeMatch[2]))

        /* collect text lines */
        const textLines = lines.slice(lineIdx + 1).join("\n")
        const text = stripHtmlTags(textLines).trim()
        if (text !== "")
            results.push({ start, end, text })
    }
    return results
}
|
|
263
|
+
|
|
264
|
+
/* buffer for accumulating input */
|
|
265
|
+
let buffer = ""
|
|
266
|
+
|
|
267
|
+
/* establish a duplex stream */
|
|
268
|
+
const self = this
|
|
269
|
+
this.stream = new Stream.Transform({
|
|
270
|
+
readableObjectMode: true,
|
|
271
|
+
writableObjectMode: true,
|
|
272
|
+
decodeStrings: false,
|
|
273
|
+
highWaterMark: 1,
|
|
274
|
+
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
275
|
+
/* sanity check text chunks */
|
|
276
|
+
if (Buffer.isBuffer(chunk.payload)) {
|
|
277
|
+
callback(new Error("invalid chunk payload type"))
|
|
278
|
+
return
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/* short-circuit processing in case of empty payloads */
|
|
282
|
+
if (chunk.payload === "") {
|
|
283
|
+
this.push(chunk)
|
|
284
|
+
callback()
|
|
285
|
+
return
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/* accumulate input */
|
|
289
|
+
buffer += chunk.payload
|
|
290
|
+
|
|
291
|
+
/* parse accumulated input */
|
|
292
|
+
try {
|
|
293
|
+
/* parse entries */
|
|
294
|
+
const entries = (self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer))
|
|
295
|
+
|
|
296
|
+
/* emit parsed entries as individual chunks */
|
|
297
|
+
for (const entry of entries) {
|
|
298
|
+
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
299
|
+
this.push(chunkNew)
|
|
139
300
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
})
|
|
149
|
-
}
|
|
301
|
+
|
|
302
|
+
/* clear buffer after successful parse */
|
|
303
|
+
buffer = ""
|
|
304
|
+
callback()
|
|
305
|
+
}
|
|
306
|
+
catch (error: unknown) {
|
|
307
|
+
buffer = ""
|
|
308
|
+
callback(util.ensureError(error))
|
|
150
309
|
}
|
|
151
310
|
},
|
|
152
311
|
final (callback) {
|
|
153
|
-
|
|
312
|
+
/* process any remaining buffer content */
|
|
313
|
+
if (buffer.trim() !== "") {
|
|
314
|
+
try {
|
|
315
|
+
/* parse entries */
|
|
316
|
+
const entries = self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer)
|
|
317
|
+
|
|
318
|
+
/* emit parsed entries as individual chunks */
|
|
319
|
+
for (const entry of entries) {
|
|
320
|
+
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
321
|
+
this.push(chunkNew)
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
catch (_error: unknown) {
|
|
325
|
+
/* ignore parse errors on final flush */
|
|
326
|
+
}
|
|
327
|
+
}
|
|
154
328
|
callback()
|
|
155
329
|
}
|
|
156
330
|
})
|
|
@@ -239,13 +413,11 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
239
413
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
240
414
|
if (Buffer.isBuffer(chunk.payload))
|
|
241
415
|
callback(new Error("invalid chunk payload type"))
|
|
416
|
+
else if (chunk.payload === "")
|
|
417
|
+
callback()
|
|
242
418
|
else {
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
else {
|
|
246
|
-
emit(chunk)
|
|
247
|
-
callback()
|
|
248
|
-
}
|
|
419
|
+
emit(chunk)
|
|
420
|
+
callback()
|
|
249
421
|
}
|
|
250
422
|
},
|
|
251
423
|
final (callback) {
|
|
@@ -118,7 +118,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
118
118
|
this.stream = this.io as unknown as Stream.Duplex
|
|
119
119
|
|
|
120
120
|
/* convert regular stream into object-mode stream */
|
|
121
|
-
const wrapper1 = util.createTransformStreamForWritableSide()
|
|
121
|
+
const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
|
|
122
122
|
const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
|
|
123
123
|
this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
|
|
124
124
|
}
|
|
@@ -161,7 +161,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
161
161
|
this.stream = this.io as unknown as Stream.Writable
|
|
162
162
|
|
|
163
163
|
/* convert regular stream into object-mode stream */
|
|
164
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
164
|
+
const wrapper = util.createTransformStreamForWritableSide("audio", 1)
|
|
165
165
|
this.stream = Stream.compose(wrapper, this.stream)
|
|
166
166
|
}
|
|
167
167
|
|
|
@@ -128,7 +128,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
128
128
|
}
|
|
129
129
|
|
|
130
130
|
/* convert regular stream into object-mode stream */
|
|
131
|
-
const wrapper1 = util.createTransformStreamForWritableSide()
|
|
131
|
+
const wrapper1 = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
132
132
|
const wrapper2 = util.createTransformStreamForReadableSide(
|
|
133
133
|
this.params.type, () => this.timeZero)
|
|
134
134
|
this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
|
|
@@ -171,7 +171,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
171
171
|
else
|
|
172
172
|
process.stdout.setEncoding(this.config.textEncoding)
|
|
173
173
|
const chunker = createStdoutChunker()
|
|
174
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
174
|
+
const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
175
175
|
this.stream = Stream.compose(wrapper, chunker)
|
|
176
176
|
}
|
|
177
177
|
else {
|
|
@@ -183,7 +183,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
183
183
|
else
|
|
184
184
|
writable = fs.createWriteStream(this.params.path,
|
|
185
185
|
{ highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
|
|
186
|
-
const wrapper = util.createTransformStreamForWritableSide()
|
|
186
|
+
const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
187
187
|
this.stream = Stream.compose(wrapper, writable)
|
|
188
188
|
}
|
|
189
189
|
}
|
|
@@ -112,6 +112,7 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
|
|
|
112
112
|
}
|
|
113
113
|
})
|
|
114
114
|
const self = this
|
|
115
|
+
const reads = new util.PromiseSet<void>()
|
|
115
116
|
this.stream = new Stream.Duplex({
|
|
116
117
|
writableObjectMode: true,
|
|
117
118
|
readableObjectMode: true,
|
|
@@ -134,14 +135,18 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
|
|
|
134
135
|
})
|
|
135
136
|
}
|
|
136
137
|
},
|
|
138
|
+
/* stream finalization: wait for all tracked read operations to settle
   and then signal completion; the callback is invoked in every case
   (a failure of the wait is passed on as an Error), so the stream
   cannot get stuck and no promise rejection is left unhandled */
final (callback) {
    reads.awaitAll().then(
        () => {
            callback()
        },
        (error: unknown) => {
            callback(error instanceof Error ? error : new Error(String(error)))
        }
    )
},
|
|
137
142
|
/* stream read: fetch the next chunk from the chunk queue, push it to
   the readable side and track the pending fetch in the "reads" set;
   a failed fetch is only logged as a warning */
read (size: number) {
    if (self.params.mode === "w")
        throw new Error("read operation on write-only node")
    const pending = self.chunkQueue!.read()
        .then((chunk) => {
            this.push(chunk, "binary")
        })
        .catch((err: Error) => {
            self.log("warning", `read on chunk queue operation failed: ${err}`)
        })
    reads.add(pending)
}
|
|
146
151
|
})
|
|
147
152
|
}
|
|
@@ -109,6 +109,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
109
109
|
this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`)
|
|
110
110
|
})
|
|
111
111
|
const self = this
|
|
112
|
+
const reads = new util.PromiseSet<void>()
|
|
112
113
|
this.stream = new Stream.Duplex({
|
|
113
114
|
writableObjectMode: true,
|
|
114
115
|
readableObjectMode: true,
|
|
@@ -141,14 +142,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
141
142
|
})
|
|
142
143
|
}
|
|
143
144
|
},
|
|
145
|
+
/* stream finalization: wait for all tracked read operations to settle
   and then signal completion; the callback is invoked in every case
   (a failure of the wait is passed on as an Error), so the stream
   cannot get stuck and no promise rejection is left unhandled */
final (callback) {
    reads.awaitAll().then(
        () => {
            callback()
        },
        (error: unknown) => {
            callback(error instanceof Error ? error : new Error(String(error)))
        }
    )
},
|
|
144
149
|
/* stream read: fetch the next chunk from the chunk queue, push it to
   the readable side and track the pending fetch in the "reads" set;
   a failed fetch is only logged as a warning */
read (size: number) {
    if (self.params.mode === "w")
        throw new Error("read operation on write-only node")
    const pending = chunkQueue.read()
        .then((chunk) => {
            this.push(chunk, "binary")
        })
        .catch((err: Error) => {
            self.log("warning", `read on chunk queue operation failed: ${err}`)
        })
    reads.add(pending)
}
|
|
153
158
|
})
|
|
154
159
|
}
|
|
@@ -190,6 +195,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
190
195
|
})
|
|
191
196
|
this.client.binaryType = "arraybuffer"
|
|
192
197
|
const self = this
|
|
198
|
+
const reads = new util.PromiseSet<void>()
|
|
193
199
|
this.stream = new Stream.Duplex({
|
|
194
200
|
writableObjectMode: true,
|
|
195
201
|
readableObjectMode: true,
|
|
@@ -208,14 +214,18 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
208
214
|
callback()
|
|
209
215
|
}
|
|
210
216
|
},
|
|
217
|
+
/* stream finalization: wait for all tracked read operations to settle
   and then signal completion; the callback is invoked in every case
   (a failure of the wait is passed on as an Error), so the stream
   cannot get stuck and no promise rejection is left unhandled */
final (callback) {
    reads.awaitAll().then(
        () => {
            callback()
        },
        (error: unknown) => {
            callback(error instanceof Error ? error : new Error(String(error)))
        }
    )
},
|
|
211
221
|
/* stream read: fetch the next chunk from the chunk queue, push it to
   the readable side and track the pending fetch in the "reads" set;
   a failed fetch is only logged as a warning */
read (size: number) {
    if (self.params.mode === "w")
        throw new Error("read operation on write-only node")
    const pending = chunkQueue.read()
        .then((chunk) => {
            this.push(chunk, "binary")
        })
        .catch((err: Error) => {
            self.log("warning", `read on chunk queue operation failed: ${err}`)
        })
    reads.add(pending)
}
|
|
220
230
|
})
|
|
221
231
|
}
|
|
@@ -91,7 +91,7 @@ export function convertBufToI16 (buf: Buffer, littleEndian = true) {
|
|
|
91
91
|
return arr
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
/* helper function: convert
|
|
94
|
+
/* helper function: convert Int16Array in PCM/I16 to Buffer */
|
|
95
95
|
export function convertI16ToBuf (arr: Int16Array, littleEndian = true) {
|
|
96
96
|
if (arr.length === 0)
|
|
97
97
|
return Buffer.alloc(0)
|
|
@@ -252,7 +252,7 @@ export class WebAudio {
|
|
|
252
252
|
|
|
253
253
|
/* start capture first */
|
|
254
254
|
if (this.captureNode !== null) {
|
|
255
|
-
this.captureNode
|
|
255
|
+
this.captureNode.port.postMessage({
|
|
256
256
|
type: "start-capture",
|
|
257
257
|
chunkId,
|
|
258
258
|
expectedSamples: int16Array.length
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* sleep: wait a duration of time and then resolve;
   "durationMs" is the delay in milliseconds (passed to setTimeout),
   the returned promise resolves without a value and never rejects */
export function sleep (durationMs: number): Promise<void> {
    return new Promise<void>((resolve) => {
        setTimeout(() => {
            resolve()
        }, durationMs)
    })
}
|
|
15
|
+
|
|
16
|
+
/* timeout: wait a duration of time and then reject;
   "durationMs" is the delay in milliseconds (passed to setTimeout),
   the returned promise never resolves and rejects with
   Error("timeout") once the delay elapsed, which makes it
   suitable as a competitor in Promise.race() */
export function timeout (durationMs: number): Promise<never> {
    return new Promise<never>((_resolve, reject) => {
        setTimeout(() => {
            reject(new Error("timeout"))
        }, durationMs)
    })
}
|
|
@@ -35,11 +35,9 @@ export class SingleQueue<T> extends EventEmitter {
|
|
|
35
35
|
}
|
|
36
36
|
read () {
|
|
37
37
|
return new Promise<T>((resolve, reject) => {
|
|
38
|
-
const consume = () =>
|
|
39
|
-
this.queue.length > 0 ? this.queue.pop()! : null
|
|
40
38
|
const tryToConsume = () => {
|
|
41
|
-
const item =
|
|
42
|
-
if (item !==
|
|
39
|
+
const item = this.queue.pop()
|
|
40
|
+
if (item !== undefined)
|
|
43
41
|
resolve(item)
|
|
44
42
|
else
|
|
45
43
|
this.once("dequeue", tryToConsume)
|
|
@@ -47,6 +45,11 @@ export class SingleQueue<T> extends EventEmitter {
|
|
|
47
45
|
tryToConsume()
|
|
48
46
|
})
|
|
49
47
|
}
|
|
48
|
+
/* hand out all currently queued items at once (as the internal
   array itself) and continue with a fresh, empty queue */
drain () {
    const drained = this.queue
    this.queue = []
    return drained
}
|
|
50
53
|
}
|
|
51
54
|
|
|
52
55
|
/* helper class for double-item queue */
|
|
@@ -67,17 +70,17 @@ export class DoubleQueue<T0, T1> extends EventEmitter {
|
|
|
67
70
|
}
|
|
68
71
|
read () {
|
|
69
72
|
return new Promise<[ T0, T1 ]>((resolve, reject) => {
|
|
70
|
-
const consume = (): [ T0, T1 ] |
|
|
73
|
+
const consume = (): [ T0, T1 ] | undefined => {
|
|
71
74
|
if (this.queue0.length > 0 && this.queue1.length > 0) {
|
|
72
75
|
const item0 = this.queue0.pop() as T0
|
|
73
76
|
const item1 = this.queue1.pop() as T1
|
|
74
77
|
return [ item0, item1 ]
|
|
75
78
|
}
|
|
76
|
-
return
|
|
79
|
+
return undefined
|
|
77
80
|
}
|
|
78
81
|
const tryToConsume = () => {
|
|
79
82
|
const items = consume()
|
|
80
|
-
if (items !==
|
|
83
|
+
if (items !== undefined)
|
|
81
84
|
resolve(items)
|
|
82
85
|
else
|
|
83
86
|
this.once("dequeue", tryToConsume)
|
|
@@ -273,12 +276,12 @@ export class TimeStore<T> extends EventEmitter {
|
|
|
273
276
|
|
|
274
277
|
/* asynchronous queue */
|
|
275
278
|
export class AsyncQueue<T> {
|
|
276
|
-
private queue: Array<T
|
|
277
|
-
private resolvers: (
|
|
278
|
-
write (v: T
|
|
279
|
-
const
|
|
280
|
-
if (
|
|
281
|
-
resolve(v)
|
|
279
|
+
private queue: Array<T> = []
|
|
280
|
+
private resolvers: { resolve: (v: T) => void, reject: (err: Error) => void }[] = []
|
|
281
|
+
/* deliver a value: hand it directly to the longest-waiting
   reader or, if no reader is waiting, buffer it in the queue */
write (v: T) {
    const waiting = this.resolvers.shift()
    if (waiting === undefined) {
        this.queue.push(v)
        return
    }
    waiting.resolve(v)
}
|
|
@@ -286,11 +289,14 @@ export class AsyncQueue<T> {
|
|
|
286
289
|
if (this.queue.length > 0)
|
|
287
290
|
return this.queue.shift()!
|
|
288
291
|
else
|
|
289
|
-
return new Promise<T
|
|
292
|
+
return new Promise<T>((resolve, reject) => this.resolvers.push({ resolve, reject }))
|
|
293
|
+
}
|
|
294
|
+
/* tell whether no values are currently buffered
   (waiting readers are not taken into account) */
empty () {
    const buffered = this.queue.length
    return buffered === 0
}
|
|
291
297
|
/* tear down the queue: reject every waiting reader with an
   "AsyncQueue destroyed" error and drop all buffered values */
destroy () {
    this.resolvers.forEach(({ reject }) => {
        reject(new Error("AsyncQueue destroyed"))
    })
    this.queue = []
    this.resolvers = []
}
|
|
@@ -318,3 +324,17 @@ export class CachedRegExp {
|
|
|
318
324
|
return this.cache.size
|
|
319
325
|
}
|
|
320
326
|
}
|
|
327
|
+
|
|
328
|
+
/* set of promises: keeps track of promises until they settle
   and allows waiting for all promises still in flight */
export class PromiseSet<T> {
    /* the tracked promises which have not settled yet */
    private promises = new Set<Promise<T>>()

    /* track a promise; it is dropped from the set again
       as soon as it settles (fulfilled or rejected) */
    add (promise: Promise<T>): void {
        this.promises.add(promise)
        promise.finally(() => {
            this.promises.delete(promise)
        }).catch(() => {
            /* the promise derived by finally() mirrors a rejection of the
               tracked promise; it is ignored here, as the handling of the
               rejection is up to the owner of the tracked promise */
        })
    }

    /* wait until every promise tracked at call time has settled;
       promises added afterwards are not waited for; if at least one
       promise rejected, this rejects with the reason of the first rejected
       promise (in insertion order), but only after ALL promises settled,
       so no tracked operation is still in flight when this returns */
    async awaitAll (): Promise<void> {
        const outcomes = await Promise.all(Array.from(this.promises, (promise) =>
            promise.then(
                () => undefined,
                (reason: unknown) => ({ reason })
            )
        ))
        const failure = outcomes.find((outcome) => outcome !== undefined)
        if (failure !== undefined)
            throw failure.reason
    }
}
|