speechflow 2.2.1 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{etc/claude.md → AGENTS.md} +8 -3
- package/CHANGELOG.md +98 -1
- package/README.md +28 -4
- package/etc/speechflow.yaml +3 -1
- package/etc/stx.conf +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-main-api.js +57 -16
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-config.js +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +6 -3
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +17 -19
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +25 -8
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +16 -13
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +7 -4
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +21 -16
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +42 -23
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +8 -2
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +24 -10
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +22 -7
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +10 -2
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +3 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +160 -57
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +27 -15
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +84 -63
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +75 -20
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +5 -7
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +28 -15
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +2 -2
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +13 -3
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
- package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -17
- package/speechflow-cli/dst/speechflow-util-queue.js +98 -78
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -1
- package/speechflow-cli/dst/speechflow.d.ts +1 -1
- package/speechflow-cli/dst/speechflow.js +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/etc/stx.conf +8 -2
- package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
- package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
- package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
- package/speechflow-cli/package.json +40 -30
- package/speechflow-cli/src/lib.d.ts +19 -1
- package/speechflow-cli/src/speechflow-main-api.ts +64 -19
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-config.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +6 -3
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +19 -20
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +31 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +17 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +9 -8
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +8 -4
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +22 -18
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +47 -25
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +12 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +24 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +22 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +10 -2
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +215 -62
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +30 -16
- package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +92 -70
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +79 -22
- package/speechflow-cli/src/speechflow-node.ts +7 -8
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
- package/speechflow-cli/src/speechflow-util-audio.ts +31 -17
- package/speechflow-cli/src/speechflow-util-error.ts +3 -3
- package/speechflow-cli/src/speechflow-util-llm.ts +14 -3
- package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
- package/speechflow-cli/src/speechflow-util-queue.ts +103 -81
- package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
- package/speechflow-cli/src/speechflow-util.ts +1 -1
- package/speechflow-cli/src/speechflow.ts +1 -1
- package/speechflow-ui-db/dst/index.html +1 -1
- package/speechflow-ui-db/dst/index.js +15 -15
- package/speechflow-ui-db/etc/eslint.mjs +1 -1
- package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-db/etc/stx.conf +1 -1
- package/speechflow-ui-db/etc/stylelint.js +1 -1
- package/speechflow-ui-db/etc/stylelint.yaml +1 -1
- package/speechflow-ui-db/etc/vite-client.mts +1 -1
- package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-db/package.json +22 -16
- package/speechflow-ui-db/src/app.styl +1 -1
- package/speechflow-ui-db/src/app.vue +1 -1
- package/speechflow-ui-db/src/index.html +1 -1
- package/speechflow-ui-db/src/index.ts +1 -1
- package/speechflow-ui-st/dst/index.html +1 -1
- package/speechflow-ui-st/dst/index.js +31 -31
- package/speechflow-ui-st/etc/eslint.mjs +1 -1
- package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-st/etc/stx.conf +1 -1
- package/speechflow-ui-st/etc/stylelint.js +1 -1
- package/speechflow-ui-st/etc/stylelint.yaml +1 -1
- package/speechflow-ui-st/etc/vite-client.mts +1 -1
- package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-st/package.json +23 -17
- package/speechflow-ui-st/src/app.styl +1 -1
- package/speechflow-ui-st/src/app.vue +1 -1
- package/speechflow-ui-st/src/index.html +1 -1
- package/speechflow-ui-st/src/index.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -35,6 +35,66 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
35
35
|
private queueRecv = this.queue.pointerUse("recv")
|
|
36
36
|
private closing = false
|
|
37
37
|
private workingOffTimer: ReturnType<typeof setTimeout> | null = null
|
|
38
|
+
private lastChunkTime = 0
|
|
39
|
+
|
|
40
|
+
/* known abbreviations from English and German (lowercased),
|
|
41
|
+
which should NOT be treated as sentence boundaries */
|
|
42
|
+
private static abbreviations = new Set([
|
|
43
|
+
"prof", "dr", "mr", "mrs", "ms", "jr", "sr", "st",
|
|
44
|
+
"vs", "etc", "ca", "bzw", "bspw", "usw", "sog", "ggf", "evtl"
|
|
45
|
+
])
|
|
46
|
+
|
|
47
|
+
/* find the first valid sentence boundary in text */
|
|
48
|
+
private static findSentenceBoundary (text: string): { sentence: string, rest: string } | null {
|
|
49
|
+
for (let i = 0; i < text.length; i++) {
|
|
50
|
+
/* match sentence-ending punctuation (including ellipsis "..." and "…") */
|
|
51
|
+
const pm = /^(\.\.\.|\u2026|\.|\?|!)/.exec(text.slice(i, i + 3))
|
|
52
|
+
if (!pm)
|
|
53
|
+
continue
|
|
54
|
+
const firstPunctPos = i
|
|
55
|
+
i += pm[1].length - 1
|
|
56
|
+
|
|
57
|
+
/* extract the word preceding the punctuation mark */
|
|
58
|
+
let j = Math.max(0, firstPunctPos - 1)
|
|
59
|
+
while (j >= 0) {
|
|
60
|
+
/* handle surrogate pairs (for characters outside the BMP) */
|
|
61
|
+
if (j > 0 && /[\uDC00-\uDFFF]/.test(text[j])) {
|
|
62
|
+
if (!/^\p{L}$/u.test(text[j - 1] + text[j]))
|
|
63
|
+
break
|
|
64
|
+
j -= 2
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
if (!/^\p{L}$/u.test(text[j]))
|
|
68
|
+
break
|
|
69
|
+
j--
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
const precedingWord = text.substring(j + 1, firstPunctPos)
|
|
73
|
+
|
|
74
|
+
/* skip abbreviations (only relevant for periods) */
|
|
75
|
+
if (pm[1] === ".") {
|
|
76
|
+
/* skip single-letter abbreviations (handles "U.S.", "e.g.", "i.e.", etc.) */
|
|
77
|
+
if (precedingWord.length === 1 && /^\p{L}$/u.test(precedingWord))
|
|
78
|
+
continue
|
|
79
|
+
|
|
80
|
+
/* skip known multi-letter abbreviations (case-insensitive matching) */
|
|
81
|
+
if (SpeechFlowNodeT2TSentence.abbreviations.has(precedingWord.toLowerCase()))
|
|
82
|
+
continue
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/* return what follows the punctuation mark
|
|
86
|
+
(also skip over optional closing quotes/parentheses/brackets) */
|
|
87
|
+
const after = text.substring(i + 1)
|
|
88
|
+
const m = after.match(/^(["\u201D\u2019)\]]*)\s+([\s\S]+)$/)
|
|
89
|
+
if (m !== null)
|
|
90
|
+
return { sentence: text.substring(0, i + 1 + m[1].length), rest: m[2] }
|
|
91
|
+
|
|
92
|
+
/* found a punctuation at end of text (possibly with trailing closing chars and whitespace) */
|
|
93
|
+
if (/^["\u201D\u2019)\]]*\s*$/.test(after))
|
|
94
|
+
return { sentence: text.substring(0, i + 1) + after.replace(/\s+$/, ""), rest: "" }
|
|
95
|
+
}
|
|
96
|
+
return null
|
|
97
|
+
}
|
|
38
98
|
|
|
39
99
|
/* construct node */
|
|
40
100
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -53,7 +113,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
53
113
|
|
|
54
114
|
/* concatenate two payloads with proper whitespacing */
|
|
55
115
|
private concatPayload (s1: string, s2: string) {
|
|
56
|
-
if (!(
|
|
116
|
+
if (!(/\s+$/.test(s1) || /^\s+/.test(s2)))
|
|
57
117
|
return `${s1} ${s2}`
|
|
58
118
|
else
|
|
59
119
|
return `${s1}${s2}`
|
|
@@ -64,24 +124,12 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
64
124
|
/* clear destruction flag */
|
|
65
125
|
this.closing = false
|
|
66
126
|
|
|
67
|
-
/* work off queued text frames */
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
/* control working off round */
|
|
74
|
-
if (workingOff)
|
|
75
|
-
return
|
|
76
|
-
workingOff = true
|
|
77
|
-
if (this.workingOffTimer !== null) {
|
|
78
|
-
clearTimeout(this.workingOffTimer)
|
|
79
|
-
this.workingOffTimer = null
|
|
80
|
-
}
|
|
81
|
-
this.queue.off("write", workOffQueue)
|
|
82
|
-
|
|
83
|
-
/* try to work off one or more chunks */
|
|
84
|
-
while (!this.closing) {
|
|
127
|
+
/* work off queued text frames (inner processing) */
|
|
128
|
+
const workOffQueueInner = (): boolean => {
|
|
129
|
+
const maxIterations = 50
|
|
130
|
+
let iterations = 0
|
|
131
|
+
while (!this.closing && iterations < maxIterations) {
|
|
132
|
+
iterations++
|
|
85
133
|
const element = this.queueSplit.peek()
|
|
86
134
|
if (element === undefined)
|
|
87
135
|
break
|
|
@@ -91,46 +139,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
91
139
|
}
|
|
92
140
|
|
|
93
141
|
/* skip elements already completed */
|
|
94
|
-
if (element.type === "text-frame"
|
|
142
|
+
if (element.type === "text-frame"
|
|
143
|
+
&& element.chunk.kind === "final"
|
|
144
|
+
&& element.complete === true) {
|
|
95
145
|
this.queueSplit.walk(+1)
|
|
96
146
|
continue
|
|
97
147
|
}
|
|
98
148
|
|
|
99
149
|
/* perform sentence splitting on input chunk */
|
|
100
150
|
if (element.chunk.kind === "final") {
|
|
151
|
+
element.chunk = element.chunk.clone()
|
|
101
152
|
const chunk = element.chunk
|
|
102
153
|
const payload = chunk.payload as string
|
|
103
|
-
const
|
|
104
|
-
if (
|
|
154
|
+
const boundary = SpeechFlowNodeT2TSentence.findSentenceBoundary(payload)
|
|
155
|
+
if (boundary !== null) {
|
|
105
156
|
/* contains a sentence */
|
|
106
|
-
const
|
|
107
|
-
if (rest !==
|
|
157
|
+
const { sentence, rest } = boundary
|
|
158
|
+
if (rest !== "") {
|
|
108
159
|
/* contains more than a sentence */
|
|
109
160
|
const chunk2 = chunk.clone()
|
|
110
161
|
const duration = Duration.fromMillis(
|
|
111
162
|
chunk.timestampEnd.minus(chunk.timestampStart).toMillis() *
|
|
112
|
-
(sentence.length / payload.length))
|
|
163
|
+
(sentence.length / Math.max(payload.length, 1)))
|
|
113
164
|
chunk2.timestampStart = chunk.timestampStart.plus(duration)
|
|
114
165
|
chunk.timestampEnd = chunk2.timestampStart
|
|
115
166
|
chunk.payload = sentence
|
|
116
167
|
chunk2.payload = rest
|
|
117
168
|
element.complete = true
|
|
118
|
-
this.queue.
|
|
119
|
-
this.queueSplit.touch()
|
|
120
|
-
this.queue.silent(false)
|
|
169
|
+
this.queue.silently(() => { this.queueSplit.touch() })
|
|
121
170
|
this.queueSplit.walk(+1)
|
|
122
171
|
this.queueSplit.insert({ type: "text-frame", chunk: chunk2, complete: false })
|
|
123
172
|
}
|
|
124
173
|
else {
|
|
125
174
|
/* contains just the sentence */
|
|
126
175
|
element.complete = true
|
|
127
|
-
this.queue.
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
176
|
+
const position = this.queue.silently(() =>
|
|
177
|
+
this.queueSplit.silently(() => {
|
|
178
|
+
const pos = this.queueSplit.position()
|
|
179
|
+
this.queueSplit.walk(+1)
|
|
180
|
+
return pos
|
|
181
|
+
})
|
|
182
|
+
)
|
|
183
|
+
if (position < this.queue.elements.length)
|
|
184
|
+
this.queueSplit.touch(position)
|
|
134
185
|
}
|
|
135
186
|
}
|
|
136
187
|
else {
|
|
@@ -151,21 +202,52 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
151
202
|
}
|
|
152
203
|
if (element2.chunk.kind === "final") {
|
|
153
204
|
/* merge into following chunk */
|
|
205
|
+
element2.chunk = element2.chunk.clone()
|
|
154
206
|
element2.chunk.timestampStart = element.chunk.timestampStart
|
|
155
207
|
element2.chunk.payload = this.concatPayload(element.chunk.payload as string,
|
|
156
208
|
element2.chunk.payload as string)
|
|
157
209
|
|
|
158
210
|
/* remove current element and touch now current element */
|
|
159
|
-
this.queue.
|
|
160
|
-
this.queueSplit.delete()
|
|
161
|
-
this.queue.silent(false)
|
|
211
|
+
this.queue.silently(() => { this.queueSplit.delete() })
|
|
162
212
|
this.queueSplit.touch()
|
|
163
213
|
}
|
|
164
|
-
else
|
|
165
|
-
|
|
214
|
+
else {
|
|
215
|
+
/* following chunk is intermediate (speculative):
|
|
216
|
+
check timeout to flush incomplete sentence fragment */
|
|
217
|
+
if (this.lastChunkTime > 0
|
|
218
|
+
&& (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
|
|
219
|
+
element.complete = true
|
|
220
|
+
const position2 = this.queue.silently(() =>
|
|
221
|
+
this.queueSplit.silently(() => {
|
|
222
|
+
const pos = this.queueSplit.position()
|
|
223
|
+
this.queueSplit.walk(+1)
|
|
224
|
+
return pos
|
|
225
|
+
})
|
|
226
|
+
)
|
|
227
|
+
if (position2 < this.queue.elements.length)
|
|
228
|
+
this.queueSplit.touch(position2)
|
|
229
|
+
}
|
|
230
|
+
else
|
|
231
|
+
break
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
else if (this.lastChunkTime > 0
|
|
235
|
+
&& (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
|
|
236
|
+
/* no following chunk yet, but timeout expired:
|
|
237
|
+
flush incomplete sentence fragment */
|
|
238
|
+
element.complete = true
|
|
239
|
+
const position = this.queue.silently(() =>
|
|
240
|
+
this.queueSplit.silently(() => {
|
|
241
|
+
const pos = this.queueSplit.position()
|
|
242
|
+
this.queueSplit.walk(+1)
|
|
243
|
+
return pos
|
|
244
|
+
})
|
|
245
|
+
)
|
|
246
|
+
if (position < this.queue.elements.length)
|
|
247
|
+
this.queueSplit.touch(position)
|
|
166
248
|
}
|
|
167
249
|
else {
|
|
168
|
-
/* no following chunk yet */
|
|
250
|
+
/* no following chunk yet, still within timeout */
|
|
169
251
|
break
|
|
170
252
|
}
|
|
171
253
|
}
|
|
@@ -173,18 +255,48 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
173
255
|
else
|
|
174
256
|
break
|
|
175
257
|
}
|
|
258
|
+
return (!this.closing && iterations >= maxIterations)
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
/* work off queued text frames (outer processing) */
|
|
262
|
+
let workingOff = false
|
|
263
|
+
const workOffQueue = async () => {
|
|
264
|
+
if (this.closing)
|
|
265
|
+
return
|
|
266
|
+
|
|
267
|
+
/* control working off round */
|
|
268
|
+
if (workingOff)
|
|
269
|
+
return
|
|
270
|
+
workingOff = true
|
|
271
|
+
if (this.workingOffTimer !== null) {
|
|
272
|
+
clearTimeout(this.workingOffTimer)
|
|
273
|
+
this.workingOffTimer = null
|
|
274
|
+
}
|
|
275
|
+
this.queue.off("write", workOffQueue)
|
|
176
276
|
|
|
177
|
-
/*
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
277
|
+
/* try to work off one or more chunks */
|
|
278
|
+
let hasMore = false
|
|
279
|
+
try {
|
|
280
|
+
hasMore = workOffQueueInner()
|
|
281
|
+
}
|
|
282
|
+
catch (error) {
|
|
283
|
+
this.log("error", `sentence splitting error: ${error}`)
|
|
284
|
+
}
|
|
285
|
+
finally {
|
|
286
|
+
/* re-initiate working off round (if still not destroyed) */
|
|
287
|
+
workingOff = false
|
|
288
|
+
if (!this.closing) {
|
|
289
|
+
this.workingOffTimer = setTimeout(workOffQueue, hasMore ? 0 : 100)
|
|
290
|
+
this.queue.once("write", workOffQueue)
|
|
291
|
+
}
|
|
181
292
|
}
|
|
182
|
-
workingOff = false
|
|
183
293
|
}
|
|
184
294
|
this.queue.once("write", workOffQueue)
|
|
185
295
|
|
|
186
296
|
/* provide Duplex stream and internally attach to classifier */
|
|
187
|
-
let
|
|
297
|
+
let previewedPayload = ""
|
|
298
|
+
let flushListenerRegistered = false
|
|
299
|
+
let eofPushed = false
|
|
188
300
|
const self = this
|
|
189
301
|
this.stream = new Stream.Duplex({
|
|
190
302
|
writableObjectMode: true,
|
|
@@ -217,8 +329,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
217
329
|
}
|
|
218
330
|
}
|
|
219
331
|
}
|
|
220
|
-
|
|
332
|
+
previewedPayload = ""
|
|
221
333
|
self.queueRecv.append({ type: "text-frame", chunk, complete: false })
|
|
334
|
+
self.lastChunkTime = Date.now()
|
|
222
335
|
callback()
|
|
223
336
|
}
|
|
224
337
|
},
|
|
@@ -229,6 +342,20 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
229
342
|
callback()
|
|
230
343
|
return
|
|
231
344
|
}
|
|
345
|
+
|
|
346
|
+
/* promote any trailing intermediate chunk to final
|
|
347
|
+
(no replacement will ever arrive, so treat it as final) */
|
|
348
|
+
const recvPos = self.queueRecv.position()
|
|
349
|
+
if (recvPos > 0) {
|
|
350
|
+
const element = self.queueRecv.peek(recvPos - 1)
|
|
351
|
+
if (element
|
|
352
|
+
&& element.type === "text-frame"
|
|
353
|
+
&& element.chunk.kind === "intermediate") {
|
|
354
|
+
element.chunk = element.chunk.clone()
|
|
355
|
+
element.chunk.kind = "final"
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
232
359
|
/* signal end of file */
|
|
233
360
|
self.queueRecv.append({ type: "text-eof" })
|
|
234
361
|
callback()
|
|
@@ -236,17 +363,27 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
236
363
|
|
|
237
364
|
/* send text chunk(s) (readable side of stream) */
|
|
238
365
|
read (_size) {
|
|
366
|
+
/* idempotently push EOF to readable side */
|
|
367
|
+
const pushNull = () => {
|
|
368
|
+
if (eofPushed)
|
|
369
|
+
return
|
|
370
|
+
eofPushed = true
|
|
371
|
+
this.push(null)
|
|
372
|
+
}
|
|
373
|
+
|
|
239
374
|
/* flush pending text chunks */
|
|
240
375
|
const flushPendingChunks = () => {
|
|
376
|
+
flushListenerRegistered = false
|
|
241
377
|
if (self.closing) {
|
|
242
|
-
|
|
378
|
+
pushNull()
|
|
243
379
|
return
|
|
244
380
|
}
|
|
245
381
|
const element = self.queueSend.peek()
|
|
246
382
|
if (element !== undefined
|
|
247
383
|
&& element.type === "text-eof") {
|
|
248
|
-
|
|
384
|
+
pushNull()
|
|
249
385
|
self.queueSend.walk(+1)
|
|
386
|
+
self.queue.trim()
|
|
250
387
|
}
|
|
251
388
|
else if (element !== undefined
|
|
252
389
|
&& element.type === "text-frame"
|
|
@@ -258,7 +395,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
258
395
|
if (nextElement === undefined)
|
|
259
396
|
break
|
|
260
397
|
else if (nextElement.type === "text-eof") {
|
|
261
|
-
|
|
398
|
+
pushNull()
|
|
262
399
|
self.queueSend.walk(+1)
|
|
263
400
|
eofSeen = true
|
|
264
401
|
break
|
|
@@ -266,20 +403,22 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
266
403
|
else if (nextElement.type === "text-frame"
|
|
267
404
|
&& nextElement.complete !== true)
|
|
268
405
|
break
|
|
269
|
-
self.log("info", `send text
|
|
406
|
+
self.log("info", `send text/complete (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)} pos=${self.queueSend.position()}`)
|
|
270
407
|
this.push(nextElement.chunk)
|
|
271
408
|
self.queueSend.walk(+1)
|
|
272
|
-
self.queue.trim()
|
|
273
409
|
}
|
|
410
|
+
previewedPayload = ""
|
|
411
|
+
self.queue.trim()
|
|
274
412
|
|
|
275
413
|
/* wait for more data (unless end-of-stream was reached) */
|
|
276
|
-
if (!eofSeen && !self.closing)
|
|
414
|
+
if (!eofSeen && !self.closing && !flushListenerRegistered) {
|
|
415
|
+
flushListenerRegistered = true
|
|
277
416
|
self.queue.once("write", flushPendingChunks)
|
|
417
|
+
}
|
|
278
418
|
}
|
|
279
419
|
else if (element !== undefined
|
|
280
420
|
&& element.type === "text-frame"
|
|
281
421
|
&& element.complete === false
|
|
282
|
-
&& !previewed
|
|
283
422
|
&& self.params.interim === true) {
|
|
284
423
|
/* merge together all still queued elements and
|
|
285
424
|
send this out as an intermediate chunk as preview */
|
|
@@ -293,17 +432,30 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
293
432
|
break
|
|
294
433
|
previewChunk.payload = self.concatPayload(
|
|
295
434
|
previewChunk.payload as string, element2.chunk.payload as string)
|
|
435
|
+
previewChunk.timestampEnd = element2.chunk.timestampEnd
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
/* send preview only if payload actually changed */
|
|
439
|
+
if ((previewChunk.payload as string) !== previewedPayload) {
|
|
440
|
+
this.push(previewChunk)
|
|
441
|
+
self.log("info", `send text/preview (intermediate): ${JSON.stringify(previewChunk.payload)}`)
|
|
442
|
+
previewedPayload = previewChunk.payload as string
|
|
296
443
|
}
|
|
297
|
-
this.push(previewChunk)
|
|
298
|
-
self.log("info", `send text 2 (intermediate): ${JSON.stringify(previewChunk.payload)}`)
|
|
299
|
-
previewed = true
|
|
300
444
|
|
|
301
445
|
/* wait for more data */
|
|
302
|
-
if (!self.closing)
|
|
446
|
+
if (!self.closing && !flushListenerRegistered) {
|
|
447
|
+
flushListenerRegistered = true
|
|
303
448
|
self.queue.once("write", flushPendingChunks)
|
|
449
|
+
}
|
|
304
450
|
}
|
|
305
|
-
else if (!self.closing)
|
|
451
|
+
else if (!self.closing && !flushListenerRegistered) {
|
|
452
|
+
flushListenerRegistered = true
|
|
306
453
|
self.queue.once("write", flushPendingChunks)
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
if (flushListenerRegistered) {
|
|
457
|
+
self.queue.removeListener("write", flushPendingChunks)
|
|
458
|
+
flushListenerRegistered = false
|
|
307
459
|
}
|
|
308
460
|
flushPendingChunks()
|
|
309
461
|
}
|
|
@@ -321,8 +473,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
321
473
|
this.workingOffTimer = null
|
|
322
474
|
}
|
|
323
475
|
|
|
324
|
-
/* remove any pending event listeners */
|
|
476
|
+
/* remove any pending event listeners and clear queue */
|
|
325
477
|
this.queue.removeAllListeners("write")
|
|
478
|
+
this.queue.clear()
|
|
326
479
|
|
|
327
480
|
/* shutdown stream */
|
|
328
481
|
if (this.stream !== null) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -112,6 +112,9 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
112
112
|
/* produce SRT/VTT blocks */
|
|
113
113
|
let output = convertSingle(timestampStart, timestampEnd, chunk.payload)
|
|
114
114
|
if (this.params.words) {
|
|
115
|
+
if (words.length === 0)
|
|
116
|
+
this.log("warning", "word-level subtitle highlighting requested but no word-level timing data available")
|
|
117
|
+
|
|
115
118
|
/* produce additional SRT/VTT blocks with each word highlighted */
|
|
116
119
|
const occurrences = new Map<string, number>()
|
|
117
120
|
for (const word of words) {
|
|
@@ -166,7 +169,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
166
169
|
else if (this.params.mode === "import") {
|
|
167
170
|
/* parse timestamp in SRT format ("HH:MM:SS,mmm") or VTT format ("HH:MM:SS.mmm") */
|
|
168
171
|
const parseTimestamp = (ts: string): Duration => {
|
|
169
|
-
const match = ts.match(/^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/)
|
|
172
|
+
const match = ts.match(/^(\d{2,}):(\d{2}):(\d{2})[,.](\d{3})$/)
|
|
170
173
|
if (!match)
|
|
171
174
|
throw new Error(`invalid timestamp format: "${ts}"`)
|
|
172
175
|
const hours = Number.parseInt(match[1], 10)
|
|
@@ -202,7 +205,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
202
205
|
|
|
203
206
|
/* parse timestamp line */
|
|
204
207
|
const timeLine = lines[lineIdx]
|
|
205
|
-
const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})/)
|
|
208
|
+
const timeMatch = timeLine.match(/^(\d{2,}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2,}:\d{2}:\d{2},\d{3})/)
|
|
206
209
|
if (!timeMatch) {
|
|
207
210
|
this.log("warning", "SRT contains invalid timestamp line")
|
|
208
211
|
continue
|
|
@@ -230,8 +233,8 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
230
233
|
const blocks = content.trim().split(/\r?\n\r?\n+/)
|
|
231
234
|
for (const block of blocks) {
|
|
232
235
|
const lines = block.trim().split(/\r?\n/)
|
|
233
|
-
if (lines.length <
|
|
234
|
-
this.log("warning", "VTT block contains fewer than
|
|
236
|
+
if (lines.length < 2) {
|
|
237
|
+
this.log("warning", "VTT block contains fewer than 2 lines")
|
|
235
238
|
continue
|
|
236
239
|
}
|
|
237
240
|
|
|
@@ -244,7 +247,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
244
247
|
|
|
245
248
|
/* parse timestamp line */
|
|
246
249
|
const timeLine = lines[lineIdx]
|
|
247
|
-
const timeMatch = timeLine.match(/^(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})/)
|
|
250
|
+
const timeMatch = timeLine.match(/^(\d{2,}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2,}:\d{2}:\d{2}\.\d{3})/)
|
|
248
251
|
if (!timeMatch) {
|
|
249
252
|
this.log("warning", "VTT contains invalid timestamp line")
|
|
250
253
|
continue
|
|
@@ -288,25 +291,42 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
288
291
|
/* accumulate input */
|
|
289
292
|
buffer += chunk.payload
|
|
290
293
|
|
|
291
|
-
/*
|
|
294
|
+
/* find the last double-newline boundary to separate
|
|
295
|
+
complete blocks from a potentially incomplete trailing block */
|
|
296
|
+
const boundary = /\r?\n\r?\n/g
|
|
297
|
+
let lastBoundaryEnd = -1
|
|
298
|
+
let match: RegExpExecArray | null
|
|
299
|
+
while ((match = boundary.exec(buffer)) !== null)
|
|
300
|
+
lastBoundaryEnd = match.index + match[0].length
|
|
301
|
+
|
|
302
|
+
/* if no complete block boundary found, wait for more data */
|
|
303
|
+
if (lastBoundaryEnd < 0) {
|
|
304
|
+
callback()
|
|
305
|
+
return
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
/* split buffer into complete portion and remainder */
|
|
309
|
+
const complete = buffer.substring(0, lastBoundaryEnd)
|
|
310
|
+
const remainder = buffer.substring(lastBoundaryEnd)
|
|
311
|
+
|
|
312
|
+
/* parse only the complete portion */
|
|
292
313
|
try {
|
|
293
314
|
/* parse entries */
|
|
294
|
-
const entries = (self.params.format === "srt" ? parseSRT(
|
|
315
|
+
const entries = (self.params.format === "srt" ? parseSRT(complete) : parseVTT(complete))
|
|
295
316
|
|
|
296
317
|
/* emit parsed entries as individual chunks */
|
|
297
318
|
for (const entry of entries) {
|
|
298
319
|
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
299
320
|
this.push(chunkNew)
|
|
300
321
|
}
|
|
301
|
-
|
|
302
|
-
/* clear buffer after successful parse */
|
|
303
|
-
buffer = ""
|
|
304
|
-
callback()
|
|
305
322
|
}
|
|
306
323
|
catch (error: unknown) {
|
|
307
|
-
|
|
308
|
-
callback(util.ensureError(error))
|
|
324
|
+
self.log("warning", `subtitle parse error: ${util.ensureError(error).message}`)
|
|
309
325
|
}
|
|
326
|
+
|
|
327
|
+
/* keep only the unparsed remainder in the buffer */
|
|
328
|
+
buffer = remainder
|
|
329
|
+
callback()
|
|
310
330
|
},
|
|
311
331
|
final (callback) {
|
|
312
332
|
/* process any remaining buffer content */
|
|
@@ -402,7 +422,11 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
402
422
|
const emit = (chunk: SpeechFlowChunk) => {
|
|
403
423
|
const data = JSON.stringify(chunk)
|
|
404
424
|
for (const info of wsPeers.values())
|
|
405
|
-
info.ws.
|
|
425
|
+
if (info.ws.readyState === WebSocket.OPEN)
|
|
426
|
+
info.ws.send(data, (err) => {
|
|
427
|
+
if (err)
|
|
428
|
+
this.log("warning", `HAPI: WebSocket: subtitle send failed: ${err.message}`)
|
|
429
|
+
})
|
|
406
430
|
}
|
|
407
431
|
|
|
408
432
|
/* establish writable stream */
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -103,7 +103,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
|
|
|
103
103
|
|
|
104
104
|
/* count sentences in text */
|
|
105
105
|
private countSentences (text: string): number {
|
|
106
|
-
const matches = text.match(/[.;?!]/g)
|
|
106
|
+
const matches = text.match(/[.;?!]+(?:\s|$)/g)
|
|
107
107
|
return matches ? matches.length : 0
|
|
108
108
|
}
|
|
109
109
|
|
|
@@ -186,7 +186,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
|
|
|
186
186
|
},
|
|
187
187
|
final (callback) {
|
|
188
188
|
/* generate final summary if there is accumulated text */
|
|
189
|
-
if (self.accumulatedText.length > 0
|
|
189
|
+
if (self.accumulatedText.length > 0) {
|
|
190
190
|
self.sentencesSinceLastSummary = 0
|
|
191
191
|
self.log("info", "generating final summary of accumulated text")
|
|
192
192
|
const textToSummarize = self.accumulatedText
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -47,6 +47,7 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
47
47
|
|
|
48
48
|
/* helper function for comparing two values */
|
|
49
49
|
const comparison = (val1: any, op: string, val2: any) => {
|
|
50
|
+
val1 ??= ""
|
|
50
51
|
if (op === "==" || op === "!=") {
|
|
51
52
|
/* equal comparison */
|
|
52
53
|
const str1 = (typeof val1 === "string" ? val1 : val1.toString())
|
|
@@ -73,8 +74,8 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
73
74
|
/* non-equal comparison */
|
|
74
75
|
const coerceNum = (val: any) =>
|
|
75
76
|
typeof val === "number" ? val : (
|
|
76
|
-
typeof val === "string" && val.match(/^[
|
|
77
|
-
typeof val === "string" && val.match(/^[\d
|
|
77
|
+
typeof val === "string" && val.match(/^[+-]?\d+$/) ? Number.parseInt(val, 10) : (
|
|
78
|
+
typeof val === "string" && val.match(/^[+-]?(\d+\.?\d*|\d*\.?\d+)$/) ?
|
|
78
79
|
Number.parseFloat(val) :
|
|
79
80
|
Number(val)
|
|
80
81
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|