speechflow 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/etc/stx.conf +54 -58
- package/package.json +25 -106
- package/{etc → speechflow-cli/etc}/eslint.mjs +1 -2
- package/speechflow-cli/etc/stx.conf +77 -0
- package/speechflow-cli/package.json +116 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +148 -64
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +39 -11
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +384 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +27 -11
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +313 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +59 -12
- package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +11 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +9 -4
- package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +2 -2
- package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +1 -1
- package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +37 -20
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
- package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +4 -3
- package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +9 -5
- package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +16 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +12 -8
- package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +9 -3
- package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +5 -2
- package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +12 -12
- package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
- package/{src → speechflow-cli/src}/speechflow-utils.ts +78 -44
- package/{src → speechflow-cli/src}/speechflow.ts +188 -53
- package/speechflow-ui-db/etc/eslint.mjs +106 -0
- package/speechflow-ui-db/etc/htmllint.json +55 -0
- package/speechflow-ui-db/etc/stx.conf +79 -0
- package/speechflow-ui-db/etc/stylelint.js +46 -0
- package/speechflow-ui-db/etc/stylelint.yaml +33 -0
- package/speechflow-ui-db/etc/tsc-client.json +30 -0
- package/speechflow-ui-db/etc/tsc.node.json +9 -0
- package/speechflow-ui-db/etc/vite-client.mts +63 -0
- package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-db/package.json +75 -0
- package/speechflow-ui-db/src/app-icon.ai +1989 -4
- package/speechflow-ui-db/src/app-icon.svg +26 -0
- package/speechflow-ui-db/src/app.styl +64 -0
- package/speechflow-ui-db/src/app.vue +221 -0
- package/speechflow-ui-db/src/index.html +23 -0
- package/speechflow-ui-db/src/index.ts +26 -0
- package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
- package/speechflow-ui-db/src/tsconfig.json +3 -0
- package/speechflow-ui-st/etc/eslint.mjs +106 -0
- package/speechflow-ui-st/etc/htmllint.json +55 -0
- package/speechflow-ui-st/etc/stx.conf +79 -0
- package/speechflow-ui-st/etc/stylelint.js +46 -0
- package/speechflow-ui-st/etc/stylelint.yaml +33 -0
- package/speechflow-ui-st/etc/tsc-client.json +30 -0
- package/speechflow-ui-st/etc/tsc.node.json +9 -0
- package/speechflow-ui-st/etc/vite-client.mts +63 -0
- package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-st/package.json +79 -0
- package/speechflow-ui-st/src/app-icon.ai +1989 -4
- package/speechflow-ui-st/src/app-icon.svg +26 -0
- package/speechflow-ui-st/src/app.styl +64 -0
- package/speechflow-ui-st/src/app.vue +142 -0
- package/speechflow-ui-st/src/index.html +23 -0
- package/speechflow-ui-st/src/index.ts +26 -0
- package/speechflow-ui-st/src/lib.d.ts +9 -0
- package/speechflow-ui-st/src/tsconfig.json +3 -0
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
- package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
- package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
- package/dst/speechflow-node-a2a-gender.d.ts +0 -18
- package/dst/speechflow-node-a2a-gender.js +0 -271
- package/dst/speechflow-node-a2a-gender.js.map +0 -1
- package/dst/speechflow-node-a2a-meter.d.ts +0 -12
- package/dst/speechflow-node-a2a-meter.js +0 -155
- package/dst/speechflow-node-a2a-meter.js.map +0 -1
- package/dst/speechflow-node-a2a-mute.d.ts +0 -16
- package/dst/speechflow-node-a2a-mute.js +0 -91
- package/dst/speechflow-node-a2a-mute.js.map +0 -1
- package/dst/speechflow-node-a2a-vad.d.ts +0 -16
- package/dst/speechflow-node-a2a-vad.js +0 -285
- package/dst/speechflow-node-a2a-vad.js.map +0 -1
- package/dst/speechflow-node-a2a-wav.d.ts +0 -11
- package/dst/speechflow-node-a2a-wav.js +0 -195
- package/dst/speechflow-node-a2a-wav.js.map +0 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +0 -15
- package/dst/speechflow-node-a2t-deepgram.js +0 -255
- package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -16
- package/dst/speechflow-node-t2a-elevenlabs.js +0 -195
- package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +0 -13
- package/dst/speechflow-node-t2a-kokoro.js +0 -149
- package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
- package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
- package/dst/speechflow-node-t2t-deepl.js +0 -142
- package/dst/speechflow-node-t2t-deepl.js.map +0 -1
- package/dst/speechflow-node-t2t-format.d.ts +0 -11
- package/dst/speechflow-node-t2t-format.js +0 -82
- package/dst/speechflow-node-t2t-format.js.map +0 -1
- package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
- package/dst/speechflow-node-t2t-ollama.js +0 -247
- package/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/dst/speechflow-node-t2t-openai.d.ts +0 -13
- package/dst/speechflow-node-t2t-openai.js +0 -227
- package/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
- package/dst/speechflow-node-t2t-sentence.js +0 -234
- package/dst/speechflow-node-t2t-sentence.js.map +0 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -13
- package/dst/speechflow-node-t2t-subtitle.js +0 -278
- package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
- package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
- package/dst/speechflow-node-t2t-transformers.js +0 -265
- package/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/dst/speechflow-node-x2x-filter.d.ts +0 -11
- package/dst/speechflow-node-x2x-filter.js +0 -117
- package/dst/speechflow-node-x2x-filter.js.map +0 -1
- package/dst/speechflow-node-x2x-trace.d.ts +0 -11
- package/dst/speechflow-node-x2x-trace.js +0 -111
- package/dst/speechflow-node-x2x-trace.js.map +0 -1
- package/dst/speechflow-node-xio-device.d.ts +0 -13
- package/dst/speechflow-node-xio-device.js +0 -226
- package/dst/speechflow-node-xio-device.js.map +0 -1
- package/dst/speechflow-node-xio-file.d.ts +0 -11
- package/dst/speechflow-node-xio-file.js +0 -210
- package/dst/speechflow-node-xio-file.js.map +0 -1
- package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
- package/dst/speechflow-node-xio-mqtt.js +0 -185
- package/dst/speechflow-node-xio-mqtt.js.map +0 -1
- package/dst/speechflow-node-xio-websocket.d.ts +0 -13
- package/dst/speechflow-node-xio-websocket.js +0 -278
- package/dst/speechflow-node-xio-websocket.js.map +0 -1
- package/dst/speechflow-node.d.ts +0 -65
- package/dst/speechflow-node.js +0 -180
- package/dst/speechflow-node.js.map +0 -1
- package/dst/speechflow-utils.d.ts +0 -69
- package/dst/speechflow-utils.js +0 -486
- package/dst/speechflow-utils.js.map +0 -1
- package/dst/speechflow.js +0 -768
- package/dst/speechflow.js.map +0 -1
- package/src/speechflow-node-a2a-meter.ts +0 -130
- package/src/speechflow-node-a2a-vad.ts +0 -285
- package/src/speechflow-node-a2t-deepgram.ts +0 -234
- package/src/speechflow-node-t2t-subtitle.ts +0 -149
- /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
- /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
- /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
- /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
- /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
-
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/* standard dependencies */
|
|
8
|
-
import Stream from "node:stream"
|
|
9
|
-
|
|
10
|
-
/* external dependencies */
|
|
11
|
-
import * as Deepgram from "@deepgram/sdk"
|
|
12
|
-
import { DateTime, Duration } from "luxon"
|
|
13
|
-
|
|
14
|
-
/* internal dependencies */
|
|
15
|
-
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
16
|
-
import * as utils from "./speechflow-utils"
|
|
17
|
-
|
|
18
|
-
/* SpeechFlow node for Deepgram speech-to-text conversion */
export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
    /* declare official node name */
    public static name = "deepgram"

    /* internal state */
    private dg: Deepgram.LiveClient | null = null

    /* handle of the pending initialization watchdog timer (null if not armed);
       kept on the instance so that close() is able to cancel it */
    private initTimeout: ReturnType<typeof setTimeout> | null = null

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
            keyAdm:   { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
            model:    { type: "string", val: "nova-3", pos: 0 },
            version:  { type: "string", val: "latest", pos: 1 },
            language: { type: "string", val: "multi",  pos: 2 }
        })

        /* declare node input/output format */
        this.input  = "audio"
        this.output = "text"
    }

    /* one-time status of node: sums the first balance entry of every
       project visible to the administrative key and returns it as a
       string with two fraction digits */
    async status () {
        let balance = 0
        const deepgram = Deepgram.createClient(this.params.keyAdm)
        const projects = await deepgram.manage.getProjects()
        if (projects !== null && projects.error === null) {
            for (const project of projects.result.projects) {
                const balances = await deepgram.manage.getProjectBalances(project.project_id)
                if (balances !== null && balances.error === null)
                    balance += balances.result.balances[0]?.amount ?? 0
            }
        }
        return { balance: balance.toFixed(2) }
    }

    /* open node: connect to the Deepgram live API and provide a Duplex
       stream which accepts audio chunks and emits text chunks
       (throws if the audio format is not PCM-S16LE or if the
       connection does not open within 8 seconds) */
    async open () {
        /* sanity check situation */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Deepgram node currently supports PCM-S16LE audio only")

        /* create queue for results */
        const queue = new utils.SingleQueue<SpeechFlowChunk>()

        /* create a store for the meta information */
        const metastore = new utils.TimeStore<Map<string, any>>()

        /* determine effective language: "en" is the default, nova-2 models
           receive the configured language and nova-3 models receive "multi"
           for every configured language other than "en" */
        let language = "en"
        if (this.params.model.match(/^nova-2/) && this.params.language !== "en")
            language = this.params.language
        else if (this.params.model.match(/^nova-3/) && this.params.language !== "en")
            language = "multi"

        /* connect to Deepgram API */
        const deepgram = Deepgram.createClient(this.params.key)
        const dg = deepgram.listen.live({
            mip_opt_out:      true,
            model:            this.params.model,
            version:          this.params.version,
            language,
            channels:         this.config.audioChannels,
            sample_rate:      this.config.audioSampleRate,
            encoding:         "linear16",
            multichannel:     false,
            endpointing:      10,
            interim_results:  false,
            smart_format:     true,
            punctuate:        true,
            filler_words:     true,
            diarize:          false,
            numerals:         true,
            profanity_filter: false
        })
        this.dg = dg

        /* hook onto Deepgram API events */
        dg.on(Deepgram.LiveTranscriptionEvents.Transcript, (data) => {
            const text  = (data.channel?.alternatives[0]?.transcript ?? "") as string
            const words = (data.channel?.alternatives[0]?.words ?? []) as
                { word: string, punctuated_word?: string, start: number, end: number }[]
            if (text === "") {
                this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
                return
            }
            this.log("info", `text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s): "${text}"`)

            /* Deepgram reports seconds, so convert to durations
               and shift them by the time zero offset of this node */
            const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
            const end   = start.plus({ seconds: data.duration })

            /* merge all meta information stored for the time range
               into a single map (later entries override earlier ones) */
            const meta = new Map<string, any>()
            for (const entry of metastore.fetch(start, end))
                entry.forEach((val, key) => { meta.set(key, val) })
            metastore.prune(start)

            /* attach the per-word timing information */
            meta.set("words", words.map((word) => ({
                word:  word.punctuated_word ?? word.word,
                start: Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset),
                end:   Duration.fromMillis(word.end   * 1000).plus(this.timeZeroOffset)
            })))

            queue.write(new SpeechFlowChunk(start, end, "final", "text", text, meta))
        })
        dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
            this.log("info", "metadata received")
        })
        dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
            this.log("info", "connection close")
        })
        dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
            this.log("error", `error: ${error.message}`)

            /* forward the error object, so that listeners receive the cause */
            this.emit("error", error)
        })

        /* wait for Deepgram API to be available */
        await new Promise((resolve, reject) => {
            let timer: ReturnType<typeof setTimeout> | null = setTimeout(() => {
                if (timer !== null) {
                    timer = null
                    reject(new Error("Deepgram: timeout waiting for connection open"))
                }
            }, 8000)
            dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
                this.log("info", "connection open")
                if (timer !== null) {
                    clearTimeout(timer)
                    timer = null
                }
                resolve(true)
            })
        })

        /* remember opening time to receive time zero offset */
        this.timeOpen = DateTime.now()

        /* workaround Deepgram initialization problems: once the first audio
           data was sent, a result has to be delivered within 3 seconds or
           the service usage is restarted
           NOTE(review): the watchdog is only stopped by a non-empty transcript
           being read, so more than 3 seconds of initial silence presumably
           also triggers a restart -- confirm this is acceptable */
        let initDone = false
        const initTimeoutStart = () => {
            /* arm the watchdog once only (and not once per audio chunk) */
            if (initDone || this.initTimeout !== null)
                return
            this.initTimeout = setTimeout(async () => {
                this.initTimeout = null
                if (initDone)
                    return
                this.log("warning", "initialization timeout -- restarting service usage")
                try {
                    await this.close()
                    await this.open()
                }
                catch (error: unknown) {
                    const reason = error instanceof Error ? error.message : String(error)
                    this.log("error", `restarting service usage failed: ${reason}`)
                    this.emit("error", error)
                }
            }, 3000)
        }
        const initTimeoutStop = () => {
            if (initDone)
                return
            initDone = true
            if (this.initTimeout !== null) {
                clearTimeout(this.initTimeout)
                this.initTimeout = null
            }
        }

        /* provide Duplex stream and internally attach to Deepgram API */
        const log = (level: string, msg: string) => {
            this.log(level, msg)
        }
        const encoding = this.config.textEncoding
        this.stream = new Stream.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            write (chunk: SpeechFlowChunk, encoding, callback) {
                const payload = chunk.payload
                if (chunk.type !== "audio")
                    callback(new Error("expected audio input chunk"))
                else if (!Buffer.isBuffer(payload))
                    callback(new Error("expected Buffer input chunk"))
                else {
                    if (payload.byteLength > 0) {
                        log("debug", `send data (${payload.byteLength} bytes)`)
                        initTimeoutStart()
                        if (chunk.meta.size > 0)
                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)

                        /* a Buffer is just a view onto its underlying ArrayBuffer
                           (which can be larger, e.g. for pooled or sliced Buffers),
                           so send exactly the bytes of the view only */
                        const bytes = payload.buffer.slice(
                            payload.byteOffset, payload.byteOffset + payload.byteLength)
                        dg.send(bytes) /* intentionally discard all time information */
                    }
                    callback()
                }
            },
            read (size) {
                queue.read().then((chunk) => {
                    log("info", `receive data (${chunk.payload.length} bytes)`)
                    initTimeoutStop()
                    this.push(chunk, encoding)
                }).catch((error: unknown) => {
                    /* surface the failure on the stream instead of
                       leaving behind an unhandled promise rejection */
                    this.destroy(error instanceof Error ? error : new Error(String(error)))
                })
            },
            final (callback) {
                dg.requestClose()
                this.push(null)
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* cancel a still pending initialization watchdog */
        if (this.initTimeout !== null) {
            clearTimeout(this.initTimeout)
            this.initTimeout = null
        }

        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }

        /* shutdown Deepgram API */
        if (this.dg !== null) {
            this.dg.requestClose()
            this.dg = null
        }
    }
}
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
-
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/* standard dependencies */
|
|
8
|
-
import Stream from "node:stream"
|
|
9
|
-
|
|
10
|
-
/* external dependencies */
|
|
11
|
-
import { Duration } from "luxon"
|
|
12
|
-
|
|
13
|
-
/* internal dependencies */
|
|
14
|
-
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
15
|
-
|
|
16
|
-
/* SpeechFlow node for subtitle (text-to-text) "translations" */
export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
    /* declare official node name */
    public static name = "subtitle"

    /* internal state: running number of the next SRT cue */
    private sequenceNo = 1

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            format: { type: "string",  pos: 0, val: "srt", match: /^(?:srt|vtt)$/ },
            words:  { type: "boolean", val: false }
        })

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /* open node: provide a Transform stream which converts
       text chunks into SRT or WebVTT subtitle cues */
    async open () {
        this.sequenceNo = 1

        /* wrap the N-th standalone occurrence of a word in <b>...</b>;
           the boundaries are Unicode-aware lookarounds instead of "\b",
           because "\b" is ASCII-only and never matches next to a word
           which starts or ends with punctuation (like "Hello,") */
        const highlight = (text: string, word: string, occurrence: number): string => {
            const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
            const pattern = new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, "gu")
            let seen = 0
            return text.replace(pattern, (m) => (++seen === occurrence ? `<b>${m}</b>` : m))
        }

        /* render a single cue in the configured format
           (optionally with one word occurrence highlighted) */
        const convertSingle = (
            start:       Duration,
            end:         Duration,
            text:        string,
            word?:       string,
            occurrence?: number
        ): string => {
            if (word)
                text = highlight(text, word, occurrence ?? 1)
            if (this.params.format === "srt") {
                const startFmt = start.toFormat("hh:mm:ss,SSS")
                const endFmt   = end.toFormat("hh:mm:ss,SSS")
                text = `${this.sequenceNo++}\n` +
                    `${startFmt} --> ${endFmt}\n` +
                    `${text}\n\n`
            }
            else if (this.params.format === "vtt") {
                const startFmt = start.toFormat("hh:mm:ss.SSS")
                const endFmt   = end.toFormat("hh:mm:ss.SSS")
                text = `${startFmt} --> ${endFmt}\n` +
                    `${text}\n\n`
            }
            return text
        }

        /* provide text-to-subtitle conversion: one cue for the whole chunk
           and, in "words" mode, one additional cue per word of the "words"
           meta information (with the corresponding occurrence highlighted) */
        const convert = (chunk: SpeechFlowChunk): string => {
            if (typeof chunk.payload !== "string")
                throw new Error("chunk payload type must be string")
            let output = convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)
            if (this.params.words) {
                const words = (chunk.meta.get("words") ?? []) as
                    { word: string, start: Duration, end: Duration }[]

                /* count how often each word was seen so far, so that
                   repeated words highlight their own position in the text */
                const occurrences = new Map<string, number>()
                for (const word of words) {
                    const occurrence = (occurrences.get(word.word) ?? 0) + 1
                    occurrences.set(word.word, occurrence)
                    output += convertSingle(word.start, word.end, chunk.payload, word.word, occurrence)
                }
            }
            return output
        }

        /* establish a duplex stream */
        const self = this
        let firstChunk = true
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                /* WebVTT output has to start with the file header */
                if (firstChunk && self.params.format === "vtt") {
                    this.push(new SpeechFlowChunk(
                        Duration.fromMillis(0), Duration.fromMillis(0),
                        "final", "text",
                        "WEBVTT\n\n"
                    ))
                    firstChunk = false
                }
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "") {
                    /* pass through empty chunks unchanged */
                    this.push(chunk)
                    callback()
                }
                else {
                    let chunkNew: SpeechFlowChunk
                    try {
                        chunkNew = chunk.clone()
                        chunkNew.payload = convert(chunk)
                    }
                    catch (err: unknown) {
                        callback(err instanceof Error ? err : new Error(String(err)))
                        return
                    }
                    this.push(chunkNew)
                    callback()
                }
            },
            final (callback) {
                this.push(null)
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|