speechflow 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +9 -9
- package/etc/claude.md +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +4 -4
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-main.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +6 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +7 -6
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +2 -4
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +12 -12
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +10 -9
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +6 -6
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +50 -25
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +2 -2
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +2 -2
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-audio.js +10 -3
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +4 -5
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -3
- package/speechflow-cli/etc/oxlint.jsonc +9 -1
- package/speechflow-cli/etc/stx.conf +1 -2
- package/speechflow-cli/package.json +17 -19
- package/speechflow-cli/src/lib.d.ts +5 -1
- package/speechflow-cli/src/speechflow-main-api.ts +4 -4
- package/speechflow-cli/src/speechflow-main-cli.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +16 -16
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +2 -2
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +4 -4
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +23 -14
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +8 -8
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +9 -8
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +2 -4
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +12 -12
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +5 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +12 -11
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +7 -7
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +54 -29
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -4
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-file.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +4 -2
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +1 -1
- package/speechflow-cli/src/speechflow-util-audio.ts +11 -3
- package/speechflow-cli/src/speechflow-util-stream.ts +4 -5
- package/speechflow-ui-db/dst/index.js +14 -14
- package/speechflow-ui-db/etc/oxlint.jsonc +137 -0
- package/speechflow-ui-db/etc/stx.conf +4 -3
- package/speechflow-ui-db/package.json +12 -9
- package/speechflow-ui-st/dst/index.js +32 -32
- package/speechflow-ui-st/etc/oxlint.jsonc +137 -0
- package/speechflow-ui-st/etc/stx.conf +4 -3
- package/speechflow-ui-st/package.json +12 -9
- package/speechflow-cli/dst/test.d.ts +0 -1
- package/speechflow-cli/dst/test.js +0 -18
- package/speechflow-cli/dst/test.js.map +0 -1
- package/speechflow-cli/etc/biome.jsonc +0 -46
- package/speechflow-ui-db/src/lib.d.ts +0 -9
- package/speechflow-ui-st/src/lib.d.ts +0 -9
The notable source-code hunks, reconstructed as unified diffs:

package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts:

```diff
@@ -14,7 +14,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
     private sampleRate: number
 
     /* eslint no-undef: off */
-    static get parameterDescriptors(): AudioParamDescriptor[] {
+    static get parameterDescriptors (): AudioParamDescriptor[] {
         return [
             { name: "threshold", defaultValue: -45, minValue: -100, maxValue: 0, automationRate: "k-rate" }, // dBFS
             { name: "floor", defaultValue: -64, minValue: -100, maxValue: 0, automationRate: "k-rate" }, // dBFS minimum output level
@@ -63,7 +63,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
     }
 
     /* process a single sample frame */
-    process(
+    process (
         inputs: Float32Array[][],
         outputs: Float32Array[][],
         parameters: Record<string, Float32Array>
```
package/speechflow-cli/src/speechflow-node-a2a-filler.ts:

```diff
@@ -29,20 +29,20 @@ class AudioFiller extends EventEmitter {
     }
 
     /* optional helper to allow subscribing with strong typing */
-    public on(event: "chunk", listener: (chunk: SpeechFlowChunk, type: string) => void): this
-    public on(event: string, listener: (...args: any[]) => void): this {
+    public on (event: "chunk", listener: (chunk: SpeechFlowChunk, type: string) => void): this
+    public on (event: string, listener: (...args: any[]) => void): this {
         return super.on(event, listener)
     }
 
     /* convert fractional samples from duration */
-    private samplesFromDuration(duration: Duration): number {
+    private samplesFromDuration (duration: Duration): number {
         const seconds = duration.as("seconds")
         const samples = seconds * this.sampleRate
         return samples
     }
 
     /* convert duration to fractional samples */
-    private durationFromSamples(samples: number): Duration {
+    private durationFromSamples (samples: number): Duration {
         const seconds = samples / this.sampleRate
         return Duration.fromObject({ seconds })
     }
```
package/speechflow-cli/src/speechflow-node-a2a-mute.ts:

```diff
@@ -44,8 +44,8 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
             throw new Error("mute: node already destroyed")
         try {
             if (params.length === 2 && params[0] === "mode") {
-                if (typeof params[1] !== "string" ||
-                    !params[1].match(/^(?:none|silenced|unplugged)$/))
+                if (typeof params[1] !== "string"
+                    || !params[1].match(/^(?:none|silenced|unplugged)$/))
                     throw new Error("mute: invalid mode argument in external request")
                 const muteMode = params[1] as MuteMode
                 this.setMuteMode(muteMode)
```
package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts:

```diff
@@ -14,7 +14,7 @@ import { type DenoiseState, Rnnoise } from "@shiguredo/rnnoise-wasm"
 let rnnoise: Rnnoise
 let denoiseState: DenoiseState
 
-/*
+/* initialize globals */
 ;(async () => {
     try {
         rnnoise = await Rnnoise.load()
@@ -60,4 +60,4 @@ parentPort!.on("message", (msg) => {
         process.exit(0)
     }
 }
-})
\ No newline at end of file
+})
```
package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts:

```diff
@@ -63,7 +63,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             else if (typeof msg === "object" && msg !== null && msg.type === "failed")
                 reject(new Error(msg.message ?? "RNNoise worker thread initialization failed"))
             else
-                reject(new Error(
+                reject(new Error("RNNoise worker thread sent unexpected message on startup"))
         })
         this.worker!.once("error", (err) => {
             clearTimeout(timeout)
```
package/speechflow-cli/src/speechflow-node-a2t-amazon.ts (its internal AsyncQueue helper):

```diff
@@ -29,7 +29,7 @@ class AsyncQueue<T> {
         const resolve = this.resolvers.shift()
         if (resolve) {
             if (v !== null)
-                resolve({ value: v })
+                resolve({ value: v, done: false })
             else
                 resolve({ value: null, done: true })
         }
@@ -43,7 +43,7 @@ class AsyncQueue<T> {
         }
         this.queue.length = 0
     }
-    async *[Symbol.asyncIterator](): AsyncIterator<T> {
+    async * [Symbol.asyncIterator] (): AsyncIterator<T> {
         while (true) {
             if (this.queue.length > 0) {
                 const v = this.queue.shift()
```
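Before the fix, a pending `next()` was resolved with `{ value: v }` alone; `for await` happens to treat the missing `done` as false at runtime, but the result no longer satisfied the `IteratorResult<T>` contract that the hand-written `[Symbol.asyncIterator]` promises. A minimal sketch of such a promise-backed queue, reconstructed for illustration from the fragments above (not the package's actual code):

```ts
// minimal sketch of a promise-backed async queue; reconstructed for
// illustration from the diff fragments, not SpeechFlow's actual code
class AsyncQueue<T> {
    private queue: T[] = []
    private resolvers: ((r: IteratorResult<T | null>) => void)[] = []
    private ended = false

    /* enqueue a value, or null to signal end-of-stream */
    push (v: T | null) {
        const resolve = this.resolvers.shift()
        if (resolve) {
            if (v !== null)
                resolve({ value: v, done: false }) /* the 2.0.4 fix: "done: false" was previously omitted */
            else
                resolve({ value: null, done: true })
        }
        else if (v !== null)
            this.queue.push(v)
        else
            this.ended = true
    }

    async * [Symbol.asyncIterator] (): AsyncIterator<T> {
        while (true) {
            if (this.queue.length > 0) {
                yield this.queue.shift() as T
                continue
            }
            if (this.ended)
                break
            const r = await new Promise<IteratorResult<T | null>>(
                (resolve) => this.resolvers.push(resolve))
            if (r.done)
                break
            yield r.value as T
        }
    }
}
```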
```diff
@@ -70,6 +70,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
     /* internal state */
     private client: TranscribeStreamingClient | null = null
     private clientStream: AsyncIterable<TranscriptResultStream> | null = null
+    private audioQueue: AsyncQueue<Uint8Array> | null = null
     private closing = false
     private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
 
@@ -127,8 +128,9 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         })
 
         /* create an AudioStream for Amazon Transcribe */
-
-        const
+        this.audioQueue = new AsyncQueue<Uint8Array>()
+        const audioQueue = this.audioQueue
+        const audioStream = (async function * (q: AsyncQueue<Uint8Array>): AsyncIterable<AudioStream> {
             for await (const chunk of q) {
                 yield { AudioEvent: { AudioChunk: chunk } }
             }
```
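Keeping the queue in `this.audioQueue` makes the generator's data source reachable from `close()`, which can then end the audio iterable deterministically (see the close() hunks below). For context, a hedged sketch of how such a queue-backed iterable feeds `@aws-sdk/client-transcribe-streaming`; the command fields are standard SDK parameters, while the region, language, and sample rate are placeholder assumptions:

```ts
import {
    TranscribeStreamingClient,
    StartStreamTranscriptionCommand,
    type AudioStream
} from "@aws-sdk/client-transcribe-streaming"

// assumed wiring (uses the AsyncQueue sketch above): the generator keeps the
// streaming request open until a null is pushed into the queue
async function startTranscription (audioQueue: AsyncQueue<Uint8Array>) {
    const client = new TranscribeStreamingClient({ region: "eu-central-1" })
    const audioStream = (async function * (q: AsyncQueue<Uint8Array>): AsyncIterable<AudioStream> {
        for await (const chunk of q)
            yield { AudioEvent: { AudioChunk: chunk } }
    })(audioQueue)
    const response = await client.send(new StartStreamTranscriptionCommand({
        LanguageCode:         "en-US",
        MediaEncoding:        "pcm",
        MediaSampleRateHertz: 48000,
        AudioStream:          audioStream
    }))
    return response.TranscriptResultStream /* AsyncIterable of transcript events */
}
```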
```diff
@@ -173,11 +175,11 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
                     return prev
                 }, new Map<string, any>())
                 if (this.params.interim) {
-                    const words = []
+                    const words: { word: string, start: Duration, end: Duration }[] = []
                     for (const item of alt.Items ?? []) {
                         if (item.Type === "pronunciation") {
                             words.push({
-                                word: item.Content,
+                                word: item.Content ?? "",
                                 start: Duration.fromMillis((item.StartTime ?? 0) * 1000).plus(this.timeZeroOffset),
                                 end: Duration.fromMillis((item.EndTime ?? 0) * 1000).plus(this.timeZeroOffset)
                             })
```
```diff
@@ -273,10 +275,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
         /* indicate closing first to stop all async operations */
         this.closing = true
 
-        /*
-        if (this.
-            this.
-            this.
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
         }
 
         /* close Amazon Transcribe connection */
@@ -285,10 +287,17 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
             this.client = null
         }
 
-        /*
-        if (this.
-
-            this.
+        /* close audio queue */
+        if (this.audioQueue !== null) {
+            this.audioQueue.push(null)
+            this.audioQueue.destroy()
+            this.audioQueue = null
+        }
+
+        /* signal EOF to any pending read operations */
+        if (this.queue !== null) {
+            this.queue.write(null)
+            this.queue = null
         }
     }
 }
```
package/speechflow-cli/src/speechflow-node-a2t-google.ts:

```diff
@@ -125,18 +125,18 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
                 const words: { word: string, start: Duration, end: Duration }[] = []
                 if (alternative.words && alternative.words.length > 0) {
                     for (const wordInfo of alternative.words) {
-                        const wordStart = wordInfo.startTime
-
+                        const wordStart = wordInfo.startTime ?
+                            Duration.fromMillis(
                                 (Number(wordInfo.startTime.seconds ?? 0) * 1000) +
                                 (Number(wordInfo.startTime.nanos ?? 0) / 1000000)
-                            ).plus(this.timeZeroOffset)
-
-                        const wordEnd = wordInfo.endTime
-
+                            ).plus(this.timeZeroOffset) :
+                            Duration.fromMillis(0)
+                        const wordEnd = wordInfo.endTime ?
+                            Duration.fromMillis(
                                 (Number(wordInfo.endTime.seconds ?? 0) * 1000) +
                                 (Number(wordInfo.endTime.nanos ?? 0) / 1000000)
-                            ).plus(this.timeZeroOffset)
-
+                            ).plus(this.timeZeroOffset) :
+                            Duration.fromMillis(0)
                         words.push({
                             word: wordInfo.word ?? "",
                             start: wordStart,
```
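Google's word offsets arrive as protobuf durations (`{ seconds, nanos }`) and may be absent on a given word, which is exactly what the new explicit ternaries guard against. The same conversion could be factored into a helper; this is a hypothetical refactor shown for clarity, not code from the package:

```ts
import { Duration } from "luxon"

/* protobuf-style duration as delivered by the Google speech client
   (field types here are an assumption for the sketch) */
interface PbDuration { seconds?: number | string | null, nanos?: number | null }

/* convert an optional protobuf duration to a Luxon Duration, offset by the
   node's zero point; absent times map to a zero duration */
function pbToDuration (t: PbDuration | null | undefined, zeroOffset: Duration): Duration {
    if (!t)
        return Duration.fromMillis(0)
    return Duration.fromMillis(
        (Number(t.seconds ?? 0) * 1000) +
        (Number(t.nanos ?? 0) / 1000000)
    ).plus(zeroOffset)
}
```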
package/speechflow-cli/src/speechflow-node-a2t-openai.ts:

```diff
@@ -170,9 +170,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
         /* track transcription text */
         let text = ""
         this.ws.on("message", (data) => {
-            let ev:
+            let ev: Record<string, unknown>
             try {
-                ev = JSON.parse(data.toString())
+                ev = JSON.parse(data.toString()) as Record<string, unknown>
             }
             catch (err) {
                 this.log("warning", `failed to parse WebSocket message: ${err}`)
```
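Typing the parsed event as `Record<string, unknown>` rather than leaving `ev` untyped forces every later field access through an explicit cast (as in `ev.item_id as string` below). The pattern in isolation, with a placeholder URL and event name that are illustrative only, taken neither from the package nor from the OpenAI API:

```ts
import WebSocket from "ws"

// sketch of the field-checked parsing pattern; "transcript.final" and the
// endpoint are illustrative placeholders
const ws = new WebSocket("wss://example.invalid")
ws.on("message", (data: WebSocket.RawData) => {
    let ev: Record<string, unknown>
    try {
        ev = JSON.parse(data.toString()) as Record<string, unknown>
    }
    catch {
        return /* ignore unparsable messages */
    }
    if (ev.type === "transcript.final") {
        const itemId     = ev.item_id    as string /* cast required: fields are "unknown" */
        const transcript = ev.transcript as string
        console.log(itemId, transcript)
    }
})
```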
```diff
@@ -194,8 +194,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                     if (this.params.interim && !this.closing && this.queue !== null) {
                         const itemId = ev.item_id as string
                         const timing = speechTiming.get(itemId)
-                        const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
-                        const end = timing ? Duration.fromMillis(timing.endMs) : start
+                        const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
+                        const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
                         const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
                         chunk.meta = aggregateMeta(start, end)
                         this.queue.write(chunk)
@@ -207,8 +207,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                     text = ev.transcript as string
                     const itemId = ev.item_id as string
                     const timing = speechTiming.get(itemId)
-                    const start = timing ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
-                    const end = timing ? Duration.fromMillis(timing.endMs) : start
+                    const start = timing !== undefined ? Duration.fromMillis(timing.startMs) : DateTime.now().diff(this.timeOpen!)
+                    const end = timing !== undefined ? Duration.fromMillis(timing.endMs) : start
                     const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
                     chunk.meta = aggregateMeta(start, end)
                     metastore.prune(start)
```
```diff
@@ -230,7 +230,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                     const itemId = ev.item_id as string
                     const audioEndMs = ev.audio_end_ms as number
                     const timing = speechTiming.get(itemId)
-                    if (timing)
+                    if (timing !== undefined)
                         timing.endMs = audioEndMs
                     break
                 }
@@ -239,7 +239,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
                     break
                 }
                 case "error": {
-
+                    const error = ev.error as { message?: string } | undefined
+                    this.log("error", `error: ${error?.message ?? "unknown error"}`)
                     break
                 }
                 default:
```
package/speechflow-cli/src/speechflow-node-t2a-amazon.ts:

```diff
@@ -124,11 +124,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (self.closing) {
+                if (self.closing)
                     callback(new Error("stream already destroyed"))
-
-                }
-                if (Buffer.isBuffer(chunk.payload))
+                else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else if (chunk.payload === "")
                     callback()
```
package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts:

```diff
@@ -56,9 +56,9 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
         try {
             const elevenlabs = new ElevenLabs.ElevenLabsClient({ apiKey: this.params.key })
             const subscription = await elevenlabs.user.subscription.get()
-            const percent = subscription.characterLimit > 0
-
-
+            const percent = subscription.characterLimit > 0 ?
+                subscription.characterCount / subscription.characterLimit :
+                0
             return { usage: `${percent.toFixed(2)}%` }
         }
         catch (_error) {
```
```diff
@@ -98,20 +98,20 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
         const voices = await this.elevenlabs.voices.getAll()
         let voice = voices.voices.find((v) => v.name === this.params.voice)
         if (voice === undefined) {
-            voice = voices.voices.find((v) =>
+            voice = voices.voices.find((v) => v.name?.startsWith(this.params.voice))
             if (voice === undefined)
                 throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
         }
         const labels = voice.labels ?? {}
-        const info = Object.keys(labels).length > 0
-
-
+        const info = Object.keys(labels).length > 0 ?
+            ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ") :
+            ""
         this.log("info", `selected voice: name: "${voice.name}"${info}`)
 
-        /* perform text-to-speech operation with
-        const model = this.params.optimize === "quality"
-
-
+        /* perform text-to-speech operation with ElevenLabs API */
+        const model = this.params.optimize === "quality" ?
+            "eleven_turbo_v2_5" :
+            "eleven_flash_v2_5"
         const speechStream = (text: string) => {
             this.log("info", `ElevenLabs: send text "${text}"`)
             return this.elevenlabs!.textToSpeech.convert(voice.voiceId, {
@@ -131,7 +131,7 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
             })
         }
 
-        /* establish resampler from ElevenLabs
+        /* establish resampler from ElevenLabs tier-dependent
            output sample rate to our standard audio sample rate (48KHz) */
         this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
 
```
package/speechflow-cli/src/speechflow-node-t2a-google.ts:

```diff
@@ -36,8 +36,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
             key: { type: "string", val: process.env.SPEECHFLOW_GOOGLE_KEY ?? "" },
             voice: { type: "string", pos: 0, val: "en-US-Neural2-J" },
             language: { type: "string", pos: 1, val: "en-US" },
-            speed: { type: "number", pos: 2, val: 1.0, match: (n: number) => n >=
-            pitch: { type: "number", pos: 3, val: 0.0, match: (n: number) => n >= -20.0
+            speed: { type: "number", pos: 2, val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            pitch: { type: "number", pos: 3, val: 0.0, match: (n: number) => n >= -20.0 && n <= 20.0 }
         })
 
         /* validate API key */
```
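The restored `match` predicates correspond to the bounds Google Cloud Text-to-Speech documents for these fields: `speakingRate` accepts 0.25 to 4.0 and `pitch` accepts -20.0 to 20.0 semitones, so out-of-range values are now rejected at configuration time instead of by the API.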
```diff
@@ -103,9 +103,9 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
             throw new Error("no audio content returned from Google TTS")
 
         /* convert response to buffer */
-        const buffer = Buffer.isBuffer(response.audioContent)
-
-
+        const buffer = Buffer.isBuffer(response.audioContent) ?
+            response.audioContent :
+            Buffer.from(response.audioContent)
         this.log("info", `Google TTS: received audio (buffer length: ${buffer.byteLength})`)
 
         /* resample from Google's sample rate to our standard rate */
```
package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts:

```diff
@@ -139,7 +139,7 @@ export default class SpeechFlowNodeT2ASupertonic extends SpeechFlowNode {
         const samples = result.audio
         const outputSampleRate = result.sampling_rate
         if (outputSampleRate !== this.sampleRate)
-            this.log("
+            this.log("warning", `unexpected sample rate ${outputSampleRate}Hz (expected ${this.sampleRate}Hz)`)
 
         /* calculate duration */
         const duration = samples.length / outputSampleRate
```
package/speechflow-cli/src/speechflow-node-t2t-amazon.ts:

```diff
@@ -85,15 +85,16 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
                 const out = await this.client!.send(cmd)
                 return (out.TranslatedText ?? "").trim()
             }
-            catch (e:
+            catch (e: unknown) {
                 lastError = e
                 attempt += 1
 
                 /* simple backoff for transient errors */
+                const err = e as { name?: string, $retryable?: boolean }
                 const retriable =
-
-
-
+                    err?.name === "ThrottlingException"
+                    || err?.name === "ServiceUnavailableException"
+                    || err?.$retryable === true
                 if (!retriable || attempt >= maxRetries)
                     break
                 const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
```
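The reconstructed retry logic classifies an error as transient by its `name` or by the SDK's `$retryable` hint, then sleeps with exponential backoff capped at 5 s (1 s, 2 s, 4 s, 5 s, ...). The enclosing loop is outside the hunk; a sketch of the presumable overall shape, with the loop scaffolding assumed, where `send()` would wrap something like `client.send(new TranslateTextCommand({ ... }))`:

```ts
// sketch of the retry wrapper; only the catch-block logic appears in the
// diff above -- the surrounding loop is an assumption
async function translateWithRetry (send: () => Promise<string>, maxRetries = 3): Promise<string> {
    let lastError: unknown
    let attempt = 0
    while (attempt < maxRetries) {
        try {
            return await send()
        }
        catch (e: unknown) {
            lastError = e
            attempt += 1
            /* retry only transient errors, with capped exponential backoff */
            const err = e as { name?: string, $retryable?: boolean }
            const retriable =
                err?.name === "ThrottlingException"
                || err?.name === "ServiceUnavailableException"
                || err?.$retryable === true
            if (!retriable || attempt >= maxRetries)
                break
            const delayMs = Math.min(1000 * Math.pow(2, attempt - 1), 5000)
            await new Promise((resolve) => setTimeout(resolve, delayMs))
        }
    }
    throw lastError instanceof Error ? lastError : new Error(String(lastError))
}
```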
```diff
@@ -103,7 +104,7 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
             throw util.ensureError(lastError)
         }
 
-        /* establish a
+        /* establish a transform stream and connect it to AWS Translate */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
```
```diff
@@ -135,17 +136,17 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* close Amazon Translate connection */
-        if (this.client !== null) {
-            this.client.destroy()
-            this.client = null
-        }
-
         /* shutdown stream */
         if (this.stream !== null) {
             await util.destroyStream(this.stream)
             this.stream = null
         }
+
+        /* close Amazon Translate connection */
+        if (this.client !== null) {
+            this.client.destroy()
+            this.client = null
+        }
     }
 }
 
```
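This reordering (mirrored in the OPUS node below) makes `close()` drain and destroy the Transform stream while the Translate client still exists, presumably so that in-flight `transform()` callbacks cannot race against an already-destroyed client; only afterwards is the client torn down.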
package/speechflow-cli/src/speechflow-node-t2t-deepl.ts:

```diff
@@ -53,7 +53,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
         const usage = await deepl.getUsage()
         const limit = usage?.character?.limit ?? 1
         const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
-        return { usage: `${percent.toFixed(
+        return { usage: `${percent.toFixed(2)}%` }
     }
 
     /* open node */
@@ -75,7 +75,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
             return (result?.text ?? text)
         }
 
-        /* establish a
+        /* establish a transform stream and connect it to DeepL translation */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
```
```diff
@@ -95,7 +95,7 @@ export default class SpeechFlowNodeT2TDeepL extends SpeechFlowNode {
                         this.push(chunkNew)
                         callback()
                     }).catch((error: unknown) => {
-                        callback(util.ensureError(error))
+                        callback(util.ensureError(error, "DeepL translation failed"))
                     })
                 }
             },
```
package/speechflow-cli/src/speechflow-node-t2t-format.ts:

```diff
@@ -42,7 +42,7 @@ export default class SpeechFlowNodeT2TFormat extends SpeechFlowNode {
         return text
     }
 
-        /* establish a
+        /* establish a transform stream and connect it to text formatting */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
```
package/speechflow-cli/src/speechflow-node-t2t-google.ts:

```diff
@@ -85,7 +85,7 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
             return response.translations?.[0]?.translatedText ?? text
         })
 
-        /* establish a
+        /* establish a transform stream and connect it to Google Translate */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -129,4 +129,4 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
             this.client = null
         }
     }
-}
\ No newline at end of file
+}
```
package/speechflow-cli/src/speechflow-node-t2t-modify.ts:

```diff
@@ -45,7 +45,7 @@ export default class SpeechFlowNodeT2TModify extends SpeechFlowNode {
         const modify = (text: string): string =>
             text.replace(regex, this.params.replace)
 
-        /* establish a
+        /* establish a transform stream and connect it to text modification */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -80,4 +80,4 @@ export default class SpeechFlowNodeT2TModify extends SpeechFlowNode {
             this.stream = null
         }
     }
-}
\ No newline at end of file
+}
```
package/speechflow-cli/src/speechflow-node-t2t-opus.ts:

```diff
@@ -89,7 +89,7 @@ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
             return (single as Transformers.TranslationSingle).translation_text
         }
 
-        /* establish a
+        /* establish a transform stream and connect it to Transformers */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -121,17 +121,17 @@ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* shutdown Transformers */
-        if (this.translator !== null) {
-            this.translator.dispose()
-            this.translator = null
-        }
-
         /* shutdown stream */
         if (this.stream !== null) {
             await util.destroyStream(this.stream)
             this.stream = null
         }
+
+        /* shutdown Transformers */
+        if (this.translator !== null) {
+            this.translator.dispose()
+            this.translator = null
+        }
     }
 }
 
```
In the punctuation, spellcheck, and summary nodes the `const llm = this.llm` line changes only in whitespace alignment:

package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts:

```diff
@@ -141,7 +141,7 @@ export default class SpeechFlowNodeT2TPunctuation extends SpeechFlowNode {
         await this.llm.open()
 
         /* provide text-to-text punctuation restoration */
-        const llm = this.llm
+        const llm = this.llm
         const punctuate = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)
```
package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts:

```diff
@@ -128,7 +128,7 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
         await this.llm.open()
 
         /* provide text-to-text spellchecking */
-        const llm = this.llm
+        const llm = this.llm
         const spellcheck = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)
```
package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts:

```diff
@@ -124,7 +124,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
             return output
         }
 
-        /* establish a
+        /* establish a transform stream */
         const self = this
         let headerEmitted = false
         this.stream = new Stream.Transform({
@@ -264,7 +264,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
         /* buffer for accumulating input */
         let buffer = ""
 
-        /* establish a
+        /* establish a transform stream */
         const self = this
         this.stream = new Stream.Transform({
             readableObjectMode: true,
```
package/speechflow-cli/src/speechflow-node-t2t-summary.ts:

```diff
@@ -127,7 +127,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
         await this.llm.open()
 
         /* provide text summarization */
-        const llm = this.llm
+        const llm = this.llm
         const summarize = async (text: string) => {
             const cfg = this.setup[this.params.lang]
             if (!cfg)
@@ -168,7 +168,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
                 /* check if we should generate a summary */
                 if (self.sentencesSinceLastSummary >= self.params.trigger) {
                     self.sentencesSinceLastSummary = 0
-                    self.log("info",
+                    self.log("info", "generating summary of accumulated text")
                     const textToSummarize = self.accumulatedText
                     self.accumulatedText = ""
                     summarize(textToSummarize).then((summary) => {
@@ -188,7 +188,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
                 /* generate final summary if there is accumulated text */
                 if (self.accumulatedText.length > 0 && self.sentencesSinceLastSummary > 0) {
                     self.sentencesSinceLastSummary = 0
-                    self.log("info",
+                    self.log("info", "generating final summary of accumulated text")
                     const textToSummarize = self.accumulatedText
                     self.accumulatedText = ""
                     summarize(textToSummarize).then((summary) => {
```