speechflow 1.4.5 → 1.5.0
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12
--- /dev/null
+++ package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts
@@ -0,0 +1,337 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import OpenAI from "openai"
+import { DateTime } from "luxon"
+import SpeexResampler from "speex-resampler"
+import ws from "ws"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for OpenAI Transcribe speech-to-text conversion */
+export default class SpeechFlowNodeOpenAITranscribe extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "openaitranscribe"
+
+    /* internal state */
+    private static speexInitialized = false
+    private openai: OpenAI | null = null
+    private ws: ws.WebSocket | null = null
+    private queue: utils.SingleQueue<SpeechFlowChunk | null> | null = null
+    private resampler: SpeexResampler | null = null
+    private destroyed = false
+    private connectionTimeout: ReturnType<typeof setTimeout> | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_OPENAI_KEY },
+            api: { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+/ },
+            model: { type: "string", val: "gpt-4o-mini-transcribe" },
+            language: { type: "string", val: "de", match: /^(?:en|de)$/ },
+            interim: { type: "boolean", val: false }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "text"
+    }
+
+    /* one-time status of node */
+    async status () {
+        return {}
+    }
+
+    /* open node */
+    async open () {
+        /* sanity check situation */
+        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+            throw new Error("OpenAI transcribe node currently supports PCM-S16LE audio only")
+
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* create queue for results */
+        this.queue = new utils.SingleQueue<SpeechFlowChunk | null>()
+
+        /* create a store for the meta information */
+        const metastore = new utils.TimeStore<Map<string, any>>()
+
+        /* establish resampler from our standard audio sample rate (48Khz)
+           to OpenAI's maximum 24Khz input sample rate */
+        if (!SpeechFlowNodeOpenAITranscribe.speexInitialized) {
+            /* at least once initialize resampler */
+            await SpeexResampler.initPromise
+            SpeechFlowNodeOpenAITranscribe.speexInitialized = true
+        }
+        this.resampler = new SpeexResampler(1, this.config.audioSampleRate, 24000, 7)
+
+        /* instantiate OpenAI API */
+        this.openai = new OpenAI({
+            baseURL: this.params.api,
+            apiKey: this.params.key,
+            dangerouslyAllowBrowser: true
+        })
+
+        /* open the WebSocket connection for streaming */
+        const url = `${this.params.api.replace(/^http/, "ws")}/realtime?intent=transcription`
+        this.ws = new ws.WebSocket(url, {
+            headers: {
+                Authorization: `Bearer ${this.params.key}`,
+                "OpenAI-Beta": "realtime=v1"
+            }
+        })
+        const sendMessage = (obj: any) => {
+            this.ws?.send(JSON.stringify(obj))
+        }
+
+        /* wait for OpenAI API to be available */
+        await new Promise((resolve, reject) => {
+            this.connectionTimeout = setTimeout(() => {
+                if (this.connectionTimeout !== null) {
+                    this.connectionTimeout = null
+                    reject(new Error("OpenAI: timeout waiting for connection open"))
+                }
+            }, 8000)
+            this.ws!.once("open", () => {
+                this.log("info", "connection open")
+                if (this.connectionTimeout !== null) {
+                    clearTimeout(this.connectionTimeout)
+                    this.connectionTimeout = null
+                }
+                resolve(true)
+            })
+            this.ws!.once("error", (err) => {
+                if (this.connectionTimeout !== null) {
+                    clearTimeout(this.connectionTimeout)
+                    this.connectionTimeout = null
+                }
+                reject(err)
+            })
+        })
+
+        /* configure session */
+        sendMessage({
+            type: "transcription_session.update",
+            session: {
+                input_audio_format: "pcm16",
+                input_audio_transcription: {
+                    model: this.params.model,
+                    language: this.params.language
+                },
+                turn_detection: {
+                    type: "server_vad",
+                    threshold: 0.5,
+                    prefix_padding_ms: 300,
+                    silence_duration_ms: 500
+                }
+            }
+        })
+
+        /* hook onto session events */
+        this.ws.on("open", () => {
+            this.log("info", "WebSocket connection opened")
+            sendMessage({ type: "transcription.create" })
+        })
+        this.ws.on("close", () => {
+            this.log("info", "WebSocket connection closed")
+            this.queue!.write(null)
+        })
+        this.ws.on("error", (err) => {
+            this.log("error", `WebSocket connection error: ${err}`)
+        })
+        let text = ""
+        this.ws.on("message", (data) => {
+            let ev: any
+            try {
+                ev = JSON.parse(data.toString())
+            }
+            catch (err) {
+                this.log("warning", `failed to parse WebSocket message: ${err}`)
+                return
+            }
+            if (!(typeof ev === "object" && ev !== null)) {
+                this.log("warning", "received invalid WebSocket message")
+                return
+            }
+            switch (ev.type) {
+                case "transcription_session.created":
+                    break
+                case "conversation.item.created":
+                    text = ""
+                    break
+                case "conversation.item.input_audio_transcription.delta": {
+                    text += ev.delta as string
+                    if (this.params.interim) {
+                        const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
+                        const end = start // FIXME: OpenAI does not provide timestamps
+                        const metas = metastore.fetch(start, end)
+                        const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                            curr.forEach((val, key) => { prev.set(key, val) })
+                            return prev
+                        }, new Map<string, any>())
+                        const chunk = new SpeechFlowChunk(start, end, "intermediate", "text", text)
+                        chunk.meta = meta
+                        this.queue!.write(chunk)
+                    }
+                    break
+                }
+                case "conversation.item.input_audio_transcription.completed": {
+                    text = ev.transcript as string
+                    const start = DateTime.now().diff(this.timeOpen!) // FIXME: OpenAI does not provide timestamps
+                    const end = start // FIXME: OpenAI does not provide timestamps
+                    const metas = metastore.fetch(start, end)
+                    const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                        curr.forEach((val, key) => { prev.set(key, val) })
+                        return prev
+                    }, new Map<string, any>())
+                    metastore.prune(start)
+                    const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
+                    chunk.meta = meta
+                    this.queue!.write(chunk)
+                    text = ""
+                    break
+                }
+                case "input_audio_buffer.speech_started":
+                    this.log("info", "VAD: speech started")
+                    break
+                case "input_audio_buffer.speech_stopped":
+                    this.log("info", "VAD: speech stopped")
+                    break
+                case "input_audio_buffer.committed":
+                    this.log("info", "input buffer committed")
+                    break
+                case "error":
+                    this.log("error", `error: ${ev.error?.message}`)
+                    break
+                default:
+                    break
+            }
+        })
+
+        /* remember opening time to receive time zero offset */
+        this.timeOpen = DateTime.now()
+
+        /* provide Duplex stream and internally attach to OpenAI API */
+        const self = this
+        this.stream = new Stream.Duplex({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            write (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed || self.ws === null) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (chunk.type !== "audio")
+                    callback(new Error("expected audio input chunk"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("expected Buffer input chunk"))
+                else {
+                    if (chunk.payload.byteLength > 0) {
+                        self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
+                        if (chunk.meta.size > 0)
+                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
+                        try {
+                            const payload = self.resampler!.processChunk(chunk.payload)
+                            const audioB64 = payload.toString("base64")
+                            sendMessage({
+                                type: "input_audio_buffer.append",
+                                audio: audioB64 /* intentionally discard all time information */
+                            })
+                        }
+                        catch (error) {
+                            callback(error instanceof Error ? error : new Error("failed to send to OpenAI transcribe"))
+                            return
+                        }
+                    }
+                    callback()
+                }
+            },
+            read (size) {
+                if (self.destroyed || self.queue === null) {
+                    this.push(null)
+                    return
+                }
+                self.queue.read().then((chunk) => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk)
+                    }
+                }).catch((error) => {
+                    if (!self.destroyed)
+                        self.log("error", `queue read error: ${error.message}`)
+                })
+            },
+            final (callback) {
+                if (self.destroyed || self.ws === null) {
+                    callback()
+                    return
+                }
+                try {
+                    sendMessage({ type: "input_audio_buffer.commit" })
+                    self.ws.close()
+                    /* NOTICE: do not push null here -- let the OpenAI close event handle it */
+                    callback()
+                }
+                catch (error) {
+                    self.log("warning", `error closing OpenAI connection: ${error}`)
+                    callback(error instanceof Error ? error : new Error("failed to close OpenAI connection"))
+                }
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction first to stop all async operations */
+        this.destroyed = true
+
+        /* clear connection timeout */
+        if (this.connectionTimeout !== null) {
+            clearTimeout(this.connectionTimeout)
+            this.connectionTimeout = null
+        }
+
+        /* signal EOF to any pending read operations */
+        if (this.queue !== null) {
+            this.queue.write(null)
+            this.queue = null
+        }
+
+        /* close OpenAI connection */
+        if (this.ws !== null) {
+            this.ws.close()
+            this.ws = null
+        }
+        if (this.openai !== null)
+            this.openai = null
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
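
For orientation, the realtime message flow this new node drives can be reduced to a short standalone sketch. The endpoint URL, headers, message types, and session fields below are taken directly from the code above; only the silence buffer and the console output are illustrative.

/* minimal sketch of the realtime transcription flow used above,
   assuming the same endpoint and message types shown in the diff */
import ws from "ws"

const socket = new ws.WebSocket("wss://api.openai.com/v1/realtime?intent=transcription", {
    headers: {
        Authorization: `Bearer ${process.env.SPEECHFLOW_OPENAI_KEY}`,
        "OpenAI-Beta": "realtime=v1"
    }
})
socket.once("open", () => {
    /* 1. configure model, language and server-side VAD for the session */
    socket.send(JSON.stringify({
        type: "transcription_session.update",
        session: {
            input_audio_format: "pcm16",
            input_audio_transcription: { model: "gpt-4o-mini-transcribe", language: "de" },
            turn_detection: { type: "server_vad", threshold: 0.5,
                prefix_padding_ms: 300, silence_duration_ms: 500 }
        }
    }))
    /* 2. stream base64-encoded PCM-S16LE audio
       (the node resamples its 48 kHz input down to 24 kHz first) */
    const pcm = Buffer.alloc(4800) /* hypothetical 100 ms of 24 kHz mono silence */
    socket.send(JSON.stringify({ type: "input_audio_buffer.append", audio: pcm.toString("base64") }))
    /* 3. commit the buffer once the input ends */
    socket.send(JSON.stringify({ type: "input_audio_buffer.commit" }))
})
socket.on("message", (data) => {
    const ev = JSON.parse(data.toString())
    /* interim text arrives via "...input_audio_transcription.delta",
       final text via "...input_audio_transcription.completed" */
    if (ev.type === "conversation.item.input_audio_transcription.completed")
        console.log(ev.transcript)
})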
--- /dev/null
+++ package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts
@@ -0,0 +1,187 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import { getStreamAsBuffer } from "get-stream"
+import SpeexResampler from "speex-resampler"
+import {
+    PollyClient, SynthesizeSpeechCommand,
+    Engine, VoiceId, LanguageCode, TextType
+} from "@aws-sdk/client-polly"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for AWS Polly text-to-speech conversion */
+export default class SpeechFlowNodeAWSPolly extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "awspolly"
+
+    /* internal state */
+    private client: PollyClient | null = null
+    private static speexInitialized = false
+    private destroyed = false
+    private resampler: SpeexResampler | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            key: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY },
+            secKey: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY_SEC },
+            region: { type: "string", val: "eu-central-1" },
+            voice: { type: "string", val: "Amy", pos: 0, match: /^(?:Amy|Danielle|Joanna|Matthew|Ruth|Stephen|Vicki|Daniel)$/ },
+            language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ }
+        })
+
+        /* sanity check parameters */
+        if (!this.params.key)
+            throw new Error("AWS Access Key not configured")
+        if (!this.params.secKey)
+            throw new Error("AWS Secret Access Key not configured")
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "audio"
+    }
+
+    /* one-time status of node */
+    async status () {
+        return {}
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* establish AWS Polly connection */
+        this.client = new PollyClient({
+            region: this.params.region,
+            credentials: {
+                accessKeyId: this.params.key,
+                secretAccessKey: this.params.secKey
+            }
+        })
+        if (this.client === null)
+            throw new Error("failed to establish AWS Polly client")
+
+        /* list of voices */
+        const voices = {
+            "Amy": { language: "en", languageCode: "en-GB", engine: "generative" },
+            "Danielle": { language: "en", languageCode: "en-US", engine: "generative" },
+            "Joanna": { language: "en", languageCode: "en-US", engine: "generative" },
+            "Matthew": { language: "en", languageCode: "en-US", engine: "generative" },
+            "Ruth": { language: "en", languageCode: "en-US", engine: "generative" },
+            "Stephen": { language: "en", languageCode: "en-US", engine: "generative" },
+            "Vicki": { language: "de", languageCode: "de-DE", engine: "generative" },
+            "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" },
+        }
+        const voiceConfig = voices[this.params.voice as keyof typeof voices]
+        if (voiceConfig === undefined)
+            throw new Error("unsupported voice")
+        if (voiceConfig.language !== this.params.language)
+            throw new Error(`voice does only support language "${voiceConfig.language}"`)
+
+        /* perform text-to-speech operation with AWS Polly API */
+        const textToSpeech = async (text: string) => {
+            const cmd = new SynthesizeSpeechCommand({
+                LanguageCode: voiceConfig.languageCode as LanguageCode,
+                Engine: voiceConfig.engine as Engine,
+                VoiceId: this.params.voice as VoiceId,
+                OutputFormat: "pcm",
+                SampleRate: "16000", /* maximum supported for PCM output */
+                TextType: "text" as TextType,
+                Text: text
+            })
+            const res = await this.client!.send(cmd)
+            const stream = res.AudioStream as AsyncIterable<Uint8Array> | null
+            if (stream === null)
+                throw new Error("stream not returned")
+            const buffer = await getStreamAsBuffer(stream)
+            const bufferResampled = this.resampler!.processChunk(buffer)
+            return bufferResampled
+        }
+
+        /* establish resampler from AWS Polly's maximum 16Khz output
+           (for PCM output) to our standard audio sample rate (48KHz) */
+        if (!SpeechFlowNodeAWSPolly.speexInitialized) {
+            /* at least once initialize resampler */
+            await SpeexResampler.initPromise
+            SpeechFlowNodeAWSPolly.speexInitialized = true
+        }
+        this.resampler = new SpeexResampler(1, 16000, this.config.audioSampleRate, 7)
+
+        /* create transform stream and connect it to the AWS Polly API */
+        const self = this
+        this.stream = new Stream.Transform({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload.length > 0) {
+                    self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
+                    textToSpeech(chunk.payload as string).then((buffer) => {
+                        if (self.destroyed)
+                            throw new Error("stream destroyed during processing")
+                        const chunkNew = chunk.clone()
+                        chunkNew.type = "audio"
+                        chunkNew.payload = buffer
+                        this.push(chunkNew)
+                        callback()
+                    }).catch((error) => {
+                        callback(error instanceof Error ?
+                            error : new Error(`failed to send to AWS Polly: ${String(error)}`))
+                    })
+                }
+                else
+                    callback()
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* destroy resampler */
+        if (this.resampler !== null)
+            this.resampler = null
+
+        /* destroy AWS Polly API */
+        if (this.client !== null) {
+            this.client.destroy()
+            this.client = null
+        }
+        /* destroy stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
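
The core of the new node is a single SynthesizeSpeechCommand round-trip. A minimal standalone sketch of just that call, using the same client configuration and command parameters as the diff above (the greeting text and output file name are illustrative):

/* standalone sketch of the Polly round-trip the node wraps */
import fs from "node:fs"
import { getStreamAsBuffer } from "get-stream"
import { PollyClient, SynthesizeSpeechCommand } from "@aws-sdk/client-polly"

const client = new PollyClient({
    region: "eu-central-1",
    credentials: {
        accessKeyId: process.env.SPEECHFLOW_AMAZON_KEY!,
        secretAccessKey: process.env.SPEECHFLOW_AMAZON_KEY_SEC!
    }
})
const res = await client.send(new SynthesizeSpeechCommand({
    LanguageCode: "en-GB",
    Engine: "generative",
    VoiceId: "Amy",
    OutputFormat: "pcm",   /* raw PCM-S16LE, mono */
    SampleRate: "16000",   /* Polly's maximum for PCM output */
    TextType: "text",
    Text: "Hello from SpeechFlow."
}))
/* collect the streamed audio into a Buffer; the node then
   Speex-resamples this 16 kHz PCM up to the pipeline's 48 kHz */
const pcm = await getStreamAsBuffer(res.AudioStream as AsyncIterable<Uint8Array>)
fs.writeFileSync("polly-output-16khz.pcm", pcm)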
--- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts (1.4.5)
+++ package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts (1.5.0)
@@ -52,10 +52,17 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
     /* one-time status of node */
     async status () {
-
-
-
-
+        try {
+            const elevenlabs = new ElevenLabs.ElevenLabsClient({ apiKey: this.params.key })
+            const subscription = await elevenlabs.user.subscription.get()
+            const percent = subscription.characterLimit > 0
+                ? subscription.characterCount / subscription.characterLimit
+                : 0
+            return { usage: `${percent.toFixed(2)}%` }
+        }
+        catch (_error) {
+            return { usage: "unknown" }
+        }
     }
 
     /* open node */
@@ -88,15 +95,15 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
         /* determine voice for text-to-speech operation
            (for details see https://elevenlabs.io/text-to-speech) */
         const voices = await this.elevenlabs.voices.getAll()
-        let voice = voices.voices.find((
+        let voice = voices.voices.find((v) => v.name === this.params.voice)
         if (voice === undefined) {
-            voice = voices.voices.find((
+            voice = voices.voices.find((v) => (v.name ?? "").startsWith(this.params.voice))
             if (voice === undefined)
                 throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
         }
-        const
-
-
+        const labels = voice.labels ?? {}
+        const info = Object.keys(labels).length > 0 ?
+            ", " + Object.entries(labels).map(([ key, val ]) => `${key}: "${val}"`).join(", ") : ""
         this.log("info", `selected voice: name: "${voice.name}"${info}`)
 
         /* perform text-to-speech operation with Elevenlabs API */
@@ -139,11 +146,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
             decodeStrings: false,
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
-                if (self.destroyed)
+                if (self.destroyed)
                     callback(new Error("stream already destroyed"))
-
-                }
-                if (Buffer.isBuffer(chunk.payload))
+                else if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
                     (async () => {
@@ -158,12 +163,12 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
                             }
                         }
                         try {
-                            const stream = await speechStream(chunk.payload as string)
                             if (self.destroyed) {
                                 clearProcessTimeout()
                                 callback(new Error("stream destroyed during processing"))
                                 return
                             }
+                            const stream = await speechStream(chunk.payload as string)
                             const buffer = await getStreamAsBuffer(stream)
                             if (self.destroyed) {
                                 clearProcessTimeout()
--- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts (1.4.5)
+++ package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts (1.5.0)
@@ -51,26 +51,31 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
                 artifact += `:${progress.file}`
             let percent = 0
             if (typeof progress.loaded === "number" && typeof progress.total === "number")
-                percent = (progress.loaded
+                percent = (progress.loaded / progress.total) * 100
             else if (typeof progress.progress === "number")
                 percent = progress.progress
             if (percent > 0)
                 progressState.set(artifact, percent)
        }
-
+        let interval: ReturnType<typeof setInterval> | null = setInterval(() => {
            for (const [ artifact, percent ] of progressState) {
                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
                if (percent >= 100.0)
                    progressState.delete(artifact)
            }
-            if (progressState.size === 0)
+            if (progressState.size === 0 && interval !== null) {
                clearInterval(interval)
+                interval = null
+            }
        }, 1000)
        this.kokoro = await KokoroTTS.from_pretrained(model, {
            dtype: "q4f16",
            progress_callback: progressCallback
        })
-
+        if (interval !== null) {
+            clearInterval(interval)
+            interval = null
+        }
        if (this.kokoro === null)
            throw new Error("failed to instantiate Kokoro")
 
@@ -78,19 +83,19 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
           output to our standard audio sample rate (48KHz) */
        if (!SpeechFlowNodeKokoro.speexInitialized) {
            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
            SpeechFlowNodeKokoro.speexInitialized = true
+            await SpeexResampler.initPromise
        }
        this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
 
        /* determine voice for text-to-speech operation */
-        const voices = {
+        const voices: Record<string, string> = {
            "Aoede": "af_aoede",
            "Heart": "af_heart",
            "Puck": "am_puck",
            "Fenrir": "am_fenrir"
        }
-        const voice =
+        const voice = voices[this.params.voice]
        if (voice === undefined)
            throw new Error(`invalid Kokoro voice "${this.params.voice}"`)
 
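
The same speex-resampler pattern now recurs throughout the package (OpenAI input at 24 kHz, Polly output at 16 kHz, Kokoro output at 24 kHz): a one-time WASM initialization, then one resampler instance per stream. A minimal sketch, assuming the API exactly as used in the hunks above; the input buffer is illustrative only.

/* recurring resampler pattern from this release */
import SpeexResampler from "speex-resampler"

await SpeexResampler.initPromise               /* one-time WASM initialization */
const resampler = new SpeexResampler(
    1,       /* channels (mono) */
    24000,   /* input sample rate in Hz */
    48000,   /* output sample rate in Hz */
    7)       /* quality level */
const pcm24k = Buffer.alloc(4800)              /* hypothetical 100 ms of 24 kHz mono PCM-S16LE */
const pcm48k = resampler.processChunk(pcm24k)  /* returns the resampled PCM-S16LE */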