speechflow 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +46 -11
- package/dst/speechflow-node-a2a-gender.d.ts +17 -0
- package/dst/speechflow-node-a2a-gender.js +272 -0
- package/dst/speechflow-node-a2a-gender.js.map +1 -0
- package/dst/speechflow-node-a2a-meter.js +7 -3
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.js +1 -0
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.js +47 -63
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +145 -122
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +3 -0
- package/dst/speechflow-node-a2t-deepgram.js +29 -4
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +3 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +18 -6
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.d.ts +3 -0
- package/dst/speechflow-node-t2t-deepl.js +8 -1
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.d.ts +11 -0
- package/dst/speechflow-node-x2x-filter.js +113 -0
- package/dst/speechflow-node-x2x-filter.js.map +1 -0
- package/dst/speechflow-node-x2x-trace.js +25 -11
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +17 -6
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +61 -28
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +7 -5
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +5 -5
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.d.ts +5 -1
- package/dst/speechflow-node.js +9 -2
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +14 -1
- package/dst/speechflow-utils.js +110 -2
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +73 -14
- package/dst/speechflow.js.map +1 -1
- package/etc/speechflow.yaml +53 -26
- package/package.json +12 -10
- package/src/speechflow-node-a2a-gender.ts +272 -0
- package/src/speechflow-node-a2a-meter.ts +8 -4
- package/src/speechflow-node-a2a-mute.ts +1 -0
- package/src/speechflow-node-a2a-vad.ts +58 -68
- package/src/speechflow-node-a2a-wav.ts +128 -91
- package/src/speechflow-node-a2t-deepgram.ts +32 -5
- package/src/speechflow-node-t2a-elevenlabs.ts +21 -8
- package/src/speechflow-node-t2a-kokoro.ts +3 -3
- package/src/speechflow-node-t2t-deepl.ts +11 -3
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +2 -2
- package/src/speechflow-node-t2t-openai.ts +3 -3
- package/src/speechflow-node-t2t-subtitle.ts +1 -1
- package/src/speechflow-node-t2t-transformers.ts +2 -2
- package/src/speechflow-node-x2x-filter.ts +122 -0
- package/src/speechflow-node-x2x-trace.ts +29 -12
- package/src/speechflow-node-xio-device.ts +24 -9
- package/src/speechflow-node-xio-file.ts +76 -36
- package/src/speechflow-node-xio-mqtt.ts +11 -9
- package/src/speechflow-node-xio-websocket.ts +7 -7
- package/src/speechflow-node.ts +11 -2
- package/src/speechflow-utils.ts +81 -2
- package/src/speechflow.ts +96 -35
package/src/speechflow-node-a2a-wav.ts:

```diff
@@ -7,52 +7,69 @@
 /* standard dependencies */
 import Stream from "node:stream"
 
-/* external dependencies */
-import wav from "wav"
-
 /* internal dependencies */
-import SpeechFlowNode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* write WAV header */
+const writeWavHeader = (
+    length: number,
+    options?: { audioFormat?: number, channels?: number, sampleRate?: number, bitDepth?: number }
+) => {
+    const audioFormat = options?.audioFormat ?? 0x001 /* PCM */
+    const channels    = options?.channels    ?? 1     /* mono */
+    const sampleRate  = options?.sampleRate  ?? 44100 /* 44KHz */
+    const bitDepth    = options?.bitDepth    ?? 16    /* 16-Bit */
+
+    const headerLength = 44
+    const dataLength   = length || (4294967295 - 100)
+    const fileSize     = dataLength + headerLength
+    const header       = Buffer.alloc(headerLength)
+
+    const RIFF = Buffer.alloc(4, "RIFF")
+    const WAVE = Buffer.alloc(4, "WAVE")
+    const fmt  = Buffer.alloc(4, "fmt ")
+    const data = Buffer.alloc(4, "data")
+    const byteRate   = (sampleRate * channels * bitDepth) / 8
+    const blockAlign = (channels * bitDepth) / 8
+
+    let offset = 0
+    RIFF.copy(header, offset); offset += RIFF.length
+    header.writeUInt32LE(fileSize - 8, offset); offset += 4
+    WAVE.copy(header, offset); offset += WAVE.length
+    fmt.copy(header, offset); offset += fmt.length
+    header.writeUInt32LE(16, offset); offset += 4
+    header.writeUInt16LE(audioFormat, offset); offset += 2
+    header.writeUInt16LE(channels, offset); offset += 2
+    header.writeUInt32LE(sampleRate, offset); offset += 4
+    header.writeUInt32LE(byteRate, offset); offset += 4
+    header.writeUInt16LE(blockAlign, offset); offset += 2
+    header.writeUInt16LE(bitDepth, offset); offset += 2
+    data.copy(header, offset); offset += data.length
+    header.writeUInt32LE(dataLength, offset); offset += 4
+
+    return header
+}
+
+/* read WAV header */
+const readWavHeader = (buffer: Buffer) => {
+    let offset = 0
+    const riffHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fileSize     = buffer.readUInt32LE(offset); offset += 4
+    const waveHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fmtHead      = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const formatLength = buffer.readUInt32LE(offset); offset += 4
+    const audioFormat  = buffer.readUInt16LE(offset); offset += 2
+    const channels     = buffer.readUInt16LE(offset); offset += 2
+    const sampleRate   = buffer.readUInt32LE(offset); offset += 4
+    const byteRate     = buffer.readUInt32LE(offset); offset += 4
+    const blockAlign   = buffer.readUInt16LE(offset); offset += 2
+    const bitDepth     = buffer.readUInt16LE(offset); offset += 2
+    const data         = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const dataLength   = buffer.readUInt32LE(offset); offset += 4
+
+    return {
+        riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
+        channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
     }
 }
 
```
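For illustration, a round-trip of the two helpers added above (hypothetical usage; in the package both functions are module-local, not exported):

```ts
// hypothetical round-trip check of writeWavHeader()/readWavHeader() (assumed in scope)
const header = writeWavHeader(0, { channels: 1, sampleRate: 48000, bitDepth: 16 })
const parsed = readWavHeader(header)
// parsed.riffHead === "RIFF", parsed.audioFormat === 0x0001 (PCM),
// parsed.dataLength === 4294967295 - 100 (placeholder for "unknown stream length")
console.log(parsed.channels, parsed.sampleRate, parsed.bitDepth)  /* 1 48000 16 */
```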
package/src/speechflow-node-a2a-wav.ts:

```diff
@@ -77,52 +94,72 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
 
     /* open node */
     async open () {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        /* establish a transform stream */
+        const self = this
+        let firstChunk = true
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (firstChunk) {
+                    if (self.params.mode === "encode") {
+                        /* convert raw/PCM to WAV/PCM
+                           (NOTICE: as this is a continuous stream, the
+                           resulting WAV header is not 100% conforming
+                           to the WAV standard, as it has to use a zero
+                           duration information. This cannot be changed in
+                           a stream-based processing.) */
+                        const headerBuffer = writeWavHeader(0, {
+                            audioFormat: 0x0001 /* PCM */,
+                            channels: self.config.audioChannels,
+                            sampleRate: self.config.audioSampleRate,
+                            bitDepth: self.config.audioBitDepth
+                        })
+                        const headerChunk = chunk.clone()
+                        headerChunk.payload = headerBuffer
+                        this.push(headerChunk)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else if (self.params.mode === "decode") {
+                        /* convert WAV/PCM to raw/PCM */
+                        const header = readWavHeader(chunk.payload)
+                        self.log("info", "WAV audio stream: " +
+                            `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
+                                "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
+                            `channels=${header.channels} ` +
+                            `sampleRate=${header.sampleRate} ` +
+                            `bitDepth=${header.bitDepth}`)
+                        if (header.audioFormat !== 0x0001 /* PCM */)
+                            throw new Error("WAV not based on PCM format")
+                        if (header.bitDepth !== 16)
+                            throw new Error("WAV not based on 16 bit samples")
+                        if (header.sampleRate !== 48000)
+                            throw new Error("WAV not based on 48Khz sample rate")
+                        if (header.channels !== 1)
+                            throw new Error("WAV not based on mono channel")
+                        chunk.payload = chunk.payload.subarray(44)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else
+                        throw new Error(`invalid operation mode "${self.params.mode}"`)
+                }
+                else {
+                    /* pass-through original chunk */
+                    this.push(chunk)
+                    callback()
+                }
+                firstChunk = false
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
     }
 
     /* close node */
```
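The "prepend a synthetic header chunk on the first write" pattern used here can be isolated into a minimal standalone sketch (plain Buffers instead of SpeechFlowChunk objects; all names are illustrative):

```ts
import Stream from "node:stream"

// minimal sketch of the first-chunk header injection used by the WAV encoder above
const prependHeader = (makeHeader: () => Buffer) => {
    let first = true
    return new Stream.Transform({
        readableObjectMode: true,
        writableObjectMode: true,
        transform (chunk: Buffer, _encoding, callback) {
            if (first) {
                this.push(makeHeader())  /* emit the header once, before the first payload */
                first = false
            }
            this.push(chunk)             /* pass every payload through unchanged */
            callback()
        }
    })
}
```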
package/src/speechflow-node-a2t-deepgram.ts:

```diff
@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as Deepgram from "@deepgram/sdk"
@@ -29,7 +29,8 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            key: { type: "string", val: process.env.
+            key:      { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
+            keyAdm:   { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
             model:    { type: "string", val: "nova-3", pos: 0 },
             version:  { type: "string", val: "latest", pos: 1 },
             language: { type: "string", val: "multi",  pos: 2 }
@@ -40,6 +41,21 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
         this.output = "text"
     }
 
+    /* one-time status of node */
+    async status () {
+        let balance = 0
+        const deepgram = Deepgram.createClient(this.params.keyAdm)
+        const response = await deepgram.manage.getProjects()
+        if (response !== null && response.error === null) {
+            for (const project of response.result.projects) {
+                const response = await deepgram.manage.getProjectBalances(project.project_id)
+                if (response !== null && response.error === null)
+                    balance += response.result.balances[0]?.amount ?? 0
+            }
+        }
+        return { balance: balance.toFixed(2) }
+    }
+
     /* open node */
     async open () {
         /* sanity check situation */
```
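A hypothetical invocation of the new method (the constructor signature is assumed to match the one the filter node uses further below):

```ts
// hypothetical usage: status() sums the balances of all Deepgram projects
// reachable via the separate administrative key (keyAdm)
const node = new SpeechFlowNodeDeepgram("deepgram", {}, {}, [])
console.log(await node.status())  /* e.g. { balance: "42.00" } (illustrative value) */
```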
package/src/speechflow-node-a2t-deepgram.ts:

```diff
@@ -49,6 +65,9 @@
         /* create queue for results */
         const queue = new utils.SingleQueue<SpeechFlowChunk>()
 
+        /* create a store for the meta information */
+        const metastore = new utils.TimeStore<Map<string, any>>()
+
         /* connect to Deepgram API */
         const deepgram = Deepgram.createClient(this.params.key)
         let language = "en"
@@ -70,21 +89,27 @@
             smart_format: true,
             punctuate: true,
             filler_words: true,
-            diarize:
+            diarize: false,
             numerals: true,
             profanity_filter: false
         })
 
         /* hook onto Deepgram API events */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
-            const text = (data.channel?.alternatives[0]
+            const text = (data.channel?.alternatives[0]?.transcript as string) ?? ""
             if (text === "")
                 this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`)
             else {
                 this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`)
                 const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
                 const end = start.plus({ seconds: data.duration })
-                const
+                const metas = metastore.fetch(start, end)
+                const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                    curr.forEach((val, key) => { prev.set(key, val) })
+                    return prev
+                }, new Map<string, any>())
+                metastore.prune(start)
+                const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
                 queue.write(chunk)
             }
         })
```
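The `reduce` above merges all metadata maps overlapping the transcript's time range into a single map; a standalone sketch of that merge semantics (later maps win on conflicting keys):

```ts
// standalone sketch of the metadata merge: fold a list of Maps left-to-right,
// letting entries from later maps overwrite earlier ones on key conflicts
const mergeMeta = (metas: Map<string, any>[]) =>
    metas.reduce((prev, curr) => {
        curr.forEach((val, key) => { prev.set(key, val) })
        return prev
    }, new Map<string, any>())

const merged = mergeMeta([
    new Map([ [ "gender", "female" ], [ "source", "vad" ] ]),
    new Map([ [ "gender", "male" ] ])
])
console.log(merged)  /* Map { "gender" => "male", "source" => "vad" } */
```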
package/src/speechflow-node-a2t-deepgram.ts:

```diff
@@ -164,6 +189,8 @@
             if (chunk.payload.byteLength > 0) {
                 log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`)
                 initTimeoutStart()
+                if (chunk.meta.size > 0)
+                    metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                 dg.send(chunk.payload.buffer) /* intentionally discard all time information */
             }
             callback()
```
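`utils.TimeStore` is an internal helper not shown in this diff; from the three calls used here (`store`, `fetch`, `prune`) its contract appears to be an interval store keyed by timestamps. A minimal sketch of such a store, purely to illustrate the assumed semantics (the package's actual implementation lives in `package/src/speechflow-utils.ts`):

```ts
import { Duration } from "luxon"

// hypothetical minimal interval store matching the store/fetch/prune calls above
class TimeStoreSketch<T> {
    private items: { start: Duration, end: Duration, data: T }[] = []
    store (start: Duration, end: Duration, data: T) {
        this.items.push({ start, end, data })
    }
    fetch (start: Duration, end: Duration): T[] {
        /* return the data of all entries overlapping the [start, end] interval */
        return this.items
            .filter((it) => it.start.toMillis() <= end.toMillis()
                && it.end.toMillis() >= start.toMillis())
            .map((it) => it.data)
    }
    prune (before: Duration) {
        /* drop entries which ended before the given point in time */
        this.items = this.items.filter((it) => it.end.toMillis() >= before.toMillis())
    }
}
```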
package/src/speechflow-node-t2a-elevenlabs.ts:

```diff
@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
@@ -30,11 +30,13 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            key:
-            voice:
-            language:
-            speed:
-
+            key:        { type: "string", val: process.env.SPEECHFLOW_ELEVENLABS_KEY },
+            voice:      { type: "string", val: "Brian",   pos: 0, match: /^(?:Brittney|Cassidy|Leonie|Mark|Brian)$/ },
+            language:   { type: "string", val: "en",      pos: 1, match: /^(?:de|en)$/ },
+            speed:      { type: "number", val: 1.00,      pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
+            stability:  { type: "number", val: 0.5,       pos: 3, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            similarity: { type: "number", val: 0.75,      pos: 4, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            optimize:   { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
         })
 
         /* declare node input/output format */
@@ -42,6 +44,14 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
         this.output = "audio"
     }
 
+    /* one-time status of node */
+    async status () {
+        const elevenlabs = new ElevenLabs.ElevenLabsClient({ apiKey: this.params.key })
+        const subscription = await elevenlabs.user.subscription.get()
+        const percent = subscription.characterCount / subscription.characterLimit
+        return { usage: `${percent.toFixed(2)}%` }
+    }
+
     /* open node */
     async open () {
         /* establish ElevenLabs API connection */
@@ -82,7 +92,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
         /* perform text-to-speech operation with Elevenlabs API */
         const model = this.params.optimize === "quality" ?
-            "
+            "eleven_turbo_v2_5" :
             "eleven_flash_v2_5"
         const speechStream = (text: string) => {
             this.log("info", `ElevenLabs: send text "${text}"`)
@@ -93,7 +103,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
                 outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
                 seed: 815, /* arbitrary, but fixated by us */
                 voiceSettings: {
-                    speed:
+                    speed: this.params.speed,
+                    stability: this.params.stability,
+                    similarityBoost: this.params.similarity
                 }
             }, {
                 timeoutInSeconds: 30,
@@ -120,6 +132,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
+                    log("info", `ElevenLabs: send text: ${JSON.stringify(chunk.payload)}`)
                     speechStream(chunk.payload).then((stream) => {
                         getStreamAsBuffer(stream).then((buffer) => {
                             const bufferResampled = resampler.processChunk(buffer)
```
package/src/speechflow-node-t2a-kokoro.ts:

```diff
@@ -5,11 +5,11 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { KokoroTTS }
-import SpeexResampler
+import { KokoroTTS } from "kokoro-js"
+import SpeexResampler from "speex-resampler"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```
package/src/speechflow-node-t2t-deepl.ts:

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import * as DeepL
+import * as DeepL from "deepl-node"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -27,7 +27,7 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            key: { type: "string", val: process.env.
+            key:      { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY },
             src:      { type: "string", pos: 0, val: "de",      match: /^(?:de|en)$/ },
             dst:      { type: "string", pos: 1, val: "en",      match: /^(?:de|en)$/ },
             optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
@@ -42,6 +42,14 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
         this.output = "text"
     }
 
+    /* one-time status of node */
+    async status () {
+        this.deepl = new DeepL.Translator(this.params.key)
+        const usage = await this.deepl.getUsage()
+        const percent = (usage?.character?.count ?? 0) / (usage?.character?.limit ?? 0) * 100
+        return { usage: `${percent.toFixed(8)}%` }
+    }
+
     /* open node */
     async open () {
         /* instantiate DeepL API SDK */
```
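The quota query behind this `status()` method can also be exercised standalone with the `deepl-node` SDK (a sketch; the environment variable is the one the node's `key` parameter defaults to):

```ts
import * as DeepL from "deepl-node"

// standalone sketch of the same quota query the status() method performs
const translator = new DeepL.Translator(process.env.SPEECHFLOW_DEEPL_KEY ?? "")
const usage = await translator.getUsage()
if (usage.character)
    console.log(`used ${usage.character.count} of ${usage.character.limit} characters`)
```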
package/src/speechflow-node-t2t-format.ts:

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import wrapText
+import wrapText from "wrap-text"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```
package/src/speechflow-node-t2t-ollama.ts:

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { Ollama }
+import { Ollama } from "ollama"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```
package/src/speechflow-node-t2t-openai.ts:

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import OpenAI
+import OpenAI from "openai"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -149,7 +149,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
         this.configure({
             src:   { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
             dst:   { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
-            key:   { type: "string", val: process.env.
+            key:   { type: "string", val: process.env.SPEECHFLOW_OPENAI_KEY },
             api:   { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+?:\d+$/ },
             model: { type: "string", val: "gpt-4o-mini" }
         })
```
package/src/speechflow-node-t2t-transformers.ts:

```diff
@@ -5,8 +5,8 @@
 */
 
 /* standard dependencies */
-import path
-import Stream
+import path   from "node:path"
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as Transformers from "@huggingface/transformers"
```
package/src/speechflow-node-x2x-filter.ts (new file):

```diff
@@ -0,0 +1,122 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for data flow filtering (based on meta information) */
+export default class SpeechFlowNodeFilter extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "filter"
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
+            var:  { type: "string", pos: 1, val: "",      match: /^(?:meta:.+|payload:(?:length|text)|time:(?:start|end))$/ },
+            op:   { type: "string", pos: 2, val: "==",    match: /^(?:<|<=|==|!=|~~|!~|>=|>)$/ },
+            val:  { type: "string", pos: 3, val: "",      match: /^.*$/ }
+        })
+
+        /* declare node input/output format */
+        this.input  = this.params.type
+        this.output = this.params.type
+    }
+
+    /* open node */
+    async open () {
+        /* helper function for comparing two values */
+        const comparison = (val1: any, op: string, val2: any) => {
+            if (op === "==" || op === "!=") {
+                /* equal comparison */
+                const str1 = (typeof val1 === "string" ? val1 : val1.toString()) as string
+                const str2 = (typeof val2 === "string" ? val2 : val2.toString()) as string
+                return (op === "==" ? (str1 === str2) : (str1 !== str2))
+            }
+            else if (op === "~~" || op === "!~") {
+                /* regular expression comparison */
+                const str = (typeof val1 === "string" ? val1 : val1.toString()) as string
+                const regexp = (
+                    val2 instanceof RegExp ?
+                        val2 :
+                        typeof val2 === "string" ?
+                            new RegExp(val2) :
+                            new RegExp(val2.toString()))
+                return (op === "~~" ? regexp.test(str) : !regexp.test(str))
+            }
+            else {
+                /* non-equal comparison */
+                const coerceNum = (val: any) => {
+                    return typeof val === "number" ? val : (
+                        typeof val === "string" && val.match(/^[\d+-]+$/) ? parseInt(val) : (
+                            typeof val === "string" && val.match(/^[\d.+-]+$/) ?
+                                parseFloat(val) :
+                                Number(val)
+                        )
+                    )
+                }
+                const num1 = coerceNum(val1)
+                const num2 = coerceNum(val2)
+                return (
+                    op === "<" ?
+                        (num1 < num2) :
+                        op === "<=" ?
+                            (num1 <= num2) :
+                            op === ">=" ?
+                                (num1 >= num2) :
+                                op === ">" ?
+                                    (num1 > num2) :
+                                    false
+                )
+            }
+        }
+
+        /* provide Transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                let val1: any
+                const val2: any = self.params.val
+                const m = self.params.var.match(/^meta:(.+)$/)
+                if (m !== null)
+                    val1 = chunk.meta.get(m[1])
+                else if (self.params.key === "payload:length")
+                    val1 = chunk.payload.length
+                else if (self.params.key === "payload:text")
+                    val1 = (self.params.type === "text" ? chunk.payload as string : "")
+                else if (self.params.key === "time:start")
+                    val1 = chunk.timestampStart.toMillis()
+                else if (self.params.key === "time:end")
+                    val1 = chunk.timestampEnd.toMillis()
+                if (comparison(val1, self.params.ops, val2))
+                    this.push(chunk)
+                callback()
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
```
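The non-equality operators route both operands through the `coerceNum()` helper above, so numeric strings compare as numbers rather than lexicographically; a standalone sketch of the difference (simplified coercion, names illustrative):

```ts
// simplified version of the filter node's numeric coercion: numeric-looking
// strings become numbers before <, <=, >= and > comparisons are applied
const coerceNum = (val: unknown): number =>
    typeof val === "number" ? val : Number(val)

console.log("12" < "100")                        /* false: lexicographic string compare */
console.log(coerceNum("12") < coerceNum("100"))  /* true:  numeric compare */
```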