speechflow 1.1.0 → 1.2.1
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/CHANGELOG.md +18 -0
- package/README.md +37 -3
- package/dst/speechflow-node-a2a-gender.d.ts +17 -0
- package/dst/speechflow-node-a2a-gender.js +272 -0
- package/dst/speechflow-node-a2a-gender.js.map +1 -0
- package/dst/speechflow-node-a2a-meter.js +2 -2
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.js +1 -0
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.js +47 -63
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +145 -122
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.js +13 -3
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.js +10 -5
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.d.ts +11 -0
- package/dst/speechflow-node-x2x-filter.js +113 -0
- package/dst/speechflow-node-x2x-filter.js.map +1 -0
- package/dst/speechflow-node-x2x-trace.js +24 -10
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +14 -5
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +58 -27
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.js +1 -0
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +14 -1
- package/dst/speechflow-utils.js +110 -2
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +56 -53
- package/dst/speechflow.js.map +1 -1
- package/etc/speechflow.yaml +51 -24
- package/package.json +6 -5
- package/src/speechflow-node-a2a-gender.ts +272 -0
- package/src/speechflow-node-a2a-meter.ts +3 -3
- package/src/speechflow-node-a2a-mute.ts +1 -0
- package/src/speechflow-node-a2a-vad.ts +58 -68
- package/src/speechflow-node-a2a-wav.ts +128 -91
- package/src/speechflow-node-a2t-deepgram.ts +15 -4
- package/src/speechflow-node-t2a-elevenlabs.ts +13 -8
- package/src/speechflow-node-t2a-kokoro.ts +3 -3
- package/src/speechflow-node-t2t-deepl.ts +2 -2
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +2 -2
- package/src/speechflow-node-t2t-openai.ts +2 -2
- package/src/speechflow-node-t2t-subtitle.ts +1 -1
- package/src/speechflow-node-t2t-transformers.ts +2 -2
- package/src/speechflow-node-x2x-filter.ts +122 -0
- package/src/speechflow-node-x2x-trace.ts +28 -11
- package/src/speechflow-node-xio-device.ts +20 -8
- package/src/speechflow-node-xio-file.ts +74 -36
- package/src/speechflow-node-xio-mqtt.ts +3 -3
- package/src/speechflow-node-xio-websocket.ts +1 -1
- package/src/speechflow-node.ts +2 -0
- package/src/speechflow-utils.ts +81 -2
- package/src/speechflow.ts +84 -81
package/src/speechflow-node-a2a-wav.ts

```diff
@@ -7,52 +7,69 @@
 /* standard dependencies */
 import Stream from "node:stream"
 
-/* external dependencies */
-import wav from "wav"
-
 /* internal dependencies */
-import SpeechFlowNode
[… removed lines 15-55 elided …]
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* write WAV header */
+const writeWavHeader = (
+    length: number,
+    options?: { audioFormat?: number, channels?: number, sampleRate?: number, bitDepth?: number }
+) => {
+    const audioFormat = options?.audioFormat ?? 0x001 /* PCM */
+    const channels = options?.channels ?? 1 /* mono */
+    const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
+    const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
+
+    const headerLength = 44
+    const dataLength = length || (4294967295 - 100)
+    const fileSize = dataLength + headerLength
+    const header = Buffer.alloc(headerLength)
+
+    const RIFF = Buffer.alloc(4, "RIFF")
+    const WAVE = Buffer.alloc(4, "WAVE")
+    const fmt  = Buffer.alloc(4, "fmt ")
+    const data = Buffer.alloc(4, "data")
+    const byteRate = (sampleRate * channels * bitDepth) / 8
+    const blockAlign = (channels * bitDepth) / 8
+
+    let offset = 0
+    RIFF.copy(header, offset); offset += RIFF.length
+    header.writeUInt32LE(fileSize - 8, offset); offset += 4
+    WAVE.copy(header, offset); offset += WAVE.length
+    fmt.copy(header, offset); offset += fmt.length
+    header.writeUInt32LE(16, offset); offset += 4
+    header.writeUInt16LE(audioFormat, offset); offset += 2
+    header.writeUInt16LE(channels, offset); offset += 2
+    header.writeUInt32LE(sampleRate, offset); offset += 4
+    header.writeUInt32LE(byteRate, offset); offset += 4
+    header.writeUInt16LE(blockAlign, offset); offset += 2
+    header.writeUInt16LE(bitDepth, offset); offset += 2
+    data.copy(header, offset); offset += data.length
+    header.writeUInt32LE(dataLength, offset); offset += 4
+
+    return header
+}
+
+/* read WAV header */
+const readWavHeader = (buffer: Buffer) => {
+    let offset = 0
+    const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fileSize = buffer.readUInt32LE(offset); offset += 4
+    const waveHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fmtHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const formatLength = buffer.readUInt32LE(offset); offset += 4
+    const audioFormat = buffer.readUInt16LE(offset); offset += 2
+    const channels = buffer.readUInt16LE(offset); offset += 2
+    const sampleRate = buffer.readUInt32LE(offset); offset += 4
+    const byteRate = buffer.readUInt32LE(offset); offset += 4
+    const blockAlign = buffer.readUInt16LE(offset); offset += 2
+    const bitDepth = buffer.readUInt16LE(offset); offset += 2
+    const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const dataLength = buffer.readUInt32LE(offset); offset += 4
+
+    return {
+        riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
+        channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
     }
 }
 
```
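These hand-rolled helpers replace the previous `wav` package dependency. A minimal round-trip sketch, assuming the two helpers above are in scope (a `length` argument of 0 selects the streaming sentinel size):

```ts
/* round-trip sketch: readWavHeader() should recover what
   writeWavHeader() encoded (helpers as in the diff above) */
const header = writeWavHeader(0, {
    audioFormat: 0x0001, /* PCM */
    channels: 1,         /* mono */
    sampleRate: 48000,   /* 48 kHz */
    bitDepth: 16         /* 16 bit */
})
const parsed = readWavHeader(header)
console.log(parsed.riffHead)   /* "RIFF" */
console.log(parsed.sampleRate) /* 48000 */
console.log(parsed.dataLength) /* 4294967195, the (2^32-1)-100 "unknown length" sentinel */
```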
```diff
@@ -77,52 +94,72 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
 
     /* open node */
     async open () {
[… removed lines 80-125 elided …]
+        /* establish a transform stream */
+        const self = this
+        let firstChunk = true
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (firstChunk) {
+                    if (self.params.mode === "encode") {
+                        /* convert raw/PCM to WAV/PCM
+                           (NOTICE: as this is a continuous stream, the
+                           resulting WAV header is not 100% conforming
+                           to the WAV standard, as it has to use a zero
+                           duration information. This cannot be changed in
+                           a stream-based processing.) */
+                        const headerBuffer = writeWavHeader(0, {
+                            audioFormat: 0x0001 /* PCM */,
+                            channels: self.config.audioChannels,
+                            sampleRate: self.config.audioSampleRate,
+                            bitDepth: self.config.audioBitDepth
+                        })
+                        const headerChunk = chunk.clone()
+                        headerChunk.payload = headerBuffer
+                        this.push(headerChunk)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else if (self.params.mode === "decode") {
+                        /* convert WAV/PCM to raw/PCM */
+                        const header = readWavHeader(chunk.payload)
+                        self.log("info", "WAV audio stream: " +
+                            `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
+                                "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
+                            `channels=${header.channels} ` +
+                            `sampleRate=${header.sampleRate} ` +
+                            `bitDepth=${header.bitDepth}`)
+                        if (header.audioFormat !== 0x0001 /* PCM */)
+                            throw new Error("WAV not based on PCM format")
+                        if (header.bitDepth !== 16)
+                            throw new Error("WAV not based on 16 bit samples")
+                        if (header.sampleRate !== 48000)
+                            throw new Error("WAV not based on 48Khz sample rate")
+                        if (header.channels !== 1)
+                            throw new Error("WAV not based on mono channel")
+                        chunk.payload = chunk.payload.subarray(44)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else
+                        throw new Error(`invalid operation mode "${self.params.mode}"`)
+                }
+                else {
+                    /* pass-through original chunk */
+                    this.push(chunk)
+                    callback()
+                }
+                firstChunk = false
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
     }
 
     /* close node */
```
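In `encode` mode the node emits the header as one extra chunk ahead of the first payload chunk and then passes everything through untouched. The same idea, reduced to a bare Node.js stream over raw PCM buffers (a sketch, not the node's actual API; `writeWavHeader` as defined above):

```ts
import Stream from "node:stream"

/* sketch: prepend a streaming WAV header (zero/sentinel data length)
   before the first raw PCM buffer, then pass all buffers through */
const wavEncode = () => {
    let first = true
    return new Stream.Transform({
        transform (chunk: Buffer, _encoding, callback) {
            if (first) {
                first = false
                this.push(writeWavHeader(0, { channels: 1, sampleRate: 48000, bitDepth: 16 }))
            }
            this.push(chunk)
            callback()
        }
    })
}
```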
package/src/speechflow-node-a2t-deepgram.ts

```diff
@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as Deepgram from "@deepgram/sdk"
```
```diff
@@ -65,6 +65,9 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
         /* create queue for results */
         const queue = new utils.SingleQueue<SpeechFlowChunk>()
 
+        /* create a store for the meta information */
+        const metastore = new utils.TimeStore<Map<string, any>>()
+
         /* connect to Deepgram API */
         const deepgram = Deepgram.createClient(this.params.key)
         let language = "en"
```
```diff
@@ -86,21 +89,27 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
             smart_format: true,
             punctuate: true,
             filler_words: true,
-            diarize:
+            diarize: false,
             numerals: true,
             profanity_filter: false
         })
 
         /* hook onto Deepgram API events */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
-            const text = (data.channel?.alternatives[0]
+            const text = (data.channel?.alternatives[0]?.transcript as string) ?? ""
             if (text === "")
                 this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`)
             else {
                 this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`)
                 const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
                 const end = start.plus({ seconds: data.duration })
-                const
+                const metas = metastore.fetch(start, end)
+                const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                    curr.forEach((val, key) => { prev.set(key, val) })
+                    return prev
+                }, new Map<string, any>())
+                metastore.prune(start)
+                const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
                 queue.write(chunk)
             }
         })
```
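The `reduce` above folds all meta maps whose time intervals overlap the transcript's time range into a single map, front to back, so on key collisions the most recently stored map wins. In isolation (the keys are illustrative, not from the package):

```ts
/* merge semantics of the reduce above: later maps overwrite
   earlier ones on key collision (keys here are illustrative) */
const metas = [
    new Map<string, any>([[ "gender", "female" ]]),
    new Map<string, any>([[ "gender", "male" ], [ "score", 0.9 ]])
]
const meta = metas.reduce((prev, curr) => {
    curr.forEach((val, key) => { prev.set(key, val) })
    return prev
}, new Map<string, any>())
/* meta is Map { "gender" => "male", "score" => 0.9 } */
```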
```diff
@@ -180,6 +189,8 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
                 if (chunk.payload.byteLength > 0) {
                     log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`)
                     initTimeoutStart()
+                    if (chunk.meta.size > 0)
+                        metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                     dg.send(chunk.payload.buffer) /* intentionally discard all time information */
                 }
                 callback()
```
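Taken together, the two hunks show the full `utils.TimeStore` contract: `store()` attaches a value to a time interval while audio is sent, `fetch()` returns every value whose interval overlaps a transcript's interval, and `prune()` drops entries that can no longer match. The actual implementation lives in `speechflow-utils.ts` and is not part of this extract; a hypothetical sketch consistent with that usage:

```ts
import { Duration } from "luxon"

/* hypothetical sketch of the utils.TimeStore contract inferred from
   its call sites above -- not the package's actual implementation */
class TimeStore<T> {
    private entries: { start: number, end: number, value: T }[] = []
    store (start: Duration, end: Duration, value: T): void {
        this.entries.push({ start: start.toMillis(), end: end.toMillis(), value })
    }
    fetch (start: Duration, end: Duration): T[] {
        const s = start.toMillis()
        const e = end.toMillis()
        return this.entries
            .filter((x) => x.start < e && x.end > s) /* interval overlap */
            .map((x) => x.value)
    }
    prune (before: Duration): void {
        const t = before.toMillis()
        this.entries = this.entries.filter((x) => x.end >= t)
    }
}
```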
package/src/speechflow-node-t2a-elevenlabs.ts

```diff
@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
```
```diff
@@ -30,11 +30,13 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            key:
-            voice:
-            language:
-            speed:
-
+            key: { type: "string", val: process.env.SPEECHFLOW_ELEVENLABS_KEY },
+            voice: { type: "string", val: "Brian", pos: 0, match: /^(?:Brittney|Cassidy|Leonie|Mark|Brian)$/ },
+            language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
+            speed: { type: "number", val: 1.00, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
+            stability: { type: "number", val: 0.5, pos: 3, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            similarity: { type: "number", val: 0.75, pos: 4, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
         })
 
         /* declare node input/output format */
```
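Each parameter now carries a `match` constraint: a RegExp for string values, a predicate for numeric ranges. How `configure()` applies these is defined in `speechflow-node.ts` (not shown in this extract); a plausible sketch of the validation step:

```ts
/* hypothetical sketch of applying a "match" constraint as declared
   above -- the real configure() in speechflow-node.ts is not shown */
const validate = (name: string, value: any, match?: RegExp | ((v: any) => boolean)) => {
    const ok =
        match === undefined ? true :
        match instanceof RegExp ? match.test(String(value)) :
        match(value)
    if (!ok)
        throw new Error(`parameter "${name}": invalid value "${value}"`)
}

validate("speed", 1.0, (n: number) => n >= 0.7 && n <= 1.2)            /* passes */
validate("voice", "Brian", /^(?:Brittney|Cassidy|Leonie|Mark|Brian)$/) /* passes */
```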
```diff
@@ -90,7 +92,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
 
         /* perform text-to-speech operation with Elevenlabs API */
         const model = this.params.optimize === "quality" ?
-            "
+            "eleven_turbo_v2_5" :
             "eleven_flash_v2_5"
         const speechStream = (text: string) => {
             this.log("info", `ElevenLabs: send text "${text}"`)
```
```diff
@@ -101,7 +103,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
                 outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
                 seed: 815, /* arbitrary, but fixated by us */
                 voiceSettings: {
-                    speed:
+                    speed: this.params.speed,
+                    stability: this.params.stability,
+                    similarityBoost: this.params.similarity
                 }
             }, {
                 timeoutInSeconds: 30,
```
```diff
@@ -128,6 +132,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
+                    log("info", `ElevenLabs: send text: ${JSON.stringify(chunk.payload)}`)
                     speechStream(chunk.payload).then((stream) => {
                         getStreamAsBuffer(stream).then((buffer) => {
                             const bufferResampled = resampler.processChunk(buffer)
```
package/src/speechflow-node-t2a-kokoro.ts

```diff
@@ -5,11 +5,11 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { KokoroTTS }
-import SpeexResampler
+import { KokoroTTS } from "kokoro-js"
+import SpeexResampler from "speex-resampler"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```

package/src/speechflow-node-t2t-deepl.ts

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import * as DeepL
+import * as DeepL from "deepl-node"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```

package/src/speechflow-node-t2t-format.ts

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import wrapText
+import wrapText from "wrap-text"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```

package/src/speechflow-node-t2t-ollama.ts

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { Ollama }
+import { Ollama } from "ollama"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```

package/src/speechflow-node-t2t-openai.ts

```diff
@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import OpenAI
+import OpenAI from "openai"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
```

package/src/speechflow-node-t2t-transformers.ts

```diff
@@ -5,8 +5,8 @@
 */
 
 /* standard dependencies */
-import path
-import Stream
+import path from "node:path"
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as Transformers from "@huggingface/transformers"
```
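All of these hunks apply the same modernization: built-in modules are now imported through the `node:` scheme, which always resolves to the Node.js core module and can never be shadowed by a same-named package in `node_modules`:

```ts
/* the "node:" specifier pins resolution to the Node.js built-in */
import Stream from "node:stream"
import path from "node:path"
```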
package/src/speechflow-node-x2x-filter.ts (new file)

```diff
@@ -0,0 +1,122 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* SpeechFlow node for data flow filtering (based on meta information) */
+export default class SpeechFlowNodeFilter extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "filter"
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
+            var: { type: "string", pos: 1, val: "", match: /^(?:meta:.+|payload:(?:length|text)|time:(?:start|end))$/ },
+            op: { type: "string", pos: 2, val: "==", match: /^(?:<|<=|==|!=|~~|!~|>=|>)$/ },
+            val: { type: "string", pos: 3, val: "", match: /^.*$/ }
+        })
+
+        /* declare node input/output format */
+        this.input = this.params.type
+        this.output = this.params.type
+    }
+
+    /* open node */
+    async open () {
+        /* helper function for comparing two values */
+        const comparison = (val1: any, op: string, val2: any) => {
+            if (op === "==" || op === "!=") {
+                /* equal comparison */
+                const str1 = (typeof val1 === "string" ? val1 : val1.toString()) as string
+                const str2 = (typeof val2 === "string" ? val2 : val2.toString()) as string
+                return (op === "==" ? (str1 === str2) : (str1 !== str2))
+            }
+            else if (op === "~~" || op === "!~") {
+                /* regular expression comparison */
+                const str = (typeof val1 === "string" ? val1 : val1.toString()) as string
+                const regexp = (
+                    val2 instanceof RegExp ?
+                        val2 :
+                        typeof val2 === "string" ?
+                            new RegExp(val2) :
+                            new RegExp(val2.toString()))
+                return (op === "~~" ? regexp.test(str) : !regexp.test(str))
+            }
+            else {
+                /* non-equal comparison */
+                const coerceNum = (val: any) => {
+                    return typeof val === "number" ? val : (
+                        typeof val === "string" && val.match(/^[\d+-]+$/) ? parseInt(val) : (
+                            typeof val === "string" && val.match(/^[\d.+-]+$/) ?
+                                parseFloat(val) :
+                                Number(val)
+                        )
+                    )
+                }
+                const num1 = coerceNum(val1)
+                const num2 = coerceNum(val2)
+                return (
+                    op === "<" ?
+                        (num1 < num2) :
+                        op === "<=" ?
+                            (num1 <= num2) :
+                            op === ">=" ?
+                                (num1 >= num2) :
+                                op === ">" ?
+                                    (num1 > num2) :
+                                    false
+                )
+            }
+        }
+
+        /* provide Transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            writableObjectMode: true,
+            readableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                let val1: any
+                const val2: any = self.params.val
+                const m = self.params.var.match(/^meta:(.+)$/)
+                if (m !== null)
+                    val1 = chunk.meta.get(m[1])
+                else if (self.params.key === "payload:length")
+                    val1 = chunk.payload.length
+                else if (self.params.key === "payload:text")
+                    val1 = (self.params.type === "text" ? chunk.payload as string : "")
+                else if (self.params.key === "time:start")
+                    val1 = chunk.timestampStart.toMillis()
+                else if (self.params.key === "time:end")
+                    val1 = chunk.timestampEnd.toMillis()
+                if (comparison(val1, self.params.ops, val2))
+                    this.push(chunk)
+                callback()
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
```
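The new filter node selects a chunk field via its `var` parameter (`meta:*`, `payload:length`, `payload:text`, `time:start`, or `time:end`, per the declared `match` pattern), compares it against `val` using `op`, and forwards only matching chunks. Given the `comparison()` helper above, the three comparison families behave like this:

```ts
/* behavior of the comparison() helper defined above */
comparison("male", "==", "male")       /* true: string equality */
comparison("hello world", "~~", "wor") /* true: RegExp test */
comparison("1.5", "<", "2")            /* true: both values coerced to numbers */
comparison("abc", "<", "2")            /* false: Number("abc") is NaN, so every ordering fails */
```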
package/src/speechflow-node-x2x-trace.ts

```diff
@@ -5,7 +5,9 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
+
+/* external dependencies */
 import { Duration } from "luxon"
 
 /* internal dependencies */
```
```diff
@@ -41,7 +43,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
             this.log(level, msg)
         }
 
-        /* provide
+        /* provide Transform stream */
         const type = this.params.type
         this.stream = new Stream.Transform({
             writableObjectMode: true,
```
```diff
@@ -49,23 +51,38 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
                 let error: Error | undefined
-                const
+                const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
+                const fmtMeta = (meta: Map<string, any>) => {
+                    if (meta.size === 0)
+                        return "none"
+                    else
+                        return `{ ${Array.from(meta.entries())
+                            .map(([ k, v ]) => `${k}: ${JSON.stringify(v)}`)
+                            .join(", ")
+                        } }`
+                }
                 if (Buffer.isBuffer(chunk.payload)) {
                     if (type === "audio")
-                        log("debug", `
-                            `
-                            `
+                        log("debug", `chunk: type=${chunk.type} ` +
+                            `kind=${chunk.kind} ` +
+                            `start=${fmtTime(chunk.timestampStart)} ` +
+                            `end=${fmtTime(chunk.timestampEnd)} ` +
+                            `payload-type=Buffer payload-length=${chunk.payload.byteLength} ` +
+                            `meta=${fmtMeta(chunk.meta)}`)
                     else
-                        error = new Error(
+                        error = new Error(`${type} chunk: seen Buffer instead of String chunk type`)
                 }
                 else {
                     if (type === "text")
-                        log("debug",
-                            `
+                        log("debug", `${type} chunk: type=${chunk.type}` +
+                            `kind=${chunk.kind} ` +
+                            `start=${fmtTime(chunk.timestampStart)} ` +
+                            `end=${fmtTime(chunk.timestampEnd)} ` +
                             `payload-type=String payload-length=${chunk.payload.length} ` +
-                            `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}"`
+                            `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}" ` +
+                            `meta=${fmtMeta(chunk.meta)}`)
                     else
-                        error = new Error(
+                        error = new Error(`${type} chunk: seen String instead of Buffer chunk type`)
                 }
                 if (error !== undefined)
                     callback(error)
```