speechflow 1.7.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +387 -119
- package/etc/claude.md +5 -5
- package/etc/speechflow.yaml +2 -2
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-error.js +5 -0
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/package.json +34 -17
- package/speechflow-cli/src/lib.d.ts +5 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
- package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
- package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
- package/speechflow-cli/src/speechflow-util-error.ts +9 -0
- package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/package.json +9 -9
- package/speechflow-ui-st/package.json +9 -9
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
|
@@ -9,37 +9,39 @@ import fs from "node:fs"
|
|
|
9
9
|
import Stream from "node:stream"
|
|
10
10
|
|
|
11
11
|
/* internal dependencies */
|
|
12
|
-
import SpeechFlowNode
|
|
13
|
-
import * as util
|
|
12
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
13
|
+
import * as util from "./speechflow-util"
|
|
14
14
|
|
|
15
15
|
/* SpeechFlow node for file access */
|
|
16
16
|
export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
17
17
|
/* declare official node name */
|
|
18
18
|
public static name = "xio-file"
|
|
19
19
|
|
|
20
|
+
/* file descriptor for seekable write mode */
|
|
21
|
+
private fd: number | null = null
|
|
22
|
+
|
|
20
23
|
/* construct node */
|
|
21
24
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
22
25
|
super(id, cfg, opts, args)
|
|
23
26
|
|
|
24
27
|
/* declare node configuration parameters */
|
|
25
28
|
this.configure({
|
|
26
|
-
path:
|
|
27
|
-
mode:
|
|
28
|
-
type:
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
path: { type: "string", pos: 0, val: "" },
|
|
30
|
+
mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w)$/ },
|
|
31
|
+
type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ },
|
|
32
|
+
seekable: { type: "boolean", val: false },
|
|
33
|
+
chunkAudio: { type: "number", val: 200, match: (n: number) => n >= 10 && n <= 1000 },
|
|
34
|
+
chunkText: { type: "number", val: 65536, match: (n: number) => n >= 1024 && n <= 131072 }
|
|
31
35
|
})
|
|
32
36
|
|
|
33
37
|
/* sanity check parameters */
|
|
34
38
|
if (this.params.path === "")
|
|
35
39
|
throw new Error("required parameter \"path\" has to be given")
|
|
40
|
+
if (this.params.seekable && this.params.path === "-")
|
|
41
|
+
throw new Error("parameter \"seekable\" cannot be used with standard I/O")
|
|
36
42
|
|
|
37
43
|
/* declare node input/output format */
|
|
38
|
-
if (this.params.mode === "
|
|
39
|
-
this.input = this.params.type
|
|
40
|
-
this.output = this.params.type
|
|
41
|
-
}
|
|
42
|
-
else if (this.params.mode === "r") {
|
|
44
|
+
if (this.params.mode === "r") {
|
|
43
45
|
this.input = "none"
|
|
44
46
|
this.output = this.params.type
|
|
45
47
|
}
|
|
@@ -56,8 +58,8 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
56
58
|
const highWaterMarkAudio = (
|
|
57
59
|
this.config.audioSampleRate *
|
|
58
60
|
(this.config.audioBitDepth / 8)
|
|
59
|
-
) / (1000 / this.params.
|
|
60
|
-
const highWaterMarkText = this.params.
|
|
61
|
+
) / (1000 / this.params.chunkAudio)
|
|
62
|
+
const highWaterMarkText = this.params.chunkText
|
|
61
63
|
|
|
62
64
|
/* utility function: create a writable stream as chunker that
|
|
63
65
|
writes to process.stdout but properly handles finish events.
|
|
@@ -81,59 +83,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
81
83
|
})
|
|
82
84
|
|
|
83
85
|
/* dispatch according to mode and path */
|
|
84
|
-
if (this.params.mode === "
|
|
85
|
-
if (this.params.path === "-") {
|
|
86
|
-
/* standard I/O */
|
|
87
|
-
if (this.params.type === "audio") {
|
|
88
|
-
process.stdin.setEncoding()
|
|
89
|
-
process.stdout.setEncoding()
|
|
90
|
-
const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
|
|
91
|
-
process.stdin.pipe(streamR)
|
|
92
|
-
const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
|
|
93
|
-
streamW.pipe(process.stdout)
|
|
94
|
-
this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
|
|
95
|
-
}
|
|
96
|
-
else {
|
|
97
|
-
process.stdin.setEncoding(this.config.textEncoding)
|
|
98
|
-
process.stdout.setEncoding(this.config.textEncoding)
|
|
99
|
-
const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
|
|
100
|
-
process.stdin.pipe(streamR)
|
|
101
|
-
const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
|
|
102
|
-
streamW.pipe(process.stdout)
|
|
103
|
-
this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
else {
|
|
107
|
-
/* file I/O */
|
|
108
|
-
if (this.params.type === "audio") {
|
|
109
|
-
this.stream = Stream.Duplex.from({
|
|
110
|
-
readable: fs.createReadStream(this.params.path,
|
|
111
|
-
{ highWaterMark: highWaterMarkAudio }),
|
|
112
|
-
writable: fs.createWriteStream(this.params.path,
|
|
113
|
-
{ highWaterMark: highWaterMarkAudio })
|
|
114
|
-
})
|
|
115
|
-
}
|
|
116
|
-
else {
|
|
117
|
-
this.stream = Stream.Duplex.from({
|
|
118
|
-
readable: fs.createReadStream(this.params.path, {
|
|
119
|
-
highWaterMark: highWaterMarkText,
|
|
120
|
-
encoding: this.config.textEncoding
|
|
121
|
-
}),
|
|
122
|
-
writable: fs.createWriteStream(this.params.path, {
|
|
123
|
-
highWaterMark: highWaterMarkText,
|
|
124
|
-
encoding: this.config.textEncoding
|
|
125
|
-
})
|
|
126
|
-
})
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
/* convert regular stream into object-mode stream */
|
|
131
|
-
const wrapper1 = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
132
|
-
const wrapper2 = util.createTransformStreamForReadableSide(
|
|
133
|
-
this.params.type, () => this.timeZero)
|
|
134
|
-
this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
|
|
135
|
-
}
|
|
136
|
-
else if (this.params.mode === "r") {
|
|
86
|
+
if (this.params.mode === "r") {
|
|
137
87
|
if (this.params.path === "-") {
|
|
138
88
|
/* standard I/O */
|
|
139
89
|
let chunker: Stream.PassThrough
|
|
@@ -176,15 +126,63 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
176
126
|
}
|
|
177
127
|
else {
|
|
178
128
|
/* file I/O */
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
writable =
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
129
|
+
if (this.params.seekable) {
|
|
130
|
+
/* seekable file I/O with file descriptor */
|
|
131
|
+
this.fd = fs.openSync(this.params.path, "w")
|
|
132
|
+
let writePosition = 0
|
|
133
|
+
const self = this
|
|
134
|
+
const writable = new Stream.Writable({
|
|
135
|
+
objectMode: true,
|
|
136
|
+
decodeStrings: false,
|
|
137
|
+
highWaterMark: 1,
|
|
138
|
+
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
139
|
+
const payload = Buffer.isBuffer(chunk.payload) ?
|
|
140
|
+
chunk.payload : Buffer.from(chunk.payload)
|
|
141
|
+
const seekPosition = chunk.meta.get("chunk:seek") as number | undefined
|
|
142
|
+
if (seekPosition !== undefined) {
|
|
143
|
+
/* seek to specified position and write (overload) */
|
|
144
|
+
fs.write(self.fd!, payload, 0, payload.byteLength, seekPosition, callback)
|
|
145
|
+
}
|
|
146
|
+
else {
|
|
147
|
+
/* append at current position */
|
|
148
|
+
fs.write(self.fd!, payload, 0, payload.byteLength, writePosition, (err) => {
|
|
149
|
+
if (err)
|
|
150
|
+
callback(err)
|
|
151
|
+
else {
|
|
152
|
+
writePosition += payload.byteLength
|
|
153
|
+
callback()
|
|
154
|
+
}
|
|
155
|
+
})
|
|
156
|
+
}
|
|
157
|
+
},
|
|
158
|
+
final (callback) {
|
|
159
|
+
callback()
|
|
160
|
+
},
|
|
161
|
+
destroy (err, callback) {
|
|
162
|
+
if (self.fd !== null) {
|
|
163
|
+
fs.close(self.fd, () => {
|
|
164
|
+
self.fd = null
|
|
165
|
+
callback(err)
|
|
166
|
+
})
|
|
167
|
+
}
|
|
168
|
+
else
|
|
169
|
+
callback(err)
|
|
170
|
+
}
|
|
171
|
+
})
|
|
172
|
+
this.stream = writable
|
|
173
|
+
}
|
|
174
|
+
else {
|
|
175
|
+
/* non-seekable file I/O with stream */
|
|
176
|
+
let writable: Stream.Writable
|
|
177
|
+
if (this.params.type === "audio")
|
|
178
|
+
writable = fs.createWriteStream(this.params.path,
|
|
179
|
+
{ highWaterMark: highWaterMarkAudio })
|
|
180
|
+
else
|
|
181
|
+
writable = fs.createWriteStream(this.params.path,
|
|
182
|
+
{ highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
|
|
183
|
+
const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
|
|
184
|
+
this.stream = Stream.compose(wrapper, writable)
|
|
185
|
+
}
|
|
188
186
|
}
|
|
189
187
|
}
|
|
190
188
|
else
|
|
@@ -202,12 +200,14 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
202
200
|
/* for stdio streams, just end without destroying */
|
|
203
201
|
const stream = this.stream
|
|
204
202
|
if ((stream instanceof Stream.Writable || stream instanceof Stream.Duplex) &&
|
|
205
|
-
(!stream.writableEnded && !stream.destroyed)
|
|
203
|
+
(!stream.writableEnded && !stream.destroyed)) {
|
|
206
204
|
await Promise.race([
|
|
207
205
|
new Promise<void>((resolve, reject) => {
|
|
208
206
|
stream.end((err?: Error) => {
|
|
209
|
-
if (err)
|
|
210
|
-
|
|
207
|
+
if (err)
|
|
208
|
+
reject(err)
|
|
209
|
+
else
|
|
210
|
+
resolve()
|
|
211
211
|
})
|
|
212
212
|
}),
|
|
213
213
|
util.timeout(5000)
|
|
@@ -216,6 +216,19 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
|
|
|
216
216
|
}
|
|
217
217
|
this.stream = null
|
|
218
218
|
}
|
|
219
|
+
|
|
220
|
+
/* ensure file descriptor is closed */
|
|
221
|
+
if (this.fd !== null) {
|
|
222
|
+
await new Promise<void>((resolve, reject) => {
|
|
223
|
+
fs.close(this.fd!, (err) => {
|
|
224
|
+
this.fd = null
|
|
225
|
+
if (err)
|
|
226
|
+
reject(err)
|
|
227
|
+
else
|
|
228
|
+
resolve()
|
|
229
|
+
})
|
|
230
|
+
})
|
|
231
|
+
}
|
|
219
232
|
}
|
|
220
233
|
}
|
|
221
234
|
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* external dependencies */
|
|
11
|
+
import { DateTime } from "luxon"
|
|
12
|
+
import { VBANServer, VBANAudioPacket,
|
|
13
|
+
EBitsResolutions, ECodecs } from "vban"
|
|
14
|
+
|
|
15
|
+
/* internal dependencies */
|
|
16
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
17
|
+
import * as util from "./speechflow-util"
|
|
18
|
+
|
|
19
|
+
/* mapping of audio sample rate (in Hz) to VBAN sample rate index */
|
|
20
|
+
const sampleRateToIndex: { [ rate: number ]: number } = Object.fromEntries([
    /* the position of a rate in this list is its VBAN sample rate index */
    6000,  12000, 24000, 48000, 96000, 192000, 384000,    /* indices  0 ..  6 */
    8000,  16000, 32000, 64000, 128000, 256000, 512000,   /* indices  7 .. 13 */
    11025, 22050, 44100, 88200, 176400, 352800, 705600    /* indices 14 .. 20 */
].map((rate, index): [ number, number ] => [ rate, index ]))
|
|
25
|
+
|
|
26
|
+
/* SpeechFlow node for VBAN networking */
|
|
27
|
+
export default class SpeechFlowNodeXIOVBAN extends SpeechFlowNode {
    /* This node exchanges audio with a VBAN peer over the network.
       Incoming VBAN audio packets are converted to 16-bit signed
       little-endian PCM (optionally down-mixed to mono) and emitted
       as audio chunks; outgoing audio chunks are split into
       protocol-sized VBAN audio packets and sent to the configured
       target. No resampling is performed in either direction. */

    /* declare official node name */
    public static name = "xio-vban"

    /* internal state */
    private server: VBANServer | null = null
    private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
    private frameCounter = 0
    private targetAddress = ""
    private targetPort = 0

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            listen:  { type: "string", pos: 0, val: "",       match: /^(?:|\d+|.+?:\d+)$/ },
            connect: { type: "string", pos: 1, val: "",       match: /^(?:|.+?:\d+)$/ },
            stream:  { type: "string", pos: 2, val: "Stream", match: /^.{1,16}$/ },
            mode:    { type: "string", pos: 3, val: "rw",     match: /^(?:r|w|rw)$/ }
        })

        /* sanity check parameters */
        if (this.params.listen === "" && this.params.connect === "")
            throw new Error("VBAN node requires either listen or connect mode")
        if (this.params.mode === "r" && this.params.listen === "")
            throw new Error("VBAN read mode requires a listen address")
        if (this.params.mode === "w" && this.params.connect === "")
            throw new Error("VBAN write mode requires a connect address")

        /* VBAN only handles audio */
        if (this.params.mode === "rw") {
            this.input  = "audio"
            this.output = "audio"
        }
        else if (this.params.mode === "r") {
            this.input  = "none"
            this.output = "audio"
        }
        else if (this.params.mode === "w") {
            this.input  = "audio"
            this.output = "none"
        }
    }

    /* parse a "port", "host:port" or "host" string; a bare port
       binds to all interfaces and a bare host uses the default port */
    private parseAddress (addr: string, defaultPort: number): { host: string, port: number } {
        if (addr.match(/^\d+$/))
            return { host: "0.0.0.0", port: Number.parseInt(addr, 10) }
        const m = addr.match(/^(.+?):(\d+)$/)
        if (m === null)
            return { host: addr, port: defaultPort }
        return { host: m[1], port: Number.parseInt(m[2], 10) }
    }

    /* convert raw VBAN sample data into 16-bit signed little-endian PCM
       (returns null if the bit resolution is not supported; trailing
       bytes which do not form a complete sample are ignored) */
    private convertToInt16 (data: Buffer, bitResolution: EBitsResolutions): Buffer | null {
        switch (bitResolution) {
            case EBitsResolutions.VBAN_DATATYPE_INT16: {
                /* 16-bit signed integer - matches our format */
                return data
            }
            case EBitsResolutions.VBAN_DATATYPE_BYTE8: {
                /* 8-bit unsigned to 16-bit signed */
                const pcm = Buffer.alloc(data.length * 2)
                for (let i = 0; i < data.length; i++) {
                    const sample = ((data.readUInt8(i) - 128) / 128) * 32767
                    pcm.writeInt16LE(Math.round(sample), i * 2)
                }
                return pcm
            }
            case EBitsResolutions.VBAN_DATATYPE_INT24: {
                /* 24-bit signed to 16-bit signed */
                const samples = Math.floor(data.length / 3)
                const pcm = Buffer.alloc(samples * 2)
                for (let i = 0; i < samples; i++) {
                    const sample = (data.readIntLE(i * 3, 3) / 0x800000) * 32767
                    pcm.writeInt16LE(Math.round(sample), i * 2)
                }
                return pcm
            }
            case EBitsResolutions.VBAN_DATATYPE_INT32: {
                /* 32-bit signed to 16-bit signed */
                const samples = Math.floor(data.length / 4)
                const pcm = Buffer.alloc(samples * 2)
                for (let i = 0; i < samples; i++) {
                    const sample = (data.readInt32LE(i * 4) / 0x80000000) * 32767
                    pcm.writeInt16LE(Math.round(sample), i * 2)
                }
                return pcm
            }
            case EBitsResolutions.VBAN_DATATYPE_FLOAT32: {
                /* 32-bit float to 16-bit signed (clamped) */
                const samples = Math.floor(data.length / 4)
                const pcm = Buffer.alloc(samples * 2)
                for (let i = 0; i < samples; i++) {
                    const value = data.readFloatLE(i * 4)
                    pcm.writeInt16LE(Math.max(-32768, Math.min(32767, Math.round(value * 32767))), i * 2)
                }
                return pcm
            }
            case EBitsResolutions.VBAN_DATATYPE_FLOAT64: {
                /* 64-bit float to 16-bit signed (clamped) */
                const samples = Math.floor(data.length / 8)
                const pcm = Buffer.alloc(samples * 2)
                for (let i = 0; i < samples; i++) {
                    const value = data.readDoubleLE(i * 8)
                    pcm.writeInt16LE(Math.max(-32768, Math.min(32767, Math.round(value * 32767))), i * 2)
                }
                return pcm
            }
            default:
                return null
        }
    }

    /* average interleaved 16-bit PCM channels into a single mono channel */
    private downmixToMono (pcm: Buffer, channels: number): Buffer {
        /* only complete frames are processed: a fractional frame count
           would make the loop below read beyond the end of the buffer */
        const frames = Math.floor(pcm.length / 2 / channels)
        const mono = Buffer.alloc(frames * 2)
        for (let i = 0; i < frames; i++) {
            let sum = 0
            for (let ch = 0; ch < channels; ch++)
                sum += pcm.readInt16LE((i * channels + ch) * 2)
            mono.writeInt16LE(Math.round(sum / channels), i * 2)
        }
        return mono
    }

    /* open node */
    async open () {
        /* VBAN protocol limits: the sample count header field is 8 bits
           wide (1-256 samples per packet) and the audio data of a single
           packet is at most 1436 bytes */
        const maxSamplesPerPacket   = 256
        const maxDataBytesPerPacket = 1436

        /* create VBAN server */
        this.server = new VBANServer({
            application: {
                applicationName:  "SpeechFlow",
                manufacturerName: "Dr. Ralf S. Engelschall",
                deviceName:       this.id
            }
        })

        /* setup error handling */
        this.server.on("error", (err: Error) => {
            this.log("error", `VBAN error: ${err.message}`)
        })

        /* setup chunk queue for incoming audio */
        this.chunkQueue = new util.SingleQueue<SpeechFlowChunk>()

        /* determine target for sending */
        if (this.params.connect !== "") {
            const target = this.parseAddress(this.params.connect, 6980)
            this.targetAddress = target.host
            this.targetPort    = target.port
        }

        /* handle incoming VBAN packets */
        this.server.on("message", (packet: any, sender: { address: string, port: number }) => {
            if (this.params.mode === "w")
                return

            /* only handle audio packets */
            if (!(packet instanceof VBANAudioPacket))
                return

            /* optionally filter by stream name */
            if (this.params.stream !== "" && packet.streamName !== this.params.stream)
                return

            /* get audio data from packet */
            if (!Buffer.isBuffer(packet.data)) {
                this.log("warning", "VBAN packet data is not a Buffer")
                return
            }

            /* convert audio format if necessary */
            const bitResolution = packet.bitResolution
            let audioBuffer = this.convertToInt16(packet.data, bitResolution)
            if (audioBuffer === null) {
                /* unsupported format */
                this.log("warning", `unsupported VBAN bit resolution: ${bitResolution}`)
                return
            }

            /* handle channel conversion if needed */
            const channels = packet.nbChannel + 1
            if (channels > 1 && this.config.audioChannels === 1)
                audioBuffer = this.downmixToMono(audioBuffer, channels)

            /* create chunk with timing information */
            const now      = DateTime.now()
            const start    = now.diff(this.timeZero)
            const duration = util.audioBufferDuration(audioBuffer,
                this.config.audioSampleRate, this.config.audioBitDepth, this.config.audioChannels)
            const end      = start.plus(duration * 1000)
            const chunk    = new SpeechFlowChunk(start, end, "final", "audio", audioBuffer)
            this.chunkQueue?.write(chunk)
        })

        /* setup listening */
        this.server.on("listening", () => {
            const address = this.server!.address()
            this.log("info", `VBAN listening on ${address.address}:${address.port}`)
        })

        /* bind to listen port */
        if (this.params.listen !== "") {
            const listen = this.parseAddress(this.params.listen, 6980)
            this.server.bind(listen.port, listen.host)
        }
        else
            /* still need to bind for sending */
            this.server.bind(0)

        /* create duplex stream */
        const self = this
        const reads = new util.PromiseSet<void>()
        this.stream = new Stream.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            write (chunk: SpeechFlowChunk, encoding, callback) {
                if (self.params.mode === "r") {
                    callback(new Error("write operation on read-only node"))
                    return
                }
                if (chunk.type !== "audio") {
                    callback(new Error("VBAN only supports audio type"))
                    return
                }
                if (self.targetAddress === "") {
                    callback(new Error("no VBAN target address configured"))
                    return
                }
                const server = self.server
                if (server === null) {
                    callback(new Error("VBAN server is not open"))
                    return
                }

                /* get audio buffer */
                if (!Buffer.isBuffer(chunk.payload)) {
                    callback(new Error("VBAN requires audio chunks with Buffer payload"))
                    return
                }
                const audioBuffer = chunk.payload

                /* determine VBAN sample rate index */
                const sampleRateIndex = sampleRateToIndex[self.config.audioSampleRate]
                if (sampleRateIndex === undefined) {
                    callback(new Error(`unsupported sample rate for VBAN: ${self.config.audioSampleRate}`))
                    return
                }

                /* determine packet geometry: a frame is one sample for
                   each channel, and a packet carries as many complete
                   frames as the protocol limits permit */
                const bytesPerFrame   = (self.config.audioBitDepth / 8) * self.config.audioChannels
                const framesPerPacket = Math.max(1, Math.min(maxSamplesPerPacket,
                    Math.floor(maxDataBytesPerPacket / bytesPerFrame)))
                const bytesPerPacket  = framesPerPacket * bytesPerFrame
                const usableBytes     = audioBuffer.length - (audioBuffer.length % bytesPerFrame)
                if (usableBytes !== audioBuffer.length)
                    self.log("warning", "VBAN audio chunk ends with an incomplete frame (trailing bytes dropped)")
                if (usableBytes === 0) {
                    /* nothing to send: VBAN cannot express a packet with zero samples */
                    callback()
                    return
                }

                /* send the chunk as a sequence of VBAN audio packets
                   (sequentially, to keep the packets in order) */
                const sendAll = async () => {
                    for (let offset = 0; offset < usableBytes; offset += bytesPerPacket) {
                        const data = audioBuffer.subarray(offset, Math.min(offset + bytesPerPacket, usableBytes))
                        const packet = new VBANAudioPacket({
                            streamName:    self.params.stream,
                            srIndex:       sampleRateIndex,
                            nbSample:      (data.length / bytesPerFrame) - 1,
                            nbChannel:     self.config.audioChannels - 1,
                            bitResolution: EBitsResolutions.VBAN_DATATYPE_INT16,
                            codec:         ECodecs.VBAN_CODEC_PCM,
                            frameCounter:  self.frameCounter
                        }, data)

                        /* the frame counter is an unsigned 32-bit header field */
                        self.frameCounter = (self.frameCounter + 1) >>> 0

                        await server.send(packet, self.targetPort, self.targetAddress)
                    }
                }
                sendAll()
                    .then(() => callback())
                    .catch((err: Error) => callback(err))
            },
            async final (callback) {
                /* wait for outstanding reads, but never leave a rejection unhandled */
                try {
                    await reads.awaitAll()
                    callback()
                }
                catch (err) {
                    callback(err instanceof Error ? err : new Error(String(err)))
                }
            },
            read (size: number) {
                if (self.params.mode === "w")
                    throw new Error("read operation on write-only node")
                reads.add(self.chunkQueue!.read().then((chunk) => {
                    this.push(chunk, "binary")
                }).catch((err: Error) => {
                    self.log("warning", `read on chunk queue operation failed: ${err}`)
                    this.push(null)
                }))
            }
        })
    }

    /* close node */
    async close () {
        /* drain and clear chunk queue reference */
        if (this.chunkQueue !== null) {
            this.chunkQueue.drain()
            this.chunkQueue = null
        }

        /* close VBAN server */
        if (this.server !== null) {
            this.server.close()
            this.server = null
        }

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }
    }
}
|