speechflow 1.7.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +387 -119
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
  10. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
  12. package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
  13. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  14. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  15. package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
  16. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  18. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
  19. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  21. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
  22. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  23. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  24. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  25. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
  26. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  28. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  29. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  31. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  32. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  33. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  34. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
  35. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  36. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
  37. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  39. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  40. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  41. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  42. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  43. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  45. package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
  46. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  48. package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
  49. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  51. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  52. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  53. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  54. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
  55. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  56. package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
  57. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  59. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  60. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  62. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  63. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  65. package/speechflow-cli/dst/speechflow-util.js +1 -0
  66. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  67. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  68. package/speechflow-cli/package.json +34 -17
  69. package/speechflow-cli/src/lib.d.ts +5 -0
  70. package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
  71. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
  72. package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
  73. package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
  74. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
  75. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
  76. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
  77. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
  78. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  79. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  80. package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
  81. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
  82. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  83. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  84. package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
  85. package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
  86. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  87. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
  88. package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
  89. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  90. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  91. package/speechflow-cli/src/speechflow-util.ts +1 -0
  92. package/speechflow-ui-db/package.json +9 -9
  93. package/speechflow-ui-st/package.json +9 -9
  94. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  95. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  96. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  97. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  98. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  99. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -9,37 +9,39 @@ import fs from "node:fs"
9
9
  import Stream from "node:stream"
10
10
 
11
11
  /* internal dependencies */
12
- import SpeechFlowNode from "./speechflow-node"
13
- import * as util from "./speechflow-util"
12
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
13
+ import * as util from "./speechflow-util"
14
14
 
15
15
  /* SpeechFlow node for file access */
16
16
  export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
17
17
  /* declare official node name */
18
18
  public static name = "xio-file"
19
19
 
20
+ /* file descriptor for seekable write mode */
21
+ private fd: number | null = null
22
+
20
23
  /* construct node */
21
24
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
22
25
  super(id, cfg, opts, args)
23
26
 
24
27
  /* declare node configuration parameters */
25
28
  this.configure({
26
- path: { type: "string", pos: 0, val: "" },
27
- mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w|rw)$/ },
28
- type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ },
29
- chunka: { type: "number", val: 200, match: (n: number) => n >= 10 && n <= 1000 },
30
- chunkt: { type: "number", val: 65536, match: (n: number) => n >= 1024 && n <= 131072 }
29
+ path: { type: "string", pos: 0, val: "" },
30
+ mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w)$/ },
31
+ type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ },
32
+ seekable: { type: "boolean", val: false },
33
+ chunkAudio: { type: "number", val: 200, match: (n: number) => n >= 10 && n <= 1000 },
34
+ chunkText: { type: "number", val: 65536, match: (n: number) => n >= 1024 && n <= 131072 }
31
35
  })
32
36
 
33
37
  /* sanity check parameters */
34
38
  if (this.params.path === "")
35
39
  throw new Error("required parameter \"path\" has to be given")
40
+ if (this.params.seekable && this.params.path === "-")
41
+ throw new Error("parameter \"seekable\" cannot be used with standard I/O")
36
42
 
37
43
  /* declare node input/output format */
38
- if (this.params.mode === "rw") {
39
- this.input = this.params.type
40
- this.output = this.params.type
41
- }
42
- else if (this.params.mode === "r") {
44
+ if (this.params.mode === "r") {
43
45
  this.input = "none"
44
46
  this.output = this.params.type
45
47
  }
@@ -56,8 +58,8 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
56
58
  const highWaterMarkAudio = (
57
59
  this.config.audioSampleRate *
58
60
  (this.config.audioBitDepth / 8)
59
- ) / (1000 / this.params.chunka)
60
- const highWaterMarkText = this.params.chunkt
61
+ ) / (1000 / this.params.chunkAudio)
62
+ const highWaterMarkText = this.params.chunkText
61
63
 
62
64
  /* utility function: create a writable stream as chunker that
63
65
  writes to process.stdout but properly handles finish events.
@@ -81,59 +83,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
81
83
  })
82
84
 
83
85
  /* dispatch according to mode and path */
84
- if (this.params.mode === "rw") {
85
- if (this.params.path === "-") {
86
- /* standard I/O */
87
- if (this.params.type === "audio") {
88
- process.stdin.setEncoding()
89
- process.stdout.setEncoding()
90
- const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
91
- process.stdin.pipe(streamR)
92
- const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
93
- streamW.pipe(process.stdout)
94
- this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
95
- }
96
- else {
97
- process.stdin.setEncoding(this.config.textEncoding)
98
- process.stdout.setEncoding(this.config.textEncoding)
99
- const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
100
- process.stdin.pipe(streamR)
101
- const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
102
- streamW.pipe(process.stdout)
103
- this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
104
- }
105
- }
106
- else {
107
- /* file I/O */
108
- if (this.params.type === "audio") {
109
- this.stream = Stream.Duplex.from({
110
- readable: fs.createReadStream(this.params.path,
111
- { highWaterMark: highWaterMarkAudio }),
112
- writable: fs.createWriteStream(this.params.path,
113
- { highWaterMark: highWaterMarkAudio })
114
- })
115
- }
116
- else {
117
- this.stream = Stream.Duplex.from({
118
- readable: fs.createReadStream(this.params.path, {
119
- highWaterMark: highWaterMarkText,
120
- encoding: this.config.textEncoding
121
- }),
122
- writable: fs.createWriteStream(this.params.path, {
123
- highWaterMark: highWaterMarkText,
124
- encoding: this.config.textEncoding
125
- })
126
- })
127
- }
128
- }
129
-
130
- /* convert regular stream into object-mode stream */
131
- const wrapper1 = util.createTransformStreamForWritableSide(this.params.type, 1)
132
- const wrapper2 = util.createTransformStreamForReadableSide(
133
- this.params.type, () => this.timeZero)
134
- this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
135
- }
136
- else if (this.params.mode === "r") {
86
+ if (this.params.mode === "r") {
137
87
  if (this.params.path === "-") {
138
88
  /* standard I/O */
139
89
  let chunker: Stream.PassThrough
@@ -176,15 +126,63 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
176
126
  }
177
127
  else {
178
128
  /* file I/O */
179
- let writable: Stream.Writable
180
- if (this.params.type === "audio")
181
- writable = fs.createWriteStream(this.params.path,
182
- { highWaterMark: highWaterMarkAudio })
183
- else
184
- writable = fs.createWriteStream(this.params.path,
185
- { highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
186
- const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
187
- this.stream = Stream.compose(wrapper, writable)
129
+ if (this.params.seekable) {
130
+ /* seekable file I/O with file descriptor */
131
+ this.fd = fs.openSync(this.params.path, "w")
132
+ let writePosition = 0
133
+ const self = this
134
+ const writable = new Stream.Writable({
135
+ objectMode: true,
136
+ decodeStrings: false,
137
+ highWaterMark: 1,
138
+ write (chunk: SpeechFlowChunk, encoding, callback) {
139
+ const payload = Buffer.isBuffer(chunk.payload) ?
140
+ chunk.payload : Buffer.from(chunk.payload)
141
+ const seekPosition = chunk.meta.get("chunk:seek") as number | undefined
142
+ if (seekPosition !== undefined) {
143
+ /* seek to specified position and write (overload) */
144
+ fs.write(self.fd!, payload, 0, payload.byteLength, seekPosition, callback)
145
+ }
146
+ else {
147
+ /* append at current position */
148
+ fs.write(self.fd!, payload, 0, payload.byteLength, writePosition, (err) => {
149
+ if (err)
150
+ callback(err)
151
+ else {
152
+ writePosition += payload.byteLength
153
+ callback()
154
+ }
155
+ })
156
+ }
157
+ },
158
+ final (callback) {
159
+ callback()
160
+ },
161
+ destroy (err, callback) {
162
+ if (self.fd !== null) {
163
+ fs.close(self.fd, () => {
164
+ self.fd = null
165
+ callback(err)
166
+ })
167
+ }
168
+ else
169
+ callback(err)
170
+ }
171
+ })
172
+ this.stream = writable
173
+ }
174
+ else {
175
+ /* non-seekable file I/O with stream */
176
+ let writable: Stream.Writable
177
+ if (this.params.type === "audio")
178
+ writable = fs.createWriteStream(this.params.path,
179
+ { highWaterMark: highWaterMarkAudio })
180
+ else
181
+ writable = fs.createWriteStream(this.params.path,
182
+ { highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
183
+ const wrapper = util.createTransformStreamForWritableSide(this.params.type, 1)
184
+ this.stream = Stream.compose(wrapper, writable)
185
+ }
188
186
  }
189
187
  }
190
188
  else
@@ -202,12 +200,14 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
202
200
  /* for stdio streams, just end without destroying */
203
201
  const stream = this.stream
204
202
  if ((stream instanceof Stream.Writable || stream instanceof Stream.Duplex) &&
205
- (!stream.writableEnded && !stream.destroyed) ) {
203
+ (!stream.writableEnded && !stream.destroyed)) {
206
204
  await Promise.race([
207
205
  new Promise<void>((resolve, reject) => {
208
206
  stream.end((err?: Error) => {
209
- if (err) reject(err)
210
- else resolve()
207
+ if (err)
208
+ reject(err)
209
+ else
210
+ resolve()
211
211
  })
212
212
  }),
213
213
  util.timeout(5000)
@@ -216,6 +216,19 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
216
216
  }
217
217
  this.stream = null
218
218
  }
219
+
220
+ /* ensure file descriptor is closed */
221
+ if (this.fd !== null) {
222
+ await new Promise<void>((resolve, reject) => {
223
+ fs.close(this.fd!, (err) => {
224
+ this.fd = null
225
+ if (err)
226
+ reject(err)
227
+ else
228
+ resolve()
229
+ })
230
+ })
231
+ }
219
232
  }
220
233
  }
221
234
 
@@ -0,0 +1,325 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { DateTime } from "luxon"
12
+ import { VBANServer, VBANAudioPacket,
13
+ EBitsResolutions, ECodecs } from "vban"
14
+
15
+ /* internal dependencies */
16
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
+ import * as util from "./speechflow-util"
18
+
19
+ /* sample rate (Hz) to VBAN sample rate index mapping */
20
+ const sampleRateToIndex: { [ rate: number ]: number } = {
21
+ 6000: 0, 12000: 1, 24000: 2, 48000: 3, 96000: 4, 192000: 5, 384000: 6,
22
+ 8000: 7, 16000: 8, 32000: 9, 64000: 10, 128000: 11, 256000: 12, 512000: 13,
23
+ 11025: 14, 22050: 15, 44100: 16, 88200: 17, 176400: 18, 352800: 19, 705600: 20
24
+ }
25
+
26
+ /* SpeechFlow node for VBAN networking */
27
+ export default class SpeechFlowNodeXIOVBAN extends SpeechFlowNode {
28
+ /* declare official node name */
29
+ public static name = "xio-vban"
30
+
31
+ /* internal state */
32
+ private server: VBANServer | null = null
33
+ private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
34
+ private frameCounter = 0
35
+ private targetAddress = ""
36
+ private targetPort = 0
37
+
38
+ /* construct node */
39
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
    super(id, cfg, opts, args)

    /* node parameters: local endpoint, remote endpoint,
       VBAN stream name and I/O direction */
    this.configure({
        listen:  { type: "string", pos: 0, val: "",       match: /^(?:|\d+|.+?:\d+)$/ },
        connect: { type: "string", pos: 1, val: "",       match: /^(?:|.+?:\d+)$/ },
        stream:  { type: "string", pos: 2, val: "Stream", match: /^.{1,16}$/ },
        mode:    { type: "string", pos: 3, val: "rw",     match: /^(?:r|w|rw)$/ }
    })

    /* reject endpoint/mode combinations which cannot work */
    const hasListen  = this.params.listen  !== ""
    const hasConnect = this.params.connect !== ""
    if (!hasListen && !hasConnect)
        throw new Error("VBAN node requires either listen or connect mode")
    if (this.params.mode === "r" && !hasListen)
        throw new Error("VBAN read mode requires a listen address")
    if (this.params.mode === "w" && !hasConnect)
        throw new Error("VBAN write mode requires a connect address")

    /* VBAN transports audio only, so the mode just selects the direction(s) */
    switch (this.params.mode) {
        case "rw":
            this.input  = "audio"
            this.output = "audio"
            break
        case "r":
            this.input  = "none"
            this.output = "audio"
            break
        case "w":
            this.input  = "audio"
            this.output = "none"
            break
    }
}
72
+
73
+ /* parse address:port string */
74
private parseAddress (addr: string, defaultPort: number): { host: string, port: number } {
    /* a bare number is a port on the wildcard address */
    if (/^\d+$/.test(addr))
        return { host: "0.0.0.0", port: Number.parseInt(addr, 10) }

    /* otherwise expect a trailing ":<port>"; without one,
       the whole string is the host and the default port applies */
    const parts = /^(.+?):(\d+)$/.exec(addr)
    return parts !== null ?
        { host: parts[1], port: Number.parseInt(parts[2], 10) } :
        { host: addr, port: defaultPort }
}
82
+
83
+ /* open node */
84
async open () {
    /* VBAN protocol limits: the 8-bit "nbSample" header field allows at most
       256 samples per channel in a packet and the audio data of a packet
       is limited to 1436 bytes */
    const vbanMaxSamples   = 256
    const vbanMaxDataBytes = 1436

    /* create VBAN server */
    this.server = new VBANServer({
        application: {
            applicationName:  "SpeechFlow",
            manufacturerName: "Dr. Ralf S. Engelschall",
            deviceName:       this.id
        }
    })

    /* setup error handling (errors are logged only) */
    this.server.on("error", (err: Error) => {
        this.log("error", `VBAN error: ${err.message}`)
    })

    /* setup chunk queue for incoming audio */
    this.chunkQueue = new util.SingleQueue<SpeechFlowChunk>()

    /* determine target for sending */
    if (this.params.connect !== "") {
        const target = this.parseAddress(this.params.connect, 6980)
        this.targetAddress = target.host
        this.targetPort    = target.port
    }

    /* handle incoming VBAN packets */
    this.server.on("message", (packet: any, sender: { address: string, port: number }) => {
        /* write-only nodes ignore all incoming traffic */
        if (this.params.mode === "w")
            return

        /* only handle audio packets */
        if (!(packet instanceof VBANAudioPacket))
            return

        /* optionally filter by stream name */
        if (this.params.stream !== "" && packet.streamName !== this.params.stream)
            return

        /* get audio data from packet */
        if (!Buffer.isBuffer(packet.data)) {
            this.log("warning", "VBAN packet data is not a Buffer")
            return
        }

        /* convert audio format if necessary */
        const converted = this.convertToInt16(packet.data, packet.bitResolution)
        if (converted === null) {
            this.log("warning", `unsupported VBAN bit resolution: ${packet.bitResolution}`)
            return
        }
        let audioBuffer = converted

        /* handle channel conversion if needed
           (the VBAN header stores the channel count minus one) */
        const channels = packet.nbChannel + 1
        if (channels > 1 && this.config.audioChannels === 1)
            audioBuffer = this.downmixToMono(audioBuffer, channels)

        /* create chunk with timing information
           (start is the receive time relative to time zero) */
        const now      = DateTime.now()
        const start    = now.diff(this.timeZero)
        const duration = util.audioBufferDuration(audioBuffer,
            this.config.audioSampleRate, this.config.audioBitDepth, this.config.audioChannels)
        const end      = start.plus(duration * 1000)
        const chunk    = new SpeechFlowChunk(start, end, "final", "audio", audioBuffer)
        this.chunkQueue?.write(chunk)
    })

    /* setup listening */
    this.server.on("listening", () => {
        const address = this.server!.address()
        this.log("info", `VBAN listening on ${address.address}:${address.port}`)
    })

    /* bind to listen port */
    if (this.params.listen !== "") {
        const listen = this.parseAddress(this.params.listen, 6980)
        this.server.bind(listen.port, listen.host)
    }
    else
        /* still need to bind for sending */
        this.server.bind(0)

    /* create duplex stream */
    const self = this
    const reads = new util.PromiseSet<void>()
    this.stream = new Stream.Duplex({
        writableObjectMode: true,
        readableObjectMode: true,
        decodeStrings:      false,
        highWaterMark:      1,
        write (chunk: SpeechFlowChunk, encoding, callback) {
            if (self.params.mode === "r") {
                callback(new Error("write operation on read-only node"))
                return
            }
            if (chunk.type !== "audio") {
                callback(new Error("VBAN only supports audio type"))
                return
            }
            if (self.targetAddress === "") {
                callback(new Error("no VBAN target address configured"))
                return
            }

            /* get audio buffer */
            const audioBuffer = chunk.payload as Buffer

            /* determine VBAN sample rate index */
            const sampleRateIndex = sampleRateToIndex[self.config.audioSampleRate]
            if (sampleRateIndex === undefined) {
                callback(new Error(`unsupported sample rate for VBAN: ${self.config.audioSampleRate}`))
                return
            }

            /* determine the size of one frame (one sample for every channel) */
            const frameBytes = (self.config.audioBitDepth / 8) * self.config.audioChannels
            if (!(frameBytes > 0)) {
                callback(new Error(`invalid audio frame size for VBAN: ${frameBytes}`))
                return
            }

            /* only whole frames can be transmitted */
            const framesTotal = Math.floor(audioBuffer.length / frameBytes)
            if (framesTotal * frameBytes !== audioBuffer.length)
                self.log("warning", "VBAN audio chunk has incomplete trailing frame (dropped): " +
                    `${audioBuffer.length - framesTotal * frameBytes} byte(s)`)

            /* a single VBAN packet can carry only a limited number of frames,
               so larger chunks have to be split over multiple packets instead
               of sending one oversized packet with a clamped sample count */
            const framesPerPacket = Math.max(1,
                Math.min(vbanMaxSamples, Math.floor(vbanMaxDataBytes / frameBytes)))

            /* send all packets of the chunk in sequence */
            const sendAll = async () => {
                for (let offset = 0; offset < framesTotal; offset += framesPerPacket) {
                    if (self.server === null)
                        throw new Error("VBAN server already closed")
                    const frames = Math.min(framesPerPacket, framesTotal - offset)

                    /* create VBAN audio packet
                       (header stores sample and channel counts minus one) */
                    const packet = new VBANAudioPacket({
                        streamName:    self.params.stream,
                        srIndex:       sampleRateIndex,
                        nbSample:      frames - 1,
                        nbChannel:     self.config.audioChannels - 1,
                        bitResolution: EBitsResolutions.VBAN_DATATYPE_INT16,
                        codec:         ECodecs.VBAN_CODEC_PCM,
                        frameCounter:  self.frameCounter
                    }, audioBuffer.subarray(offset * frameBytes, (offset + frames) * frameBytes))

                    /* advance frame counter (32-bit field, so wrap around) */
                    self.frameCounter = (self.frameCounter + 1) >>> 0

                    /* send packet */
                    await self.server.send(packet, self.targetPort, self.targetAddress)
                }
            }
            sendAll().then(() => callback(), (err: Error) => callback(err))
        },
        async final (callback) {
            await reads.awaitAll()
            callback()
        },
        read (size: number) {
            if (self.params.mode === "w")
                throw new Error("read operation on write-only node")
            reads.add(self.chunkQueue!.read().then((chunk) => {
                this.push(chunk, "binary")
            }).catch((err: Error) => {
                self.log("warning", `read on chunk queue operation failed: ${err}`)
                this.push(null)
            }))
        }
    })
}

/* convert raw VBAN PCM data into 16-bit signed little-endian PCM
   (returns null for an unsupported bit resolution; incomplete
   trailing samples of the wider formats are ignored) */
private convertToInt16 (data: Buffer, bitResolution: EBitsResolutions): Buffer | null {
    /* map every complete input sample onto one 16-bit output sample */
    const convert = (bytesPerSample: number, read: (offset: number) => number): Buffer => {
        const samples = Math.floor(data.length / bytesPerSample)
        const out = Buffer.alloc(samples * 2)
        for (let i = 0; i < samples; i++)
            out.writeInt16LE(read(i * bytesPerSample), i * 2)
        return out
    }

    /* clamp a scaled floating point sample into the 16-bit range */
    const clamp = (value: number) =>
        Math.max(-32768, Math.min(32767, Math.round(value * 32767)))

    switch (bitResolution) {
        case EBitsResolutions.VBAN_DATATYPE_INT16:
            /* 16-bit signed integer - matches our format */
            return data
        case EBitsResolutions.VBAN_DATATYPE_BYTE8:
            /* 8-bit unsigned to 16-bit signed */
            return convert(1, (o) => Math.round(((data[o] - 128) / 128) * 32767))
        case EBitsResolutions.VBAN_DATATYPE_INT24:
            /* 24-bit signed to 16-bit signed */
            return convert(3, (o) => Math.round((data.readIntLE(o, 3) / 0x800000) * 32767))
        case EBitsResolutions.VBAN_DATATYPE_INT32:
            /* 32-bit signed to 16-bit signed */
            return convert(4, (o) => Math.round((data.readInt32LE(o) / 0x80000000) * 32767))
        case EBitsResolutions.VBAN_DATATYPE_FLOAT32:
            /* 32-bit float to 16-bit signed */
            return convert(4, (o) => clamp(data.readFloatLE(o)))
        case EBitsResolutions.VBAN_DATATYPE_FLOAT64:
            /* 64-bit float to 16-bit signed */
            return convert(8, (o) => clamp(data.readDoubleLE(o)))
        default:
            /* unsupported format */
            return null
    }
}

/* downmix interleaved 16-bit PCM to mono by averaging the channels of
   every frame (an incomplete trailing frame is ignored, as reading it
   would run beyond the end of the buffer) */
private downmixToMono (audioBuffer: Buffer, channels: number): Buffer {
    const frames = Math.floor(audioBuffer.length / (2 * channels))
    const monoBuffer = Buffer.alloc(frames * 2)
    for (let i = 0; i < frames; i++) {
        let sum = 0
        for (let ch = 0; ch < channels; ch++)
            sum += audioBuffer.readInt16LE((i * channels + ch) * 2)
        monoBuffer.writeInt16LE(Math.round(sum / channels), i * 2)
    }
    return monoBuffer
}
304
+
305
+ /* close node */
306
async close () {
    /* drain the chunk queue (if any) and drop the reference */
    this.chunkQueue?.drain()
    this.chunkQueue = null

    /* close the VBAN server (if any) and drop the reference */
    this.server?.close()
    this.server = null

    /* finally destroy the stream (if any) and drop the reference */
    const stream = this.stream
    if (stream !== null) {
        await util.destroyStream(stream)
        this.stream = null
    }
}
325
+ }