speechflow 0.9.4 → 0.9.7

This diff shows the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two published versions.
Files changed (107)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +227 -54
  3. package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
  5. package/dst/speechflow-node-a2a-wav.d.ts +11 -0
  6. package/dst/speechflow-node-a2a-wav.js +170 -0
  7. package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
  8. package/dst/speechflow-node-a2t-deepgram.js +220 -0
  9. package/dst/speechflow-node-deepgram.d.ts +3 -1
  10. package/dst/speechflow-node-deepgram.js +86 -22
  11. package/dst/speechflow-node-deepl.d.ts +3 -1
  12. package/dst/speechflow-node-deepl.js +25 -20
  13. package/dst/speechflow-node-device.d.ts +3 -1
  14. package/dst/speechflow-node-device.js +53 -2
  15. package/dst/speechflow-node-elevenlabs.d.ts +4 -1
  16. package/dst/speechflow-node-elevenlabs.js +88 -49
  17. package/dst/speechflow-node-ffmpeg.d.ts +3 -1
  18. package/dst/speechflow-node-ffmpeg.js +42 -4
  19. package/dst/speechflow-node-file.d.ts +3 -1
  20. package/dst/speechflow-node-file.js +84 -13
  21. package/dst/speechflow-node-format.d.ts +11 -0
  22. package/dst/speechflow-node-format.js +80 -0
  23. package/dst/speechflow-node-gemma.d.ts +3 -1
  24. package/dst/speechflow-node-gemma.js +84 -23
  25. package/dst/speechflow-node-mqtt.d.ts +13 -0
  26. package/dst/speechflow-node-mqtt.js +181 -0
  27. package/dst/speechflow-node-opus.d.ts +12 -0
  28. package/dst/speechflow-node-opus.js +135 -0
  29. package/dst/speechflow-node-subtitle.d.ts +12 -0
  30. package/dst/speechflow-node-subtitle.js +96 -0
  31. package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
  32. package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
  33. package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
  34. package/dst/speechflow-node-t2t-deepl.js +133 -0
  35. package/dst/speechflow-node-t2t-format.d.ts +11 -0
  36. package/dst/speechflow-node-t2t-format.js +80 -0
  37. package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
  38. package/dst/speechflow-node-t2t-gemma.js +213 -0
  39. package/dst/speechflow-node-t2t-opus.d.ts +12 -0
  40. package/dst/speechflow-node-t2t-opus.js +135 -0
  41. package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
  42. package/dst/speechflow-node-t2t-subtitle.js +96 -0
  43. package/dst/speechflow-node-trace.d.ts +11 -0
  44. package/dst/speechflow-node-trace.js +88 -0
  45. package/dst/speechflow-node-wav.d.ts +11 -0
  46. package/dst/speechflow-node-wav.js +170 -0
  47. package/dst/speechflow-node-websocket.d.ts +3 -1
  48. package/dst/speechflow-node-websocket.js +149 -49
  49. package/dst/speechflow-node-whisper-common.d.ts +34 -0
  50. package/dst/speechflow-node-whisper-common.js +7 -0
  51. package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
  52. package/dst/speechflow-node-whisper-ggml.js +97 -0
  53. package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
  54. package/dst/speechflow-node-whisper-onnx.js +131 -0
  55. package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
  56. package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
  57. package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
  58. package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
  59. package/dst/speechflow-node-whisper-worker.d.ts +1 -0
  60. package/dst/speechflow-node-whisper-worker.js +116 -0
  61. package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
  62. package/dst/speechflow-node-whisper-worker2.js +82 -0
  63. package/dst/speechflow-node-whisper.d.ts +19 -0
  64. package/dst/speechflow-node-whisper.js +604 -0
  65. package/dst/speechflow-node-x2x-trace.d.ts +11 -0
  66. package/dst/speechflow-node-x2x-trace.js +88 -0
  67. package/dst/speechflow-node-xio-device.d.ts +13 -0
  68. package/dst/speechflow-node-xio-device.js +205 -0
  69. package/dst/speechflow-node-xio-file.d.ts +11 -0
  70. package/dst/speechflow-node-xio-file.js +176 -0
  71. package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
  72. package/dst/speechflow-node-xio-mqtt.js +181 -0
  73. package/dst/speechflow-node-xio-websocket.d.ts +13 -0
  74. package/dst/speechflow-node-xio-websocket.js +275 -0
  75. package/dst/speechflow-node.d.ts +25 -7
  76. package/dst/speechflow-node.js +74 -9
  77. package/dst/speechflow-utils.d.ts +23 -0
  78. package/dst/speechflow-utils.js +194 -0
  79. package/dst/speechflow.js +146 -43
  80. package/etc/biome.jsonc +12 -4
  81. package/etc/stx.conf +65 -0
  82. package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
  83. package/package.json +49 -31
  84. package/sample.yaml +61 -23
  85. package/src/lib.d.ts +6 -1
  86. package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
  87. package/src/speechflow-node-a2a-wav.ts +143 -0
  88. package/src/speechflow-node-a2t-deepgram.ts +199 -0
  89. package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
  90. package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
  91. package/src/speechflow-node-t2t-format.ts +85 -0
  92. package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
  93. package/src/speechflow-node-t2t-opus.ts +111 -0
  94. package/src/speechflow-node-t2t-subtitle.ts +101 -0
  95. package/src/speechflow-node-x2x-trace.ts +92 -0
  96. package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
  97. package/src/speechflow-node-xio-file.ts +153 -0
  98. package/src/speechflow-node-xio-mqtt.ts +154 -0
  99. package/src/speechflow-node-xio-websocket.ts +248 -0
  100. package/src/speechflow-node.ts +78 -13
  101. package/src/speechflow-utils.ts +212 -0
  102. package/src/speechflow.ts +150 -43
  103. package/etc/nps.yaml +0 -40
  104. package/src/speechflow-node-deepgram.ts +0 -133
  105. package/src/speechflow-node-elevenlabs.ts +0 -116
  106. package/src/speechflow-node-file.ts +0 -108
  107. package/src/speechflow-node-websocket.ts +0 -179
package/src/speechflow-node-xio-websocket.ts (new file)
@@ -0,0 +1,248 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import ws from "ws"
+import ReconnWebsocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for Websocket networking */
+export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "websocket"
+
+    /* internal state */
+    private server: ws.WebSocketServer | null = null
+    private client: WebSocket | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            listen: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
+            connect: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
+            mode: { type: "string", val: "r", match: /^(?:r|w|rw)$/ },
+            type: { type: "string", val: "text", match: /^(?:audio|text)$/ }
+        })
+
+        /* sanity check usage */
+        if (this.params.listen !== "" && this.params.connect !== "")
+            throw new Error("Websocket node cannot listen and connect at the same time")
+        else if (this.params.listen === "" && this.params.connect === "")
+            throw new Error("Websocket node requires either listen or connect mode")
+
+        /* declare node input/output format */
+        if (this.params.mode === "rw") {
+            this.input = this.params.type
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "r") {
+            this.input = "none"
+            this.output = this.params.type
+        }
+        else if (this.params.mode === "w") {
+            this.input = this.params.type
+            this.output = "none"
+        }
+    }
+
+    /* open node */
+    async open () {
+        if (this.params.listen !== "") {
+            /* listen locally on a Websocket port */
+            const url = new URL(this.params.listen)
+            const websockets = new Set<ws.WebSocket>()
+            const chunkQueue = new utils.SingleQueue<SpeechFlowChunk>()
+            const server = new ws.WebSocketServer({
+                host: url.hostname,
+                port: Number.parseInt(url.port),
+                path: url.pathname
+            })
+            server.on("listening", () => {
+                this.log("info", `listening on URL ${this.params.listen}`)
+            })
+            server.on("connection", (ws, request) => {
+                const peer = `${request.socket.remoteAddress}:${request.socket.remotePort}`
+                this.log("info", `connection opened on URL ${this.params.listen} by peer ${peer}`)
+                websockets.add(ws)
+                ws.on("close", () => {
+                    this.log("info", `connection closed on URL ${this.params.listen} by peer ${peer}`)
+                    websockets.delete(ws)
+                })
+                ws.on("error", (error) => {
+                    this.log("error", `error of connection on URL ${this.params.listen} for peer ${peer}: ${error.message}`)
+                })
+                ws.on("message", (data, isBinary) => {
+                    if (this.params.mode === "w") {
+                        this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
+                            "received remote data on write-only node")
+                        return
+                    }
+                    if (!isBinary) {
+                        this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
+                            "received non-binary message")
+                        return
+                    }
+                    let buffer: Buffer
+                    if (Buffer.isBuffer(data))
+                        buffer = data
+                    else if (data instanceof ArrayBuffer)
+                        buffer = Buffer.from(data)
+                    else
+                        buffer = Buffer.concat(data)
+                    const chunk = utils.streamChunkDecode(buffer)
+                    chunkQueue.write(chunk)
+                })
+            })
+            server.on("error", (error) => {
+                this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`)
+            })
+            const type = this.params.type
+            const mode = this.params.mode
+            this.stream = new Stream.Duplex({
+                writableObjectMode: true,
+                readableObjectMode: true,
+                decodeStrings: false,
+                write (chunk: SpeechFlowChunk, encoding, callback) {
+                    if (mode === "r")
+                        callback(new Error("write operation on read-only node"))
+                    else if (chunk.type !== type)
+                        callback(new Error(`written chunk is not of ${type} type`))
+                    else if (websockets.size === 0)
+                        callback(new Error("still no Websocket connections available"))
+                    else {
+                        const data = utils.streamChunkEncode(chunk)
+                        const results = []
+                        for (const websocket of websockets.values()) {
+                            results.push(new Promise<void>((resolve, reject) => {
+                                websocket.send(data, (error) => {
+                                    if (error)
+                                        reject(error)
+                                    else
+                                        resolve()
+                                })
+                            }))
+                        }
+                        Promise.all(results).then(() => {
+                            callback()
+                        }).catch((errors: Error[]) => {
+                            const error = new Error(errors.map((e) => e.message).join("; "))
+                            callback(error)
+                        })
+                    }
+                },
+                read (size: number) {
+                    if (mode === "w")
+                        throw new Error("read operation on write-only node")
+                    chunkQueue.read().then((chunk) => {
+                        this.push(chunk, "binary")
+                    })
+                }
+            })
+        }
+        else if (this.params.connect !== "") {
+            /* connect remotely to a Websocket port */
+            this.client = new ReconnWebsocket(this.params.connect, [], {
+                WebSocket: ws,
+                WebSocketOptions: {},
+                reconnectionDelayGrowFactor: 1.3,
+                maxReconnectionDelay: 4000,
+                minReconnectionDelay: 1000,
+                connectionTimeout: 4000,
+                minUptime: 5000
+            })
+            this.client.addEventListener("open", (ev: Event) => {
+                this.log("info", `connection opened to URL ${this.params.connect}`)
+            })
+            this.client.addEventListener("close", (ev: Event) => {
+                this.log("info", `connection closed to URL ${this.params.connect}`)
+            })
+            this.client.addEventListener("error", (ev: ErrorEvent) => {
+                this.log("error", `error of connection on URL ${this.params.connect}: ${ev.error.message}`)
+            })
+            const chunkQueue = new utils.SingleQueue<SpeechFlowChunk>()
+            this.client.addEventListener("message", (ev: MessageEvent) => {
+                if (this.params.mode === "w") {
+                    this.log("warning", `connection to URL ${this.params.listen}: ` +
+                        "received remote data on write-only node")
+                    return
+                }
+                if (!(ev.data instanceof ArrayBuffer)) {
+                    this.log("warning", `connection to URL ${this.params.listen}: ` +
+                        "received non-binary message")
+                    return
+                }
+                const buffer = Buffer.from(ev.data)
+                const chunk = utils.streamChunkDecode(buffer)
+                chunkQueue.write(chunk)
+            })
+            const client = this.client
+            client.binaryType = "arraybuffer"
+            const type = this.params.type
+            const mode = this.params.mode
+            this.stream = new Stream.Duplex({
+                writableObjectMode: true,
+                readableObjectMode: true,
+                decodeStrings: false,
+                write (chunk: SpeechFlowChunk, encoding, callback) {
+                    if (mode === "r")
+                        callback(new Error("write operation on read-only node"))
+                    else if (chunk.type !== type)
+                        callback(new Error(`written chunk is not of ${type} type`))
+                    else if (!client.OPEN)
+                        callback(new Error("still no Websocket connection available"))
+                    const data = utils.streamChunkEncode(chunk)
+                    client.send(data)
+                    callback()
+                },
+                read (size: number) {
+                    if (mode === "w")
+                        throw new Error("read operation on write-only node")
+                    if (!client.OPEN)
+                        throw new Error("still no Websocket connection available")
+                    chunkQueue.read().then((chunk) => {
+                        this.push(chunk, "binary")
+                    })
+                }
+            })
+        }
+    }
+
+    /* close node */
+    async close () {
+        /* close Websocket server */
+        if (this.server !== null) {
+            await new Promise<void>((resolve, reject) => {
+                this.server!.close((error) => {
+                    if (error) reject(error)
+                    else resolve()
+                })
+            })
+            this.server = null
+        }
+
+        /* close Websocket client */
+        if (this.client !== null) {
+            this.client!.close()
+            this.client = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
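As an aside, the following sketch (not part of the package) shows how an external consumer might talk to this node. It assumes a pipeline whose "websocket" node was configured with listen "ws://127.0.0.1:8585" and mode "w", and it decodes the CBOR chunk framing implemented by streamChunkEncode()/streamChunkDecode() in speechflow-utils.ts further below; URL, port, and field handling are illustrative assumptions only.

    /* hypothetical external consumer of a listening SpeechFlow "websocket" node */
    import WebSocket from "ws"
    import CBOR from "cbor2"

    /* wire shape produced by streamChunkEncode() (see speechflow-utils.ts below) */
    type ChunkWire = {
        timestampStart: number,   /* milliseconds relative to the node's zero time */
        timestampEnd: number,
        kind: string,             /* "intermediate" | "final" */
        type: string,             /* "audio" | "text" */
        payload: Uint8Array
    }

    const socket = new WebSocket("ws://127.0.0.1:8585")  /* assumed listen URL */
    socket.on("message", (data: Buffer) => {
        /* decode one CBOR-framed chunk and print its text payload */
        const chunk = CBOR.decode<ChunkWire>(new Uint8Array(data))
        if (chunk.type === "text")
            console.log(`[${chunk.timestampStart}ms-${chunk.timestampEnd}ms]`,
                new TextDecoder().decode(chunk.payload))
    })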
package/src/speechflow-node.ts
@@ -7,17 +7,44 @@
 /* standard dependencies */
 import Events from "node:events"
 import Stream from "node:stream"
+import { DateTime, Duration } from "luxon"
+
+/* the definition of a single payload chunk passed through the SpeechFlow nodes */
+export class SpeechFlowChunk {
+    constructor (
+        public timestampStart: Duration,
+        public timestampEnd: Duration,
+        public kind: "intermediate" | "final",
+        public type: "audio" | "text",
+        public payload: Buffer | string
+    ) {}
+    clone () {
+        let payload: Buffer | string
+        if (Buffer.isBuffer(this.payload))
+            payload = Buffer.from(this.payload)
+        else
+            payload = String(this.payload)
+        return new SpeechFlowChunk(
+            Duration.fromMillis(this.timestampStart.toMillis()),
+            Duration.fromMillis(this.timestampEnd.toMillis()),
+            this.kind,
+            this.type,
+            payload
+        )
+    }
+}
 
 /* the base class for all SpeechFlow nodes */
 export default class SpeechFlowNode extends Events.EventEmitter {
     /* general constant configuration (for reference) */
     config = {
-        audioChannels: 1, /* audio mono channel */
-        audioBitDepth: 16, /* audio PCM 16-bit integer */
-        audioLittleEndian: true, /* audio PCM little-endian */
-        audioSampleRate: 48000, /* audio 48kHz sample rate */
-        textEncoding: "utf8" /* UTF-8 text encoding */
-    } as const
+        audioChannels: 1, /* audio mono channel */
+        audioBitDepth: 16 as (1 | 8 | 16 | 24 | 32), /* audio PCM 16-bit integer */
+        audioLittleEndian: true, /* audio PCM little-endian */
+        audioSampleRate: 48000, /* audio 48kHz sample rate */
+        textEncoding: "utf8" as BufferEncoding, /* UTF-8 text encoding */
+        cacheDir: "" /* directory for cache files */
+    }
 
     /* announced information */
     input = "none"
@@ -26,18 +53,35 @@ export default class SpeechFlowNode extends Events.EventEmitter {
     stream: Stream.Writable | Stream.Readable | Stream.Duplex | null = null
     connectionsIn = new Set<SpeechFlowNode>()
     connectionsOut = new Set<SpeechFlowNode>()
+    timeOpen: DateTime<boolean> | undefined
+    timeZero: DateTime<boolean> = DateTime.fromMillis(0)
+    timeZeroOffset: Duration<boolean> = Duration.fromMillis(0)
 
     /* the default constructor */
     constructor (
         public id: string,
+        private cfg: { [ id: string ]: any },
         private opts: { [ id: string ]: any },
         private args: any[]
     ) {
         super()
+        for (const key of Object.keys(cfg)) {
+            const idx = key as keyof typeof this.config
+            if (this.config[idx] !== undefined)
+                (this.config[idx] as any) = cfg[key]
+        }
+    }
+
+    /* set base/zero time for relative timestamp calculations */
+    setTimeZero (time: DateTime) {
+        this.timeZero = time
+        if (this.timeOpen === undefined)
+            this.timeOpen = this.timeZero
+        this.timeZeroOffset = this.timeZero.diff(this.timeOpen)
     }
 
     /* INTERNAL: utility function: create "params" attribute from constructor of sub-classes */
-    configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp } }) {
+    configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp | ((x: any) => boolean) } }) {
         for (const name of Object.keys(spec)) {
             if (this.opts[name] !== undefined) {
                 /* named parameter */
@@ -45,9 +89,11 @@ export default class SpeechFlowNode extends Events.EventEmitter {
                     throw new Error(`invalid type of named parameter "${name}" ` +
                         `(has to be ${spec[name].type})`)
                 if ("match" in spec[name]
-                    && this.opts[name].match(spec[name].match) === null)
-                    throw new Error(`invalid value of named parameter "${name}" ` +
-                        `(has to match ${spec[name].match})`)
+                    && ( ( spec[name].match instanceof RegExp
+                           && this.opts[name].match(spec[name].match) === null)
+                         || ( typeof spec[name].match === "function"
+                              && !spec[name].match(this.opts[name]) ) ))
+                    throw new Error(`invalid value "${this.opts[name]}" of named parameter "${name}"`)
                 this.params[name] = this.opts[name]
             }
             else if (this.opts[name] === undefined
@@ -55,14 +101,20 @@ export default class SpeechFlowNode extends Events.EventEmitter {
                 && typeof spec[name].pos === "number"
                 && spec[name].pos < this.args.length) {
                 /* positional argument */
-                if (typeof this.args[spec[name].pos!] !== spec[name].type)
+                if (typeof this.args[spec[name].pos] !== spec[name].type)
                     throw new Error(`invalid type of positional parameter "${name}" ` +
                         `(has to be ${spec[name].type})`)
                 if ("match" in spec[name]
-                    && this.args[spec[name].pos!].match(spec[name].match) === null)
+                    && this.args[spec[name].pos].match(spec[name].match) === null)
                     throw new Error(`invalid value of positional parameter "${name}" ` +
                         `(has to match ${spec[name].match})`)
-                this.params[name] = this.args[spec[name].pos!]
+                if ("match" in spec[name]
+                    && ( ( spec[name].match instanceof RegExp
+                           && this.args[spec[name].pos].match(spec[name].match) === null)
+                         || ( typeof spec[name].match === "function"
+                              && !spec[name].match(this.args[spec[name].pos]) ) ))
+                    throw new Error(`invalid value "${this.opts[name]}" of positional parameter "${name}"`)
+                this.params[name] = this.args[spec[name].pos]
             }
             else if ("val" in spec[name] && spec[name].val !== undefined)
                 /* default argument */
@@ -70,6 +122,19 @@ export default class SpeechFlowNode extends Events.EventEmitter {
             else
                 throw new Error(`required parameter "${name}" not given`)
         }
+        for (const name of Object.keys(this.opts)) {
+            if (spec[name] === undefined)
+                throw new Error(`named parameter "${name}" not known`)
+        }
+        for (let i = 0; i < this.args.length; i++) {
+            let found = false
+            for (const name of Object.keys(spec))
+                if (spec[name].pos === i)
+                    found = true
+            if (!found)
+                throw new Error(`positional parameter #${i} ("${this.args[i]}") ` +
+                    "not mappable to any known argument")
+        }
     }
 
     /* connect node to another one */
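The widened "match" option above now also accepts a predicate function instead of a RegExp, and node constructors receive an additional cfg object that overrides the shared config defaults. A minimal subclass sketch, with an invented node name and parameters purely for illustration:

    /* hypothetical SpeechFlow node illustrating the extended configure() spec */
    import SpeechFlowNode from "./speechflow-node"

    export default class SpeechFlowNodeExample extends SpeechFlowNode {
        /* declare official node name */
        public static name = "example"

        constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
            super(id, cfg, opts, args)
            this.configure({
                /* RegExp matcher, as before */
                language: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
                /* new: predicate-function matcher */
                interval: { type: "number", pos: 1, val: 100,
                    match: (x: any) => Number.isInteger(x) && x > 0 }
            })
        }
    }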
package/src/speechflow-utils.ts (new file)
@@ -0,0 +1,212 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* external dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+import { DateTime, Duration } from "luxon"
+import CBOR from "cbor2"
+
+/* internal dependencies */
+import { SpeechFlowChunk } from "./speechflow-node"
+
+/* calculate duration of an audio buffer */
+export function audioBufferDuration (
+    buffer: Buffer,
+    sampleRate = 48000,
+    bitDepth = 16,
+    channels = 1,
+    littleEndian = true
+) {
+    if (!Buffer.isBuffer(buffer))
+        throw new Error("invalid input (Buffer expected)")
+    if (littleEndian !== true)
+        throw new Error("only Little Endian supported")
+
+    const bytesPerSample = bitDepth / 8
+    const totalSamples = buffer.length / (bytesPerSample * channels)
+    return totalSamples / sampleRate
+}
+
+/* create a Duplex/Transform stream which has
+   object-mode on Writable side and buffer/string-mode on Readable side */
+export function createTransformStreamForWritableSide () {
+    return new Stream.Transform({
+        readableObjectMode: true,
+        writableObjectMode: true,
+        decodeStrings: false,
+        transform (chunk: SpeechFlowChunk, encoding, callback) {
+            this.push(chunk.payload)
+            callback()
+        }
+    })
+}
+
+/* create a Duplex/Transform stream which has
+   object-mode on Readable side and buffer/string-mode on Writable side */
+export function createTransformStreamForReadableSide (type: "text" | "audio", getTimeZero: () => DateTime) {
+    return new Stream.Transform({
+        readableObjectMode: true,
+        writableObjectMode: true,
+        decodeStrings: false,
+        transform (chunk: Buffer | string, encoding, callback) {
+            const timeZero = getTimeZero()
+            const start = DateTime.now().diff(timeZero)
+            let end = start
+            if (type === "audio") {
+                const duration = audioBufferDuration(chunk as Buffer)
+                end = start.plus(duration * 1000)
+            }
+            const obj = new SpeechFlowChunk(start, end, "final", type, chunk)
+            this.push(obj)
+            callback()
+        }
+    })
+}
+
+/* ensure a chunk is of a certain type and format */
+export function ensureStreamChunk (type: "audio" | "text", chunk: SpeechFlowChunk | Buffer | string) {
+    if (chunk instanceof SpeechFlowChunk) {
+        if (chunk.type !== type)
+            throw new Error(`invalid payload chunk (expected ${type} type, received ${chunk.type} type)`)
+    }
+    else {
+        if (type === "text" && Buffer.isBuffer(chunk))
+            chunk = chunk.toString("utf8")
+        else if (type === "audio" && !Buffer.isBuffer(chunk))
+            chunk = Buffer.from(chunk)
+    }
+    return chunk
+}
+
+/* type of a serialized SpeechFlow chunk */
+type SpeechFlowChunkSerialized = {
+    timestampStart: number,
+    timestampEnd: number,
+    kind: string,
+    type: string,
+    payload: Uint8Array
+}
+
+/* encode/serialize chunk of data */
+export function streamChunkEncode (chunk: SpeechFlowChunk) {
+    let payload: Uint8Array
+    if (Buffer.isBuffer(chunk.payload))
+        payload = new Uint8Array(chunk.payload)
+    else {
+        const encoder = new TextEncoder()
+        payload = encoder.encode(chunk.payload)
+    }
+    const data = {
+        timestampStart: chunk.timestampStart.toMillis(),
+        timestampEnd: chunk.timestampEnd.toMillis(),
+        kind: chunk.kind,
+        type: chunk.type,
+        payload
+    } satisfies SpeechFlowChunkSerialized
+    const _data = CBOR.encode(data)
+    return _data
+}
+
+/* decode/unserialize chunk of data */
+export function streamChunkDecode (_data: Uint8Array) {
+    let data: SpeechFlowChunkSerialized
+    try {
+        data = CBOR.decode<SpeechFlowChunkSerialized>(_data)
+    }
+    catch (err: any) {
+        throw new Error(`CBOR decoding failed: ${err}`)
+    }
+    let payload: Buffer | string
+    if (data.type === "audio")
+        payload = Buffer.from(data.payload)
+    else
+        payload = (new TextDecoder()).decode(data.payload)
+    const chunk = new SpeechFlowChunk(
+        Duration.fromMillis(data.timestampStart),
+        Duration.fromMillis(data.timestampEnd),
+        data.kind as "intermediate" | "final",
+        data.type as "audio" | "text",
+        payload
+    )
+    return chunk
+}
+
+/* helper class for single item queue */
+export class SingleQueue<T> extends EventEmitter {
+    private queue = new Array<T>()
+    write (item: T) {
+        this.queue.unshift(item)
+        this.emit("dequeue")
+    }
+    read () {
+        return new Promise<T>((resolve, reject) => {
+            const consume = () => {
+                if (this.queue.length > 0)
+                    return this.queue.pop()!
+                else
+                    return null
+            }
+            let item = consume()
+            if (item !== null)
+                resolve(item)
+            else {
+                const tryToConsume = () => {
+                    item = consume()
+                    if (item !== null)
+                        resolve(item)
+                    else
+                        this.once("dequeue", tryToConsume)
+                }
+                this.once("dequeue", tryToConsume)
+            }
+        })
+    }
+}
+
+/* helper class for double-item queue */
+export class DoubleQueue<T0, T1> extends EventEmitter {
+    private queue0 = new Array<T0>()
+    private queue1 = new Array<T1>()
+    private notify () {
+        if (this.queue0.length > 0 && this.queue1.length > 0)
+            this.emit("dequeue")
+    }
+    write0 (item: T0) {
+        this.queue0.unshift(item)
+        this.notify()
+    }
+    write1 (item: T1) {
+        this.queue1.unshift(item)
+        this.notify()
+    }
+    read () {
+        return new Promise<[ T0, T1 ]>((resolve, reject) => {
+            const consume = (): [ T0, T1 ] | null => {
+                if (this.queue0.length > 0 && this.queue1.length > 0) {
+                    const item0 = this.queue0.pop() as T0
+                    const item1 = this.queue1.pop() as T1
+                    return [ item0, item1 ]
+                }
+                else
+                    return null
+            }
+            let items = consume()
+            if (items !== null)
+                resolve(items)
+            else {
+                const tryToConsume = () => {
+                    items = consume()
+                    if (items !== null)
+                        resolve(items)
+                    else
+                        this.once("dequeue", tryToConsume)
+                }
+                this.once("dequeue", tryToConsume)
+            }
+        })
+    }
+}
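To show how these helpers fit together, a small usage sketch (values arbitrary): a chunk is serialized to its CBOR wire form and back, and a SingleQueue hands a written item to a pending read().

    /* illustrative round trip through the serialization helpers above */
    import { Duration } from "luxon"
    import { SpeechFlowChunk } from "./speechflow-node"
    import * as utils from "./speechflow-utils"

    const chunk = new SpeechFlowChunk(
        Duration.fromMillis(0), Duration.fromMillis(250),
        "final", "text", "hello, world")
    const wire = utils.streamChunkEncode(chunk)   /* Uint8Array (CBOR encoded) */
    const back = utils.streamChunkDecode(wire)    /* SpeechFlowChunk again */
    console.log(back.type, back.payload)          /* "text" "hello, world" */

    /* SingleQueue decouples producer and consumer:
       read() resolves as soon as an item has been written */
    const queue = new utils.SingleQueue<SpeechFlowChunk>()
    queue.read().then((c) => { console.log(c.kind) })   /* logs "final" after write() */
    queue.write(chunk.clone())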