speechflow 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +37 -3
  3. package/dst/speechflow-node-a2a-gender.d.ts +17 -0
  4. package/dst/speechflow-node-a2a-gender.js +272 -0
  5. package/dst/speechflow-node-a2a-gender.js.map +1 -0
  6. package/dst/speechflow-node-a2a-meter.js +2 -2
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  8. package/dst/speechflow-node-a2a-mute.js +1 -0
  9. package/dst/speechflow-node-a2a-mute.js.map +1 -1
  10. package/dst/speechflow-node-a2a-vad.js +47 -63
  11. package/dst/speechflow-node-a2a-vad.js.map +1 -1
  12. package/dst/speechflow-node-a2a-wav.js +145 -122
  13. package/dst/speechflow-node-a2a-wav.js.map +1 -1
  14. package/dst/speechflow-node-a2t-deepgram.js +13 -3
  15. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  16. package/dst/speechflow-node-t2a-elevenlabs.js +10 -5
  17. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  18. package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  19. package/dst/speechflow-node-t2t-deepl.js.map +1 -1
  20. package/dst/speechflow-node-t2t-format.js.map +1 -1
  21. package/dst/speechflow-node-t2t-ollama.js.map +1 -1
  22. package/dst/speechflow-node-t2t-openai.js.map +1 -1
  23. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  24. package/dst/speechflow-node-t2t-transformers.js.map +1 -1
  25. package/dst/speechflow-node-x2x-filter.d.ts +11 -0
  26. package/dst/speechflow-node-x2x-filter.js +113 -0
  27. package/dst/speechflow-node-x2x-filter.js.map +1 -0
  28. package/dst/speechflow-node-x2x-trace.js +24 -10
  29. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  30. package/dst/speechflow-node-xio-device.js +14 -5
  31. package/dst/speechflow-node-xio-device.js.map +1 -1
  32. package/dst/speechflow-node-xio-file.js +58 -27
  33. package/dst/speechflow-node-xio-file.js.map +1 -1
  34. package/dst/speechflow-node-xio-mqtt.js.map +1 -1
  35. package/dst/speechflow-node-xio-websocket.js.map +1 -1
  36. package/dst/speechflow-node.js +1 -0
  37. package/dst/speechflow-node.js.map +1 -1
  38. package/dst/speechflow-utils.d.ts +14 -1
  39. package/dst/speechflow-utils.js +110 -2
  40. package/dst/speechflow-utils.js.map +1 -1
  41. package/dst/speechflow.js +56 -53
  42. package/dst/speechflow.js.map +1 -1
  43. package/etc/speechflow.yaml +51 -24
  44. package/package.json +6 -5
  45. package/src/speechflow-node-a2a-gender.ts +272 -0
  46. package/src/speechflow-node-a2a-meter.ts +3 -3
  47. package/src/speechflow-node-a2a-mute.ts +1 -0
  48. package/src/speechflow-node-a2a-vad.ts +58 -68
  49. package/src/speechflow-node-a2a-wav.ts +128 -91
  50. package/src/speechflow-node-a2t-deepgram.ts +15 -4
  51. package/src/speechflow-node-t2a-elevenlabs.ts +13 -8
  52. package/src/speechflow-node-t2a-kokoro.ts +3 -3
  53. package/src/speechflow-node-t2t-deepl.ts +2 -2
  54. package/src/speechflow-node-t2t-format.ts +2 -2
  55. package/src/speechflow-node-t2t-ollama.ts +2 -2
  56. package/src/speechflow-node-t2t-openai.ts +2 -2
  57. package/src/speechflow-node-t2t-subtitle.ts +1 -1
  58. package/src/speechflow-node-t2t-transformers.ts +2 -2
  59. package/src/speechflow-node-x2x-filter.ts +122 -0
  60. package/src/speechflow-node-x2x-trace.ts +28 -11
  61. package/src/speechflow-node-xio-device.ts +20 -8
  62. package/src/speechflow-node-xio-file.ts +74 -36
  63. package/src/speechflow-node-xio-mqtt.ts +3 -3
  64. package/src/speechflow-node-xio-websocket.ts +1 -1
  65. package/src/speechflow-node.ts +2 -0
  66. package/src/speechflow-utils.ts +81 -2
  67. package/src/speechflow.ts +84 -81
@@ -32,7 +32,8 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
32
32
  /* declare node configuration parameters */
33
33
  this.configure({
34
34
  device: { type: "string", pos: 0, val: "", match: /^(.+?):(.+)$/ },
35
- mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ }
35
+ mode: { type: "string", pos: 1, val: "rw", match: /^(?:r|w|rw)$/ },
36
+ chunk: { type: "number", pos: 2, val: 200, match: (n: number) => n >= 10 && n <= 1000 }
36
37
  })
37
38
 
38
39
  /* declare node input/output format */
@@ -98,6 +99,13 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
98
99
  throw new Error(`audio device sample rate ${device.defaultSampleRate} is ` +
99
100
  `incompatible with required sample rate ${this.config.audioSampleRate}`)
100
101
 
102
+ /* determine how many bytes we need per chunk when
103
+ the chunk should be the requested duration */
104
+ const highwaterMark = (
105
+ this.config.audioSampleRate *
106
+ (this.config.audioBitDepth / 8)
107
+ ) / (1000 / this.params.chunk)
108
+
101
109
  /* establish device connection
102
110
  Notice: "naudiodon" actually implements Stream.{Readable,Writable,Duplex}, but
103
111
  declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
@@ -115,13 +123,15 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
115
123
  deviceId: device.id,
116
124
  channelCount: this.config.audioChannels,
117
125
  sampleRate: this.config.audioSampleRate,
118
- sampleFormat: this.config.audioBitDepth
126
+ sampleFormat: this.config.audioBitDepth,
127
+ highwaterMark
119
128
  },
120
129
  outOptions: {
121
130
  deviceId: device.id,
122
131
  channelCount: this.config.audioChannels,
123
132
  sampleRate: this.config.audioSampleRate,
124
- sampleFormat: this.config.audioBitDepth
133
+ sampleFormat: this.config.audioBitDepth,
134
+ highwaterMark
125
135
  }
126
136
  })
127
137
  this.stream = this.io as unknown as Stream.Duplex
@@ -138,10 +148,11 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
138
148
  this.log("info", `resolved "${this.params.device}" to input device "${device.id}"`)
139
149
  this.io = PortAudio.AudioIO({
140
150
  inOptions: {
141
- deviceId: device.id,
142
- channelCount: this.config.audioChannels,
143
- sampleRate: this.config.audioSampleRate,
144
- sampleFormat: this.config.audioBitDepth
151
+ deviceId: device.id,
152
+ channelCount: this.config.audioChannels,
153
+ sampleRate: this.config.audioSampleRate,
154
+ sampleFormat: this.config.audioBitDepth,
155
+ highwaterMark
145
156
  }
146
157
  })
147
158
  this.stream = this.io as unknown as Stream.Readable
@@ -161,7 +172,8 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
161
172
  deviceId: device.id,
162
173
  channelCount: this.config.audioChannels,
163
174
  sampleRate: this.config.audioSampleRate,
164
- sampleFormat: this.config.audioBitDepth
175
+ sampleFormat: this.config.audioBitDepth,
176
+ highwaterMark
165
177
  }
166
178
  })
167
179
  this.stream = this.io as unknown as Stream.Writable
@@ -23,9 +23,11 @@ export default class SpeechFlowNodeFile extends SpeechFlowNode {
23
23
 
24
24
  /* declare node configuration parameters */
25
25
  this.configure({
26
- path: { type: "string", pos: 0, val: "" },
27
- mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w|rw)$/ },
28
- type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ }
26
+ path: { type: "string", pos: 0, val: "" },
27
+ mode: { type: "string", pos: 1, val: "r", match: /^(?:r|w|rw)$/ },
28
+ type: { type: "string", pos: 2, val: "audio", match: /^(?:audio|text)$/ },
29
+ chunka: { type: "number", val: 200, match: (n: number) => n >= 10 && n <= 1000 },
30
+ chunkt: { type: "number", val: 65536, match: (n: number) => n >= 1024 && n <= 131072 }
29
31
  })
30
32
 
31
33
  /* declare node input/output format */
@@ -45,92 +47,128 @@ export default class SpeechFlowNodeFile extends SpeechFlowNode {
45
47
 
46
48
  /* open node */
47
49
  async open () {
50
+ /* determine how many bytes we need per chunk when
51
+ the chunk should be of the required duration/size */
52
+ const highWaterMarkAudio = (
53
+ this.config.audioSampleRate *
54
+ (this.config.audioBitDepth / 8)
55
+ ) / (1000 / this.params.chunka)
56
+ const highWaterMarkText = this.params.chunkt
57
+
58
+ /* sanity check */
48
59
  if (this.params.path === "")
49
60
  throw new Error("required parameter \"path\" has to be given")
61
+
62
+ /* dispatch according to mode and path */
50
63
  if (this.params.mode === "rw") {
51
64
  if (this.params.path === "-") {
52
65
  /* standard I/O */
53
66
  if (this.params.type === "audio") {
54
67
  process.stdin.setEncoding()
55
68
  process.stdout.setEncoding()
69
+ const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
70
+ process.stdin.pipe(streamR)
71
+ const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
72
+ streamW.pipe(process.stdout)
73
+ this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
56
74
  }
57
75
  else {
58
76
  process.stdin.setEncoding(this.config.textEncoding)
59
77
  process.stdout.setEncoding(this.config.textEncoding)
78
+ const streamR = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
79
+ process.stdin.pipe(streamR)
80
+ const streamW = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
81
+ streamW.pipe(process.stdout)
82
+ this.stream = Stream.Duplex.from({ readable: streamR, writable: streamW })
60
83
  }
61
- this.stream = Stream.Duplex.from({
62
- readable: process.stdin,
63
- writable: process.stdout
64
- })
65
84
  }
66
85
  else {
67
86
  /* file I/O */
68
87
  if (this.params.type === "audio") {
69
88
  this.stream = Stream.Duplex.from({
70
- readable: fs.createReadStream(this.params.path),
71
- writable: fs.createWriteStream(this.params.path)
89
+ readable: fs.createReadStream(this.params.path,
90
+ { highWaterMark: highWaterMarkAudio }),
91
+ writable: fs.createWriteStream(this.params.path,
92
+ { highWaterMark: highWaterMarkAudio })
72
93
  })
73
94
  }
74
95
  else {
75
96
  this.stream = Stream.Duplex.from({
76
- readable: fs.createReadStream(this.params.path,
77
- { encoding: this.config.textEncoding }),
78
- writable: fs.createWriteStream(this.params.path,
79
- { encoding: this.config.textEncoding })
97
+ readable: fs.createReadStream(this.params.path, {
98
+ highWaterMark: highWaterMarkText,
99
+ encoding: this.config.textEncoding
100
+ }),
101
+ writable: fs.createWriteStream(this.params.path, {
102
+ highWaterMark: highWaterMarkText,
103
+ encoding: this.config.textEncoding
104
+ })
80
105
  })
81
106
  }
82
107
  }
83
108
 
84
109
  /* convert regular stream into object-mode stream */
85
110
  const wrapper1 = utils.createTransformStreamForWritableSide()
86
- const wrapper2 = utils.createTransformStreamForReadableSide(this.params.type, () => this.timeZero)
111
+ const wrapper2 = utils.createTransformStreamForReadableSide(
112
+ this.params.type, () => this.timeZero)
87
113
  this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
88
114
  }
89
115
  else if (this.params.mode === "r") {
90
116
  if (this.params.path === "-") {
91
117
  /* standard I/O */
92
- if (this.params.type === "audio")
118
+ let chunker: Stream.PassThrough
119
+ if (this.params.type === "audio") {
93
120
  process.stdin.setEncoding()
94
- else
121
+ chunker = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
122
+ }
123
+ else {
95
124
  process.stdin.setEncoding(this.config.textEncoding)
96
- this.stream = process.stdin
125
+ chunker = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
126
+ }
127
+ const wrapper = utils.createTransformStreamForReadableSide(
128
+ this.params.type, () => this.timeZero)
129
+ this.stream = Stream.compose(process.stdin, chunker, wrapper)
97
130
  }
98
131
  else {
99
132
  /* file I/O */
133
+ let readable: Stream.Readable
100
134
  if (this.params.type === "audio")
101
- this.stream = fs.createReadStream(this.params.path)
135
+ readable = fs.createReadStream(this.params.path,
136
+ { highWaterMark: highWaterMarkAudio })
102
137
  else
103
- this.stream = fs.createReadStream(this.params.path,
104
- { encoding: this.config.textEncoding })
138
+ readable = fs.createReadStream(this.params.path,
139
+ { highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
140
+ const wrapper = utils.createTransformStreamForReadableSide(
141
+ this.params.type, () => this.timeZero)
142
+ this.stream = Stream.compose(readable, wrapper)
105
143
  }
106
-
107
- /* convert regular stream into object-mode stream */
108
- const wrapper = utils.createTransformStreamForReadableSide(this.params.type, () => this.timeZero)
109
- this.stream.pipe(wrapper)
110
- this.stream = wrapper
111
144
  }
112
145
  else if (this.params.mode === "w") {
113
146
  if (this.params.path === "-") {
114
147
  /* standard I/O */
115
- if (this.params.type === "audio")
148
+ let chunker: Stream.PassThrough
149
+ if (this.params.type === "audio") {
116
150
  process.stdout.setEncoding()
117
- else
151
+ chunker = new Stream.PassThrough({ highWaterMark: highWaterMarkAudio })
152
+ }
153
+ else {
118
154
  process.stdout.setEncoding(this.config.textEncoding)
119
- this.stream = process.stdout
155
+ chunker = new Stream.PassThrough({ highWaterMark: highWaterMarkText })
156
+ }
157
+ const wrapper = utils.createTransformStreamForWritableSide()
158
+ this.stream = Stream.compose(wrapper, chunker, process.stdout)
120
159
  }
121
160
  else {
122
161
  /* file I/O */
162
+ let writable: Stream.Writable
123
163
  if (this.params.type === "audio")
124
- this.stream = fs.createWriteStream(this.params.path)
164
+ writable = fs.createWriteStream(this.params.path,
165
+ { highWaterMark: highWaterMarkAudio })
125
166
  else
126
- this.stream = fs.createWriteStream(this.params.path,
127
- { encoding: this.config.textEncoding })
167
+ writable = fs.createWriteStream(this.params.path,
168
+ { highWaterMark: highWaterMarkText, encoding: this.config.textEncoding })
169
+ const wrapper = utils.createTransformStreamForWritableSide()
170
+ this.stream = Stream.compose(wrapper, writable)
128
171
  }
129
-
130
- /* convert regular stream into object-mode stream */
131
- const wrapper = utils.createTransformStreamForWritableSide()
132
- wrapper.pipe(this.stream as Stream.Writable)
133
- this.stream = wrapper
134
172
  }
135
173
  else
136
174
  throw new Error(`invalid file mode "${this.params.mode}"`)
@@ -5,11 +5,11 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import MQTT from "mqtt"
12
- import UUID from "pure-uuid"
11
+ import MQTT from "mqtt"
12
+ import UUID from "pure-uuid"
13
13
 
14
14
  /* internal dependencies */
15
15
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import ws from "ws"
@@ -7,6 +7,8 @@
7
7
  /* standard dependencies */
8
8
  import Events from "node:events"
9
9
  import Stream from "node:stream"
10
+
11
+ /* external dependencies */
10
12
  import { DateTime, Duration } from "luxon"
11
13
 
12
14
  /* the definition of a single payload chunk passed through the SpeechFlow nodes */
@@ -4,14 +4,17 @@
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
7
- /* external dependencies */
7
+ /* standard dependencies */
8
8
  import Stream from "node:stream"
9
9
  import { EventEmitter } from "node:events"
10
+
11
+ /* external dependencies */
10
12
  import { DateTime, Duration } from "luxon"
11
13
  import CBOR from "cbor2"
14
+ import * as IntervalTree from "node-interval-tree"
12
15
 
13
16
  /* internal dependencies */
14
- import { SpeechFlowChunk } from "./speechflow-node"
17
+ import { SpeechFlowChunk } from "./speechflow-node"
15
18
 
16
19
  /* calculate duration of an audio buffer */
17
20
  export function audioBufferDuration (
@@ -68,6 +71,10 @@ export function createTransformStreamForWritableSide () {
68
71
  transform (chunk: SpeechFlowChunk, encoding, callback) {
69
72
  this.push(chunk.payload)
70
73
  callback()
74
+ },
75
+ final (callback) {
76
+ this.push(null)
77
+ callback()
71
78
  }
72
79
  })
73
80
  }
@@ -90,6 +97,10 @@ export function createTransformStreamForReadableSide (type: "text" | "audio", ge
90
97
  const obj = new SpeechFlowChunk(start, end, "final", type, chunk)
91
98
  this.push(obj)
92
99
  callback()
100
+ },
101
+ final (callback) {
102
+ this.push(null)
103
+ callback()
93
104
  }
94
105
  })
95
106
  }
@@ -252,6 +263,7 @@ export class QueuePointer<T extends QueueElement> extends EventEmitter {
252
263
  private queue: Queue<T>
253
264
  ) {
254
265
  super()
266
+ this.setMaxListeners(100)
255
267
  }
256
268
 
257
269
  /* positioning operations */
@@ -379,6 +391,10 @@ export class QueuePointer<T extends QueueElement> extends EventEmitter {
379
391
  export class Queue<T extends QueueElement> extends EventEmitter {
380
392
  public elements: T[] = []
381
393
  private pointers = new Map<string, QueuePointer<T>>()
394
+ constructor () {
395
+ super()
396
+ this.setMaxListeners(100)
397
+ }
382
398
  pointerUse (name: string): QueuePointer<T> {
383
399
  if (!this.pointers.has(name))
384
400
  this.pointers.set(name, new QueuePointer<T>(name, this))
@@ -405,3 +421,66 @@ export class Queue<T extends QueueElement> extends EventEmitter {
405
421
  }
406
422
  }
407
423
 
/*  utility class for wrapping a custom ("foreign") stream into a regular
    object-mode Transform stream: every chunk written to the wrapper is
    forwarded to the foreign stream via write(), and every chunk the foreign
    stream emits via its "data" event is pushed to the wrapper's readable side  */
export class StreamWrapper extends Stream.Transform {
    private foreignStream: any

    /*  whether the foreign stream already signalled that no more data will follow  */
    private foreignEnded = false

    /*  the callback of a _flush() call which still waits for the foreign stream to end  */
    private pendingFlush: Stream.TransformCallback | null = null

    /*  create the wrapper:
        - foreignStream: object providing on()/once()/write() and optionally end()
        - options:       regular Transform options (object-mode is always enforced
                         on both sides; the passed object itself is left untouched)  */
    constructor (foreignStream: any, options: Stream.TransformOptions = {}) {
        /*  enforce object-mode on a copy, so the options of the caller are not modified  */
        super({ ...options, readableObjectMode: true, writableObjectMode: true })
        this.foreignStream = foreignStream

        /*  pass-through all output of the foreign stream  */
        this.foreignStream.on("data", (chunk: any) => {
            this.push(chunk)
        })

        /*  propagate errors through the regular stream error path
            (a pending flush is completed with the error instead)  */
        this.foreignStream.on("error", (err: Error) => {
            const flush = this.takePendingFlush()
            if (flush !== null)
                flush(err)
            else
                this.destroy(err)
        })

        /*  the foreign stream delivered all its data: either complete a
            pending flush (which lets the Transform end its readable side
            itself) or end the readable side directly  */
        this.foreignStream.on("end", () => {
            this.foreignEnded = true
            const flush = this.takePendingFlush()
            if (flush !== null)
                flush()
            else
                this.push(null)
        })

        /*  a closed foreign stream cannot deliver data anymore, so never
            keep a flush waiting for an "end" event which will not arrive  */
        this.foreignStream.on("close", () => {
            this.foreignEnded = true
            const flush = this.takePendingFlush()
            if (flush !== null)
                flush()
        })
    }

    /*  fetch and reset the pending flush callback, so it is called at most once  */
    private takePendingFlush (): Stream.TransformCallback | null {
        const flush = this.pendingFlush
        this.pendingFlush = null
        return flush
    }

    /*  forward a written chunk to the foreign stream and honor its
        back-pressure: if write() returns false, wait for "drain"  */
    _transform (chunk: any, encoding: BufferEncoding, callback: Stream.TransformCallback): void {
        try {
            const canContinue = this.foreignStream.write(chunk)
            if (canContinue)
                callback()
            else
                this.foreignStream.once("drain", () => callback())
        }
        catch (err) {
            callback(err as Error)
        }
    }

    /*  end the foreign stream once the writable side of the wrapper finished.
        The flush is completed only after the foreign stream signalled its
        end, as completing it earlier would close the readable side while the
        foreign stream still delivers data (resulting in a push-after-EOF error)  */
    _flush (callback: Stream.TransformCallback): void {
        const canEnd = typeof this.foreignStream.end === "function"

        /*  nothing to wait for: foreign stream cannot be ended or already ended  */
        if (!canEnd || this.foreignEnded) {
            try {
                if (canEnd)
                    this.foreignStream.end()
                callback()
            }
            catch (err) {
                callback(err as Error)
            }
            return
        }

        /*  end foreign stream and defer completion until its "end"/"close"/"error"  */
        this.pendingFlush = callback
        try {
            this.foreignStream.end()
        }
        catch (err) {
            const flush = this.takePendingFlush()
            if (flush !== null)
                flush(err as Error)
        }
    }
}
465
+
/*  time-based meta store: keeps arbitrary items attached to time
    intervals (tracked in milliseconds inside an interval tree)  */
interface TimeStoreInterval<T> extends IntervalTree.Interval {
    item: T
}
export class TimeStore<T> extends EventEmitter {
    private tree = new IntervalTree.IntervalTree<TimeStoreInterval<T>>()

    /*  attach an item to the time interval from start to end  */
    store (start: Duration, end: Duration, item: T): void {
        const low  = start.toMillis()
        const high = end.toMillis()
        this.tree.insert({ low, high, item })
    }

    /*  retrieve the items of all intervals the tree search
        reports for the time range from start to end  */
    fetch (start: Duration, end: Duration): T[] {
        const items: T[] = []
        for (const hit of this.tree.search(start.toMillis(), end.toMillis()))
            items.push(hit.item)
        return items
    }

    /*  drop all intervals which lie entirely before the given point in time  */
    prune (_before: Duration): void {
        const limit = _before.toMillis()

        /*  candidates are looked up in the range 0 to limit-1; only
            those whose both boundaries are below the limit are removed  */
        const stale = this.tree.search(0, limit - 1)
            .filter((hit) => hit.low < limit && hit.high < limit)
        stale.forEach((hit) => {
            this.tree.remove(hit)
        })
    }
}