speechflow 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67):
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +37 -3
  3. package/dst/speechflow-node-a2a-gender.d.ts +17 -0
  4. package/dst/speechflow-node-a2a-gender.js +272 -0
  5. package/dst/speechflow-node-a2a-gender.js.map +1 -0
  6. package/dst/speechflow-node-a2a-meter.js +2 -2
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  8. package/dst/speechflow-node-a2a-mute.js +1 -0
  9. package/dst/speechflow-node-a2a-mute.js.map +1 -1
  10. package/dst/speechflow-node-a2a-vad.js +47 -63
  11. package/dst/speechflow-node-a2a-vad.js.map +1 -1
  12. package/dst/speechflow-node-a2a-wav.js +145 -122
  13. package/dst/speechflow-node-a2a-wav.js.map +1 -1
  14. package/dst/speechflow-node-a2t-deepgram.js +13 -3
  15. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  16. package/dst/speechflow-node-t2a-elevenlabs.js +10 -5
  17. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  18. package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  19. package/dst/speechflow-node-t2t-deepl.js.map +1 -1
  20. package/dst/speechflow-node-t2t-format.js.map +1 -1
  21. package/dst/speechflow-node-t2t-ollama.js.map +1 -1
  22. package/dst/speechflow-node-t2t-openai.js.map +1 -1
  23. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  24. package/dst/speechflow-node-t2t-transformers.js.map +1 -1
  25. package/dst/speechflow-node-x2x-filter.d.ts +11 -0
  26. package/dst/speechflow-node-x2x-filter.js +113 -0
  27. package/dst/speechflow-node-x2x-filter.js.map +1 -0
  28. package/dst/speechflow-node-x2x-trace.js +24 -10
  29. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  30. package/dst/speechflow-node-xio-device.js +14 -5
  31. package/dst/speechflow-node-xio-device.js.map +1 -1
  32. package/dst/speechflow-node-xio-file.js +58 -27
  33. package/dst/speechflow-node-xio-file.js.map +1 -1
  34. package/dst/speechflow-node-xio-mqtt.js.map +1 -1
  35. package/dst/speechflow-node-xio-websocket.js.map +1 -1
  36. package/dst/speechflow-node.js +1 -0
  37. package/dst/speechflow-node.js.map +1 -1
  38. package/dst/speechflow-utils.d.ts +14 -1
  39. package/dst/speechflow-utils.js +110 -2
  40. package/dst/speechflow-utils.js.map +1 -1
  41. package/dst/speechflow.js +56 -53
  42. package/dst/speechflow.js.map +1 -1
  43. package/etc/speechflow.yaml +51 -24
  44. package/package.json +6 -5
  45. package/src/speechflow-node-a2a-gender.ts +272 -0
  46. package/src/speechflow-node-a2a-meter.ts +3 -3
  47. package/src/speechflow-node-a2a-mute.ts +1 -0
  48. package/src/speechflow-node-a2a-vad.ts +58 -68
  49. package/src/speechflow-node-a2a-wav.ts +128 -91
  50. package/src/speechflow-node-a2t-deepgram.ts +15 -4
  51. package/src/speechflow-node-t2a-elevenlabs.ts +13 -8
  52. package/src/speechflow-node-t2a-kokoro.ts +3 -3
  53. package/src/speechflow-node-t2t-deepl.ts +2 -2
  54. package/src/speechflow-node-t2t-format.ts +2 -2
  55. package/src/speechflow-node-t2t-ollama.ts +2 -2
  56. package/src/speechflow-node-t2t-openai.ts +2 -2
  57. package/src/speechflow-node-t2t-subtitle.ts +1 -1
  58. package/src/speechflow-node-t2t-transformers.ts +2 -2
  59. package/src/speechflow-node-x2x-filter.ts +122 -0
  60. package/src/speechflow-node-x2x-trace.ts +28 -11
  61. package/src/speechflow-node-xio-device.ts +20 -8
  62. package/src/speechflow-node-xio-file.ts +74 -36
  63. package/src/speechflow-node-xio-mqtt.ts +3 -3
  64. package/src/speechflow-node-xio-websocket.ts +1 -1
  65. package/src/speechflow-node.ts +2 -0
  66. package/src/speechflow-utils.ts +81 -2
  67. package/src/speechflow.ts +84 -81
@@ -7,52 +7,69 @@
7
7
  /* standard dependencies */
8
8
  import Stream from "node:stream"
9
9
 
10
- /* external dependencies */
11
- import wav from "wav"
12
-
13
10
  /* internal dependencies */
14
- import SpeechFlowNode from "./speechflow-node"
15
- import * as utils from "./speechflow-utils"
16
-
17
- /* utility class for wrapping a custom stream into a regular Transform stream */
18
- class StreamWrapper extends Stream.Transform {
19
- private foreignStream: any
20
- constructor (foreignStream: any, options: Stream.TransformOptions = {}) {
21
- options.readableObjectMode = true
22
- options.writableObjectMode = true
23
- super(options)
24
- this.foreignStream = foreignStream
25
- this.foreignStream.on("data", (chunk: any) => {
26
- this.push(chunk)
27
- })
28
- this.foreignStream.on("error", (err: Error) => {
29
- this.emit("error", err)
30
- })
31
- this.foreignStream.on("end", () => {
32
- this.push(null)
33
- })
34
- }
35
- _transform (chunk: any, encoding: BufferEncoding, callback: Stream.TransformCallback): void {
36
- try {
37
- const canContinue = this.foreignStream.write(chunk)
38
- if (canContinue)
39
- callback()
40
- else
41
- this.foreignStream.once("drain", callback)
42
- }
43
- catch (err) {
44
- callback(err as Error)
45
- }
46
- }
47
- _flush (callback: Stream.TransformCallback): void {
48
- try {
49
- if (typeof this.foreignStream.end === "function")
50
- this.foreignStream.end()
51
- callback()
52
- }
53
- catch (err) {
54
- callback(err as Error)
55
- }
11
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
+
13
/* write WAV header: build the canonical 44-byte RIFF/WAVE header
   (a single 16-byte "fmt " chunk directly followed by the "data" chunk header).
   - length:  size of the audio payload in bytes; 0 (or any value which is
              not a finite number >= 1) means "unknown" and yields a
              near-maximum placeholder size, as needed for continuous streams
   - options: audio format fields; the defaults are PCM, mono, 44100 Hz, 16 bit
   returns the header as a newly allocated Buffer */
const writeWavHeader = (
    length: number,
    options?: { audioFormat?: number, channels?: number, sampleRate?: number, bitDepth?: number }
): Buffer => {
    const audioFormat = options?.audioFormat ?? 0x0001 /* PCM */
    const channels    = options?.channels    ?? 1      /* mono */
    const sampleRate  = options?.sampleRate  ?? 44100  /* 44.1KHz */
    const bitDepth    = options?.bitDepth    ?? 16     /* 16-Bit */

    const headerLength = 44

    /* the RIFF size field stores (file size - 8) as an unsigned 32-bit
       integer, so the largest representable payload is 0xFFFFFFFF - 36  */
    const maxDataLength     = 0xFFFFFFFF - (headerLength - 8)
    const unknownDataLength = 0xFFFFFFFF - 100
    const dataLength = (Number.isFinite(length) && length >= 1) ?
        Math.min(Math.floor(length), maxDataLength) :
        unknownDataLength
    const fileSize = dataLength + headerLength

    const byteRate   = (sampleRate * channels * bitDepth) / 8
    const blockAlign = (channels * bitDepth) / 8

    /* assemble the header field by field (all numbers are little-endian) */
    const header = Buffer.alloc(headerLength)
    let offset = 0
    offset += header.write("RIFF", offset, "latin1")
    offset  = header.writeUInt32LE(fileSize - 8, offset)
    offset += header.write("WAVE", offset, "latin1")
    offset += header.write("fmt ", offset, "latin1")
    offset  = header.writeUInt32LE(16, offset)          /* size of "fmt " chunk */
    offset  = header.writeUInt16LE(audioFormat, offset)
    offset  = header.writeUInt16LE(channels, offset)
    offset  = header.writeUInt32LE(sampleRate, offset)
    offset  = header.writeUInt32LE(byteRate, offset)
    offset  = header.writeUInt16LE(blockAlign, offset)
    offset  = header.writeUInt16LE(bitDepth, offset)
    offset += header.write("data", offset, "latin1")
    header.writeUInt32LE(dataLength, offset)

    return header
}
52
+
53
/* read WAV header: decode the first 44 bytes of a buffer as a canonical
   RIFF/WAVE header (a 16-byte "fmt " chunk directly followed by the "data"
   chunk header). The fields are read at fixed offsets and returned as-is,
   without any validation of their values, so the caller has to check the
   tags and format fields it depends on.
   throws an Error if the buffer is shorter than the 44 header bytes */
const readWavHeader = (buffer: Buffer) => {
    const headerLength = 44
    if (buffer.length < headerLength)
        throw new Error(`WAV header incomplete (expected at least ${headerLength} bytes, ` +
            `got ${buffer.length} bytes)`)

    /* helpers reading one field at the running offset */
    let offset = 0
    const tag = () => {
        const value = buffer.subarray(offset, offset + 4).toString()
        offset += 4
        return value
    }
    const u32 = () => {
        const value = buffer.readUInt32LE(offset)
        offset += 4
        return value
    }
    const u16 = () => {
        const value = buffer.readUInt16LE(offset)
        offset += 2
        return value
    }

    /* decode the fields strictly in their on-disk order */
    const riffHead     = tag()
    const fileSize     = u32()
    const waveHead     = tag()
    const fmtHead      = tag()
    const formatLength = u32()
    const audioFormat  = u16()
    const channels     = u16()
    const sampleRate   = u32()
    const byteRate     = u32()
    const blockAlign   = u16()
    const bitDepth     = u16()
    const data         = tag()
    const dataLength   = u32()

    return {
        riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
        channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
    }
}
58
75
 
@@ -77,52 +94,72 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
77
94
 
78
95
  /* open node */
79
96
  async open () {
80
- if (this.params.mode === "encode") {
81
- /* convert raw/PCM to WAV/PCM */
82
- /* NOTICE: as this is a continuous stream, the resulting WAV header is not 100%
83
- conforming to the WAV standard, as it has to use a zero duration information.
84
- This cannot be changed in a stream-based processing. */
85
- const writer = new wav.Writer({
86
- format: 0x0001 /* PCM */,
87
- channels: this.config.audioChannels,
88
- sampleRate: this.config.audioSampleRate,
89
- bitDepth: this.config.audioBitDepth
90
- })
91
- this.stream = new StreamWrapper(writer)
92
- }
93
- else if (this.params.mode === "decode") {
94
- /* convert WAV/PCM to raw/PCM */
95
- const reader = new wav.Reader()
96
- reader.on("format", (format: any) => {
97
- this.log("info", `WAV audio stream: format=${format.audioFormat === 0x0001 ? "PCM" :
98
- "0x" + (format.audioFormat as number).toString(16).padStart(4, "0")} ` +
99
- `bitDepth=${format.bitDepth} ` +
100
- `signed=${format.signed ? "yes" : "no"} ` +
101
- `endian=${format.endianness} ` +
102
- `sampleRate=${format.sampleRate} ` +
103
- `channels=${format.channels}`)
104
- if (format.audioFormat !== 0x0001 /* PCM */)
105
- throw new Error("WAV not based on PCM format")
106
- if (format.bitDepth !== 16)
107
- throw new Error("WAV not based on 16 bit samples")
108
- if (!format.signed)
109
- throw new Error("WAV not based on signed integers")
110
- if (format.endianness !== "LE")
111
- throw new Error("WAV not based on little endianness")
112
- if (format.sampleRate !== 48000)
113
- throw new Error("WAV not based on 48Khz sample rate")
114
- if (format.channels !== 1)
115
- throw new Error("WAV not based on mono channel")
116
- })
117
- this.stream = new StreamWrapper(reader)
118
- }
119
- else
120
- throw new Error(`invalid operation mode "${this.params.mode}"`)
121
-
122
- /* convert regular stream into object-mode stream */
123
- const wrapper1 = utils.createTransformStreamForWritableSide()
124
- const wrapper2 = utils.createTransformStreamForReadableSide("audio", () => this.timeZero)
125
- this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
97
+ /* establish a transform stream */
98
+ const self = this
99
+ let firstChunk = true
100
+ this.stream = new Stream.Transform({
101
+ readableObjectMode: true,
102
+ writableObjectMode: true,
103
+ decodeStrings: false,
104
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
105
+ if (!Buffer.isBuffer(chunk.payload))
106
+ callback(new Error("invalid chunk payload type"))
107
+ else if (firstChunk) {
108
+ if (self.params.mode === "encode") {
109
+ /* convert raw/PCM to WAV/PCM
110
+ (NOTICE: as this is a continuous stream, the
111
+ resulting WAV header is not 100% conforming
112
+ to the WAV standard, as it has to use a zero
113
+ duration information. This cannot be changed in
114
+ a stream-based processing.) */
115
+ const headerBuffer = writeWavHeader(0, {
116
+ audioFormat: 0x0001 /* PCM */,
117
+ channels: self.config.audioChannels,
118
+ sampleRate: self.config.audioSampleRate,
119
+ bitDepth: self.config.audioBitDepth
120
+ })
121
+ const headerChunk = chunk.clone()
122
+ headerChunk.payload = headerBuffer
123
+ this.push(headerChunk)
124
+ this.push(chunk)
125
+ callback()
126
+ }
127
+ else if (self.params.mode === "decode") {
128
+ /* convert WAV/PCM to raw/PCM */
129
+ const header = readWavHeader(chunk.payload)
130
+ self.log("info", "WAV audio stream: " +
131
+ `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
132
+ "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
133
+ `channels=${header.channels} ` +
134
+ `sampleRate=${header.sampleRate} ` +
135
+ `bitDepth=${header.bitDepth}`)
136
+ if (header.audioFormat !== 0x0001 /* PCM */)
137
+ throw new Error("WAV not based on PCM format")
138
+ if (header.bitDepth !== 16)
139
+ throw new Error("WAV not based on 16 bit samples")
140
+ if (header.sampleRate !== 48000)
141
+ throw new Error("WAV not based on 48Khz sample rate")
142
+ if (header.channels !== 1)
143
+ throw new Error("WAV not based on mono channel")
144
+ chunk.payload = chunk.payload.subarray(44)
145
+ this.push(chunk)
146
+ callback()
147
+ }
148
+ else
149
+ throw new Error(`invalid operation mode "${self.params.mode}"`)
150
+ }
151
+ else {
152
+ /* pass-through original chunk */
153
+ this.push(chunk)
154
+ callback()
155
+ }
156
+ firstChunk = false
157
+ },
158
+ final (callback) {
159
+ this.push(null)
160
+ callback()
161
+ }
162
+ })
126
163
  }
127
164
 
128
165
  /* close node */
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import * as Deepgram from "@deepgram/sdk"
@@ -65,6 +65,9 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
65
65
  /* create queue for results */
66
66
  const queue = new utils.SingleQueue<SpeechFlowChunk>()
67
67
 
68
+ /* create a store for the meta information */
69
+ const metastore = new utils.TimeStore<Map<string, any>>()
70
+
68
71
  /* connect to Deepgram API */
69
72
  const deepgram = Deepgram.createClient(this.params.key)
70
73
  let language = "en"
@@ -86,21 +89,27 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
86
89
  smart_format: true,
87
90
  punctuate: true,
88
91
  filler_words: true,
89
- diarize: true, /* still not used by us */
92
+ diarize: false,
90
93
  numerals: true,
91
94
  profanity_filter: false
92
95
  })
93
96
 
94
97
  /* hook onto Deepgram API events */
95
98
  this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
96
- const text = (data.channel?.alternatives[0].transcript as string) ?? ""
99
+ const text = (data.channel?.alternatives[0]?.transcript as string) ?? ""
97
100
  if (text === "")
98
101
  this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`)
99
102
  else {
100
103
  this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`)
101
104
  const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
102
105
  const end = start.plus({ seconds: data.duration })
103
- const chunk = new SpeechFlowChunk(start, end, "final", "text", text)
106
+ const metas = metastore.fetch(start, end)
107
+ const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
108
+ curr.forEach((val, key) => { prev.set(key, val) })
109
+ return prev
110
+ }, new Map<string, any>())
111
+ metastore.prune(start)
112
+ const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
104
113
  queue.write(chunk)
105
114
  }
106
115
  })
@@ -180,6 +189,8 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
180
189
  if (chunk.payload.byteLength > 0) {
181
190
  log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`)
182
191
  initTimeoutStart()
192
+ if (chunk.meta.size > 0)
193
+ metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
183
194
  dg.send(chunk.payload.buffer) /* intentionally discard all time information */
184
195
  }
185
196
  callback()
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
@@ -30,11 +30,13 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
30
30
 
31
31
  /* declare node configuration parameters */
32
32
  this.configure({
33
- key: { type: "string", val: process.env.SPEECHFLOW_ELEVENLABS_KEY },
34
- voice: { type: "string", val: "Brian", pos: 0, match: /^(?:.+)$/ },
35
- language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
36
- speed: { type: "number", val: 1.05, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
37
- optimize: { type: "string", val: "latency", pos: 3, match: /^(?:latency|quality)$/ }
33
+ key: { type: "string", val: process.env.SPEECHFLOW_ELEVENLABS_KEY },
34
+ voice: { type: "string", val: "Brian", pos: 0, match: /^(?:Brittney|Cassidy|Leonie|Mark|Brian)$/ },
35
+ language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
36
+ speed: { type: "number", val: 1.00, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
37
+ stability: { type: "number", val: 0.5, pos: 3, match: (n: number) => n >= 0.0 && n <= 1.0 },
38
+ similarity: { type: "number", val: 0.75, pos: 4, match: (n: number) => n >= 0.0 && n <= 1.0 },
39
+ optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
38
40
  })
39
41
 
40
42
  /* declare node input/output format */
@@ -90,7 +92,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
90
92
 
91
93
  /* perform text-to-speech operation with Elevenlabs API */
92
94
  const model = this.params.optimize === "quality" ?
93
- "eleven_multilingual_v2" :
95
+ "eleven_turbo_v2_5" :
94
96
  "eleven_flash_v2_5"
95
97
  const speechStream = (text: string) => {
96
98
  this.log("info", `ElevenLabs: send text "${text}"`)
@@ -101,7 +103,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
101
103
  outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
102
104
  seed: 815, /* arbitrary, but fixated by us */
103
105
  voiceSettings: {
104
- speed: this.params.speed
106
+ speed: this.params.speed,
107
+ stability: this.params.stability,
108
+ similarityBoost: this.params.similarity
105
109
  }
106
110
  }, {
107
111
  timeoutInSeconds: 30,
@@ -128,6 +132,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
128
132
  if (Buffer.isBuffer(chunk.payload))
129
133
  callback(new Error("invalid chunk payload type"))
130
134
  else {
135
+ log("info", `ElevenLabs: send text: ${JSON.stringify(chunk.payload)}`)
131
136
  speechStream(chunk.payload).then((stream) => {
132
137
  getStreamAsBuffer(stream).then((buffer) => {
133
138
  const bufferResampled = resampler.processChunk(buffer)
@@ -5,11 +5,11 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import { KokoroTTS } from "kokoro-js"
12
- import SpeexResampler from "speex-resampler"
11
+ import { KokoroTTS } from "kokoro-js"
12
+ import SpeexResampler from "speex-resampler"
13
13
 
14
14
  /* internal dependencies */
15
15
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,10 +5,10 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import * as DeepL from "deepl-node"
11
+ import * as DeepL from "deepl-node"
12
12
 
13
13
  /* internal dependencies */
14
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,10 +5,10 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import wrapText from "wrap-text"
11
+ import wrapText from "wrap-text"
12
12
 
13
13
  /* internal dependencies */
14
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,10 +5,10 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import { Ollama } from "ollama"
11
+ import { Ollama } from "ollama"
12
12
 
13
13
  /* internal dependencies */
14
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,10 +5,10 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
- import OpenAI from "openai"
11
+ import OpenAI from "openai"
12
12
 
13
13
  /* internal dependencies */
14
14
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
9
 
10
10
  /* internal dependencies */
11
11
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,8 +5,8 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import path from "node:path"
9
- import Stream from "node:stream"
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
10
 
11
11
  /* external dependencies */
12
12
  import * as Transformers from "@huggingface/transformers"
@@ -0,0 +1,122 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* internal dependencies */
11
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
+
13
/* SpeechFlow node for data flow filtering (based on meta information):
   a chunk is forwarded only if the comparison "<var> <op> <val>" holds,
   where <var> selects a value of the chunk (a meta entry, the payload
   length or text, or the start/end timestamp in milliseconds) */
export default class SpeechFlowNodeFilter extends SpeechFlowNode {
    /* declare official node name */
    public static name = "filter"

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
            var:  { type: "string", pos: 1, val: "",      match: /^(?:meta:.+|payload:(?:length|text)|time:(?:start|end))$/ },
            op:   { type: "string", pos: 2, val: "==",    match: /^(?:<|<=|==|!=|~~|!~|>=|>)$/ },
            val:  { type: "string", pos: 3, val: "",      match: /^.*$/ }
        })

        /* declare node input/output format */
        this.input  = this.params.type
        this.output = this.params.type
    }

    /* open node */
    async open () {
        /* helper function for coercing a value into a string
           (a missing value, e.g. an unset meta entry, becomes the empty string)  */
        const coerceStr = (val: any): string =>
            typeof val === "string" ? val : (val === undefined || val === null ? "" : String(val))

        /* helper function for coercing a value into a number
           (values which are not numeric become NaN, which fails every ordering comparison)  */
        const coerceNum = (val: any): number => {
            if (typeof val === "number")
                return val
            if (typeof val === "string" && val.match(/^[\d+-]+$/))
                return parseInt(val, 10)
            if (typeof val === "string" && val.match(/^[\d.+-]+$/))
                return parseFloat(val)
            return Number(val)
        }

        /* helper function for comparing two values */
        const comparison = (val1: any, op: string, val2: any): boolean => {
            if (op === "==" || op === "!=") {
                /* equal comparison (on the string representations) */
                const equal = coerceStr(val1) === coerceStr(val2)
                return (op === "==" ? equal : !equal)
            }
            else if (op === "~~" || op === "!~") {
                /* regular expression comparison */
                const regexp = (val2 instanceof RegExp ? val2 : new RegExp(coerceStr(val2)))
                const matched = regexp.test(coerceStr(val1))
                return (op === "~~" ? matched : !matched)
            }
            else {
                /* non-equal comparison (on the numeric representations) */
                const num1 = coerceNum(val1)
                const num2 = coerceNum(val2)
                switch (op) {
                    case "<":  return num1 <  num2
                    case "<=": return num1 <= num2
                    case ">=": return num1 >= num2
                    case ">":  return num1 >  num2
                    default:   return false
                }
            }
        }

        /* resolve the configured variable, operator and value once:
           the parameters are named "var" and "op", and a regular expression
           is compiled here, so an invalid pattern fails already on open
           instead of later inside the stream processing  */
        const variable = this.params.var as string
        const op       = this.params.op  as string
        const metaSpec = variable.match(/^meta:(.+)$/)
        const metaKey  = (metaSpec !== null ? metaSpec[1] : null)
        const val2: any = (op === "~~" || op === "!~") ?
            new RegExp(this.params.val as string) :
            this.params.val

        /* provide Transform stream */
        const type = this.params.type
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                try {
                    /* determine the chunk value to compare */
                    let val1: any
                    if (metaKey !== null)
                        val1 = chunk.meta.get(metaKey)
                    else if (variable === "payload:length")
                        val1 = chunk.payload.length
                    else if (variable === "payload:text")
                        val1 = (type === "text" ? chunk.payload as string : "")
                    else if (variable === "time:start")
                        val1 = chunk.timestampStart.toMillis()
                    else if (variable === "time:end")
                        val1 = chunk.timestampEnd.toMillis()

                    /* forward the chunk only if the condition holds */
                    if (comparison(val1, op, val2))
                        this.push(chunk)
                    callback()
                }
                catch (err) {
                    /* report failures as regular stream errors */
                    callback(err instanceof Error ? err : new Error(String(err)))
                }
            },
            final (callback) {
                this.push(null)
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
@@ -5,7 +5,9 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
9
11
  import { Duration } from "luxon"
10
12
 
11
13
  /* internal dependencies */
@@ -41,7 +43,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
41
43
  this.log(level, msg)
42
44
  }
43
45
 
44
- /* provide Duplex stream and internally attach to Deepgram API */
46
+ /* provide Transform stream */
45
47
  const type = this.params.type
46
48
  this.stream = new Stream.Transform({
47
49
  writableObjectMode: true,
@@ -49,23 +51,38 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
49
51
  decodeStrings: false,
50
52
  transform (chunk: SpeechFlowChunk, encoding, callback) {
51
53
  let error: Error | undefined
52
- const fmt = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
54
+ const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
55
+ const fmtMeta = (meta: Map<string, any>) => {
56
+ if (meta.size === 0)
57
+ return "none"
58
+ else
59
+ return `{ ${Array.from(meta.entries())
60
+ .map(([ k, v ]) => `${k}: ${JSON.stringify(v)}`)
61
+ .join(", ")
62
+ } }`
63
+ }
53
64
  if (Buffer.isBuffer(chunk.payload)) {
54
65
  if (type === "audio")
55
- log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
56
- `end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type} ` +
57
- `payload-type=Buffer payload-bytes=${chunk.payload.byteLength}`)
66
+ log("debug", `chunk: type=${chunk.type} ` +
67
+ `kind=${chunk.kind} ` +
68
+ `start=${fmtTime(chunk.timestampStart)} ` +
69
+ `end=${fmtTime(chunk.timestampEnd)} ` +
70
+ `payload-type=Buffer payload-length=${chunk.payload.byteLength} ` +
71
+ `meta=${fmtMeta(chunk.meta)}`)
58
72
  else
59
- error = new Error(`writing ${type} chunk: seen Buffer instead of String chunk type`)
73
+ error = new Error(`${type} chunk: seen Buffer instead of String chunk type`)
60
74
  }
61
75
  else {
62
76
  if (type === "text")
63
- log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
64
- `end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type}` +
77
+ log("debug", `${type} chunk: type=${chunk.type}` +
78
+ `kind=${chunk.kind} ` +
79
+ `start=${fmtTime(chunk.timestampStart)} ` +
80
+ `end=${fmtTime(chunk.timestampEnd)} ` +
65
81
  `payload-type=String payload-length=${chunk.payload.length} ` +
66
- `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}"`)
82
+ `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}" ` +
83
+ `meta=${fmtMeta(chunk.meta)}`)
67
84
  else
68
- error = new Error(`writing ${type} chunk: seen String instead of Buffer chunk type`)
85
+ error = new Error(`${type} chunk: seen String instead of Buffer chunk type`)
69
86
  }
70
87
  if (error !== undefined)
71
88
  callback(error)