speechflow 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/CHANGELOG.md +17 -0
  2. package/etc/stx.conf +54 -58
  3. package/package.json +25 -106
  4. package/speechflow-cli/etc/stx.conf +77 -0
  5. package/speechflow-cli/package.json +116 -0
  6. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
  7. package/{src → speechflow-cli/src}/speechflow-node-a2a-vad.ts +14 -21
  8. package/{src → speechflow-cli/src}/speechflow-node-a2t-deepgram.ts +21 -38
  9. package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +10 -16
  10. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
  11. package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +5 -1
  12. package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +15 -7
  13. package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
  14. package/{src → speechflow-cli/src}/speechflow.ts +81 -25
  15. package/speechflow-ui-db/etc/eslint.mjs +106 -0
  16. package/speechflow-ui-db/etc/htmllint.json +55 -0
  17. package/speechflow-ui-db/etc/stx.conf +79 -0
  18. package/speechflow-ui-db/etc/stylelint.js +46 -0
  19. package/speechflow-ui-db/etc/stylelint.yaml +33 -0
  20. package/speechflow-ui-db/etc/tsc-client.json +30 -0
  21. package/speechflow-ui-db/etc/tsc.node.json +9 -0
  22. package/speechflow-ui-db/etc/vite-client.mts +63 -0
  23. package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
  24. package/speechflow-ui-db/package.json +75 -0
  25. package/speechflow-ui-db/src/app-icon.ai +1989 -4
  26. package/speechflow-ui-db/src/app-icon.svg +26 -0
  27. package/speechflow-ui-db/src/app.styl +64 -0
  28. package/speechflow-ui-db/src/app.vue +221 -0
  29. package/speechflow-ui-db/src/index.html +23 -0
  30. package/speechflow-ui-db/src/index.ts +26 -0
  31. package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
  32. package/speechflow-ui-db/src/tsconfig.json +3 -0
  33. package/speechflow-ui-st/etc/eslint.mjs +106 -0
  34. package/speechflow-ui-st/etc/htmllint.json +55 -0
  35. package/speechflow-ui-st/etc/stx.conf +79 -0
  36. package/speechflow-ui-st/etc/stylelint.js +46 -0
  37. package/speechflow-ui-st/etc/stylelint.yaml +33 -0
  38. package/speechflow-ui-st/etc/tsc-client.json +30 -0
  39. package/speechflow-ui-st/etc/tsc.node.json +9 -0
  40. package/speechflow-ui-st/etc/vite-client.mts +63 -0
  41. package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
  42. package/speechflow-ui-st/package.json +79 -0
  43. package/speechflow-ui-st/src/app-icon.ai +1989 -4
  44. package/speechflow-ui-st/src/app-icon.svg +26 -0
  45. package/speechflow-ui-st/src/app.styl +64 -0
  46. package/speechflow-ui-st/src/app.vue +142 -0
  47. package/speechflow-ui-st/src/index.html +23 -0
  48. package/speechflow-ui-st/src/index.ts +26 -0
  49. package/speechflow-ui-st/src/lib.d.ts +9 -0
  50. package/speechflow-ui-st/src/tsconfig.json +3 -0
  51. package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
  52. package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
  53. package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
  54. package/dst/speechflow-node-a2a-gender.d.ts +0 -20
  55. package/dst/speechflow-node-a2a-gender.js +0 -349
  56. package/dst/speechflow-node-a2a-gender.js.map +0 -1
  57. package/dst/speechflow-node-a2a-meter.d.ts +0 -14
  58. package/dst/speechflow-node-a2a-meter.js +0 -196
  59. package/dst/speechflow-node-a2a-meter.js.map +0 -1
  60. package/dst/speechflow-node-a2a-mute.d.ts +0 -17
  61. package/dst/speechflow-node-a2a-mute.js +0 -117
  62. package/dst/speechflow-node-a2a-mute.js.map +0 -1
  63. package/dst/speechflow-node-a2a-vad.d.ts +0 -19
  64. package/dst/speechflow-node-a2a-vad.js +0 -383
  65. package/dst/speechflow-node-a2a-vad.js.map +0 -1
  66. package/dst/speechflow-node-a2a-wav.d.ts +0 -11
  67. package/dst/speechflow-node-a2a-wav.js +0 -211
  68. package/dst/speechflow-node-a2a-wav.js.map +0 -1
  69. package/dst/speechflow-node-a2t-deepgram.d.ts +0 -19
  70. package/dst/speechflow-node-a2t-deepgram.js +0 -345
  71. package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
  72. package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -18
  73. package/dst/speechflow-node-t2a-elevenlabs.js +0 -244
  74. package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
  75. package/dst/speechflow-node-t2a-kokoro.d.ts +0 -14
  76. package/dst/speechflow-node-t2a-kokoro.js +0 -155
  77. package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
  78. package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
  79. package/dst/speechflow-node-t2t-deepl.js +0 -146
  80. package/dst/speechflow-node-t2t-deepl.js.map +0 -1
  81. package/dst/speechflow-node-t2t-format.d.ts +0 -11
  82. package/dst/speechflow-node-t2t-format.js +0 -82
  83. package/dst/speechflow-node-t2t-format.js.map +0 -1
  84. package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
  85. package/dst/speechflow-node-t2t-ollama.js +0 -247
  86. package/dst/speechflow-node-t2t-ollama.js.map +0 -1
  87. package/dst/speechflow-node-t2t-openai.d.ts +0 -13
  88. package/dst/speechflow-node-t2t-openai.js +0 -227
  89. package/dst/speechflow-node-t2t-openai.js.map +0 -1
  90. package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
  91. package/dst/speechflow-node-t2t-sentence.js +0 -250
  92. package/dst/speechflow-node-t2t-sentence.js.map +0 -1
  93. package/dst/speechflow-node-t2t-subtitle.d.ts +0 -12
  94. package/dst/speechflow-node-t2t-subtitle.js +0 -166
  95. package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
  96. package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
  97. package/dst/speechflow-node-t2t-transformers.js +0 -265
  98. package/dst/speechflow-node-t2t-transformers.js.map +0 -1
  99. package/dst/speechflow-node-x2x-filter.d.ts +0 -11
  100. package/dst/speechflow-node-x2x-filter.js +0 -117
  101. package/dst/speechflow-node-x2x-filter.js.map +0 -1
  102. package/dst/speechflow-node-x2x-trace.d.ts +0 -11
  103. package/dst/speechflow-node-x2x-trace.js +0 -104
  104. package/dst/speechflow-node-x2x-trace.js.map +0 -1
  105. package/dst/speechflow-node-xio-device.d.ts +0 -13
  106. package/dst/speechflow-node-xio-device.js +0 -230
  107. package/dst/speechflow-node-xio-device.js.map +0 -1
  108. package/dst/speechflow-node-xio-file.d.ts +0 -11
  109. package/dst/speechflow-node-xio-file.js +0 -216
  110. package/dst/speechflow-node-xio-file.js.map +0 -1
  111. package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
  112. package/dst/speechflow-node-xio-mqtt.js +0 -188
  113. package/dst/speechflow-node-xio-mqtt.js.map +0 -1
  114. package/dst/speechflow-node-xio-websocket.d.ts +0 -13
  115. package/dst/speechflow-node-xio-websocket.js +0 -278
  116. package/dst/speechflow-node-xio-websocket.js.map +0 -1
  117. package/dst/speechflow-node.d.ts +0 -63
  118. package/dst/speechflow-node.js +0 -177
  119. package/dst/speechflow-node.js.map +0 -1
  120. package/dst/speechflow-utils.d.ts +0 -74
  121. package/dst/speechflow-utils.js +0 -519
  122. package/dst/speechflow-utils.js.map +0 -1
  123. package/dst/speechflow.js +0 -787
  124. package/dst/speechflow.js.map +0 -1
  125. package/src/speechflow-node-a2a-meter.ts +0 -177
  126. package/src/speechflow-node-t2t-subtitle.ts +0 -149
  127. /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
  128. /package/{etc → speechflow-cli/etc}/eslint.mjs +0 -0
  129. /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
  130. /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
  131. /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
  132. /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
  133. /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
  134. /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
  135. /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
  136. /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
  137. /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
  138. /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
  139. /package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +0 -0
  140. /package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +0 -0
  141. /package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +0 -0
  142. /package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +0 -0
  143. /package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +0 -0
  144. /package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +0 -0
  145. /package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +0 -0
  146. /package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +0 -0
  147. /package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +0 -0
  148. /package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +0 -0
  149. /package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +0 -0
  150. /package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +0 -0
  151. /package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +0 -0
  152. /package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +0 -0
  153. /package/{src → speechflow-cli/src}/speechflow-utils.ts +0 -0
  154. /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
@@ -0,0 +1,217 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { getLUFS, getRMS, AudioData } from "audio-inspect"
12
+
13
+ /* internal dependencies */
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
+ import * as utils from "./speechflow-utils"
16
+
17
+ /* SpeechFlow node for audio metering */
18
+ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
19
+ /* declare official node name */
20
+ public static name = "meter"
21
+
22
+ /* internal state */
23
+ private emitInterval: ReturnType<typeof setInterval> | null = null
24
+ private calcInterval: ReturnType<typeof setInterval> | null = null
25
+ private pendingCalculations = new Set<ReturnType<typeof setTimeout>>()
26
+ private chunkBuffer = new Float32Array(0)
27
+ private destroyed = false
28
+
29
+ /* construct node */
30
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
31
+ super(id, cfg, opts, args)
32
+
33
+ /* declare node configuration parameters */
34
+ this.configure({
35
+ interval: { type: "number", pos: 0, val: 250 },
36
+ dashboard: { type: "string", val: "" }
37
+ })
38
+
39
+ /* declare node input/output format */
40
+ this.input = "audio"
41
+ this.output = "audio"
42
+ }
43
+
44
+ /* open node */
45
+ async open () {
46
+ /* sanity check situation */
47
+ if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
48
+ throw new Error("meter node currently supports PCM-S16LE audio only")
49
+
50
+ /* clear destruction flag */
51
+ this.destroyed = false
52
+
53
+ /* internal state */
54
+ const sampleWindowDuration = 3 /* LUFS-S requires 3s */
55
+ const sampleWindowSize = Math.floor(this.config.audioSampleRate * sampleWindowDuration)
56
+ let sampleWindow = new Float32Array(sampleWindowSize)
57
+ sampleWindow.fill(0, 0, sampleWindowSize)
58
+ let lufss = -60
59
+ let rms = -60
60
+
61
+ /* chunk processing state */
62
+ const chunkDuration = 0.050 /* meter update frequency is about 50ms */
63
+ const samplesPerChunk = Math.floor(this.config.audioSampleRate * chunkDuration)
64
+ this.chunkBuffer = new Float32Array(0)
65
+
66
+ /* define chunk processing function */
67
+ const processChunk = (chunkData: Float32Array) => {
68
+ /* update internal audio sample sliding window */
69
+ const newWindow = new Float32Array(sampleWindowSize)
70
+ const keepSize = sampleWindowSize - chunkData.length
71
+ newWindow.set(sampleWindow.slice(sampleWindow.length - keepSize), 0)
72
+ newWindow.set(chunkData, keepSize)
73
+ sampleWindow = newWindow
74
+
75
+ /* asynchronously calculate the LUFS-S metric */
76
+ const calculator = setTimeout(() => {
77
+ if (this.destroyed)
78
+ return
79
+ try {
80
+ this.pendingCalculations.delete(calculator)
81
+ const audioData = {
82
+ sampleRate: this.config.audioSampleRate,
83
+ numberOfChannels: this.config.audioChannels,
84
+ channelData: [ sampleWindow ],
85
+ duration: sampleWindowDuration,
86
+ length: sampleWindow.length
87
+ } satisfies AudioData
88
+ const lufs = getLUFS(audioData, {
89
+ channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
90
+ calculateShortTerm: true,
91
+ calculateMomentary: false,
92
+ calculateLoudnessRange: false,
93
+ calculateTruePeak: false
94
+ })
95
+ if (!this.destroyed) {
96
+ if (timer !== null) {
97
+ clearTimeout(timer)
98
+ timer = null
99
+ }
100
+ lufss = lufs.shortTerm ? lufs.shortTerm[0] : 0
101
+ rms = getRMS(audioData, { asDB: true })
102
+ timer = setTimeout(() => {
103
+ lufss = -60
104
+ rms = -60
105
+ }, 500)
106
+ }
107
+ }
108
+ catch (error) {
109
+ if (!this.destroyed)
110
+ this.log("warning", `meter calculation error: ${error}`)
111
+ }
112
+ }, 0)
113
+ this.pendingCalculations.add(calculator)
114
+ }
115
+
116
+ /* setup chunking interval */
117
+ this.calcInterval = setInterval(() => {
118
+ if (this.destroyed)
119
+ return
120
+
121
+ /* process one single 50ms chunk if available */
122
+ if (this.chunkBuffer.length >= samplesPerChunk) {
123
+ const chunkData = this.chunkBuffer.slice(0, samplesPerChunk)
124
+ processChunk(chunkData)
125
+ this.chunkBuffer = this.chunkBuffer.slice(samplesPerChunk)
126
+ }
127
+ }, chunkDuration * 1000)
128
+
129
+ /* setup loudness emitting interval */
130
+ this.emitInterval = setInterval(() => {
131
+ if (this.destroyed)
132
+ return
133
+ this.log("debug", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
134
+ this.sendResponse([ "meter", "LUFS-S", lufss ])
135
+ this.sendResponse([ "meter", "RMS", rms ])
136
+ if (this.params.dashboard !== "")
137
+ this.dashboardInfo("audio", this.params.dashboard, "final", lufss)
138
+ }, this.params.interval)
139
+
140
+ /* provide Duplex stream and internally attach to meter */
141
+ const self = this
142
+ let timer: ReturnType<typeof setTimeout> | null = null
143
+ this.stream = new Stream.Transform({
144
+ writableObjectMode: true,
145
+ readableObjectMode: true,
146
+ decodeStrings: false,
147
+ highWaterMark: 1,
148
+
149
+ /* transform audio chunk */
150
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
151
+ if (self.destroyed) {
152
+ callback(new Error("stream already destroyed"))
153
+ return
154
+ }
155
+ if (!Buffer.isBuffer(chunk.payload))
156
+ callback(new Error("expected audio input as Buffer chunks"))
157
+ else if (chunk.payload.byteLength === 0)
158
+ callback()
159
+ else {
160
+ try {
161
+ /* convert audio samples from PCM/I16 to PCM/F32 */
162
+ const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
163
+
164
+ /* append new data to buffer */
165
+ const combinedLength = self.chunkBuffer.length + data.length
166
+ const newBuffer = new Float32Array(combinedLength)
167
+ newBuffer.set(self.chunkBuffer, 0)
168
+ newBuffer.set(data, self.chunkBuffer.length)
169
+ self.chunkBuffer = newBuffer
170
+
171
+ /* pass-through original audio chunk */
172
+ this.push(chunk)
173
+ callback()
174
+ }
175
+ catch (error) {
176
+ callback(error instanceof Error ? error : new Error("Meter processing failed"))
177
+ }
178
+ }
179
+ },
180
+ final (callback) {
181
+ if (self.destroyed) {
182
+ callback()
183
+ return
184
+ }
185
+ this.push(null)
186
+ callback()
187
+ }
188
+ })
189
+ }
190
+
191
+ /* close node */
192
+ async close () {
193
+ /* indicate destruction */
194
+ this.destroyed = true
195
+
196
+ /* clear all pending calculations */
197
+ for (const timeout of this.pendingCalculations)
198
+ clearTimeout(timeout)
199
+ this.pendingCalculations.clear()
200
+
201
+ /* stop intervals */
202
+ if (this.emitInterval !== null) {
203
+ clearInterval(this.emitInterval)
204
+ this.emitInterval = null
205
+ }
206
+ if (this.calcInterval !== null) {
207
+ clearInterval(this.calcInterval)
208
+ this.calcInterval = null
209
+ }
210
+
211
+ /* close stream */
212
+ if (this.stream !== null) {
213
+ this.stream.destroy()
214
+ this.stream = null
215
+ }
216
+ }
217
+ }
@@ -77,6 +77,14 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
77
77
  const vadSampleRateTarget = 16000 /* internal target of VAD */
78
78
  const vadSamplesPerFrame = 512 /* required for VAD v5 */
79
79
 
80
+ /* helper function for timer cleanup */
81
+ const clearTailTimer = () => {
82
+ if (this.tailTimer !== null) {
83
+ clearTimeout(this.tailTimer)
84
+ this.tailTimer = null
85
+ }
86
+ }
87
+
80
88
  /* establish Voice Activity Detection (VAD) facility */
81
89
  let tail = false
82
90
  try {
@@ -95,10 +103,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
95
103
  this.log("info", "VAD: speech start")
96
104
  if (this.params.mode === "unplugged") {
97
105
  tail = false
98
- if (this.tailTimer !== null) {
99
- clearTimeout(this.tailTimer)
100
- this.tailTimer = null
101
- }
106
+ clearTailTimer()
102
107
  }
103
108
  },
104
109
  onSpeechEnd: (audio) => {
@@ -108,10 +113,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
108
113
  this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
109
114
  if (this.params.mode === "unplugged") {
110
115
  tail = true
111
- if (this.tailTimer !== null) {
112
- clearTimeout(this.tailTimer)
113
- this.tailTimer = null
114
- }
116
+ clearTailTimer()
115
117
  this.tailTimer = setTimeout(() => {
116
118
  if (this.destroyed || this.tailTimer === null)
117
119
  return
@@ -121,14 +123,12 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
121
123
  }
122
124
  },
123
125
  onVADMisfire: () => {
124
- if (this.destroyed) return
126
+ if (this.destroyed)
127
+ return
125
128
  this.log("info", "VAD: speech end (segment too short)")
126
129
  if (this.params.mode === "unplugged") {
127
130
  tail = true
128
- if (this.tailTimer !== null) {
129
- clearTimeout(this.tailTimer)
130
- this.tailTimer = null
131
- }
131
+ clearTailTimer()
132
132
  this.tailTimer = setTimeout(() => {
133
133
  if (this.destroyed || this.tailTimer === null)
134
134
  return
@@ -152,14 +152,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
152
152
 
153
153
  /* annotate the entire audio chunk */
154
154
  if (element.segmentIdx >= element.segmentData.length) {
155
- let isSpeech = false
156
- for (const segment of element.segmentData) {
157
- if (segment.isSpeech) {
158
- isSpeech = true
159
- break
160
- }
161
- }
162
- element.isSpeech = isSpeech
155
+ element.isSpeech = element.segmentData.some(segment => segment.isSpeech)
163
156
  this.queueVAD.touch()
164
157
  this.queueVAD.walk(+1)
165
158
  }
@@ -33,11 +33,12 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
33
33
 
34
34
  /* declare node configuration parameters */
35
35
  this.configure({
36
- key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
37
- keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
38
- model: { type: "string", val: "nova-3", pos: 0 },
39
- version: { type: "string", val: "latest", pos: 1 },
40
- language: { type: "string", val: "multi", pos: 2 }
36
+ key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
37
+ keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
38
+ model: { type: "string", val: "nova-2", pos: 0 },
39
+ version: { type: "string", val: "latest", pos: 1 },
40
+ language: { type: "string", val: "multi", pos: 2 },
41
+ interim: { type: "boolean", val: false, pos: 3 }
41
42
  })
42
43
 
43
44
  /* declare node input/output format */
@@ -96,14 +97,15 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
96
97
  sample_rate: this.config.audioSampleRate,
97
98
  encoding: "linear16",
98
99
  multichannel: false,
99
- endpointing: 10,
100
- interim_results: false,
100
+ endpointing: false,
101
+ interim_results: this.params.interim,
101
102
  smart_format: true,
102
103
  punctuate: true,
103
104
  filler_words: true,
104
- diarize: false,
105
105
  numerals: true,
106
- profanity_filter: false
106
+ diarize: false,
107
+ profanity_filter: false,
108
+ redact: false
107
109
  })
108
110
 
109
111
  /* hook onto Deepgram API events */
@@ -113,6 +115,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
113
115
  const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
114
116
  const words = (data.channel?.alternatives[0]?.words ?? []) as
115
117
  { word: string, punctuated_word?: string, start: number, end: number }[]
118
+ const isFinal = (data.is_final ?? false) as boolean
116
119
  if (text === "")
117
120
  this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
118
121
  else {
@@ -130,10 +133,17 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
130
133
  const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
131
134
  return { word: word.punctuated_word ?? word.word, start, end }
132
135
  }))
133
- const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
136
+ const chunk = new SpeechFlowChunk(start, end,
137
+ isFinal ? "final" : "intermediate", "text", text, meta)
134
138
  this.queue.write(chunk)
135
139
  }
136
140
  })
141
+ this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => {
142
+ this.log("info", "speech started", data)
143
+ })
144
+ this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => {
145
+ this.log("info", "utterance end received", data)
146
+ })
137
147
  this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
138
148
  this.log("info", "metadata received")
139
149
  })
@@ -170,31 +180,6 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
170
180
  /* remember opening time to receive time zero offset */
171
181
  this.timeOpen = DateTime.now()
172
182
 
173
- /* workaround Deepgram initialization problems */
174
- let initDone = false
175
- const initTimeoutStart = () => {
176
- if (initDone || this.destroyed)
177
- return
178
- this.initTimeout = setTimeout(async () => {
179
- if (this.initTimeout === null || this.destroyed)
180
- return
181
- this.initTimeout = null
182
- this.log("warning", "initialization timeout -- restarting service usage")
183
- await this.close()
184
- if (!this.destroyed)
185
- await this.open()
186
- }, 3 * 1000)
187
- }
188
- const initTimeoutStop = () => {
189
- if (initDone)
190
- return
191
- initDone = true
192
- if (this.initTimeout !== null) {
193
- clearTimeout(this.initTimeout)
194
- this.initTimeout = null
195
- }
196
- }
197
-
198
183
  /* provide Duplex stream and internally attach to Deepgram API */
199
184
  const self = this
200
185
  this.stream = new Stream.Duplex({
@@ -214,7 +199,6 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
214
199
  else {
215
200
  if (chunk.payload.byteLength > 0) {
216
201
  self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
217
- initTimeoutStart()
218
202
  if (chunk.meta.size > 0)
219
203
  metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
220
204
  try {
@@ -256,8 +240,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
256
240
  this.push(null)
257
241
  }
258
242
  else {
259
- self.log("info", `received data (${chunk.payload.length} bytes)`)
260
- initTimeoutStop()
243
+ self.log("debug", `received data (${chunk.payload.length} bytes)`)
261
244
  this.push(chunk, self.config.textEncoding)
262
245
  }
263
246
  }).catch((error) => {
@@ -151,22 +151,22 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
151
151
  processTimeout = null
152
152
  callback(new Error("ElevenLabs API timeout"))
153
153
  }, 60 * 1000)
154
+ const clearProcessTimeout = () => {
155
+ if (processTimeout !== null) {
156
+ clearTimeout(processTimeout)
157
+ processTimeout = null
158
+ }
159
+ }
154
160
  try {
155
161
  const stream = await speechStream(chunk.payload as string)
156
162
  if (self.destroyed) {
157
- if (processTimeout !== null) {
158
- clearTimeout(processTimeout)
159
- processTimeout = null
160
- }
163
+ clearProcessTimeout()
161
164
  callback(new Error("stream destroyed during processing"))
162
165
  return
163
166
  }
164
167
  const buffer = await getStreamAsBuffer(stream)
165
168
  if (self.destroyed) {
166
- if (processTimeout !== null) {
167
- clearTimeout(processTimeout)
168
- processTimeout = null
169
- }
169
+ clearProcessTimeout()
170
170
  callback(new Error("stream destroyed during processing"))
171
171
  return
172
172
  }
@@ -175,18 +175,12 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
175
175
  const chunkNew = chunk.clone()
176
176
  chunkNew.type = "audio"
177
177
  chunkNew.payload = bufferResampled
178
- if (processTimeout !== null) {
179
- clearTimeout(processTimeout)
180
- processTimeout = null
181
- }
178
+ clearProcessTimeout()
182
179
  this.push(chunkNew)
183
180
  callback()
184
181
  }
185
182
  catch (error) {
186
- if (processTimeout !== null) {
187
- clearTimeout(processTimeout)
188
- processTimeout = null
189
- }
183
+ clearProcessTimeout()
190
184
  callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
191
185
  }
192
186
  })()