speechflow 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +85 -67
  3. package/dst/speechflow-node-a2a-gender.js +33 -39
  4. package/dst/speechflow-node-a2a-gender.js.map +1 -1
  5. package/dst/speechflow-node-a2a-meter.js +1 -0
  6. package/dst/speechflow-node-a2a-meter.js.map +1 -1
  7. package/dst/speechflow-node-a2a-vad.js +32 -3
  8. package/dst/speechflow-node-a2a-vad.js.map +1 -1
  9. package/dst/speechflow-node-a2a-wav.js +1 -0
  10. package/dst/speechflow-node-a2a-wav.js.map +1 -1
  11. package/dst/speechflow-node-a2t-deepgram.js +1 -0
  12. package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  13. package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
  14. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  15. package/dst/speechflow-node-t2a-kokoro.js +1 -0
  16. package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  17. package/dst/speechflow-node-t2t-deepl.js +1 -0
  18. package/dst/speechflow-node-t2t-deepl.js.map +1 -1
  19. package/dst/speechflow-node-t2t-format.js +1 -0
  20. package/dst/speechflow-node-t2t-format.js.map +1 -1
  21. package/dst/speechflow-node-t2t-ollama.js +1 -0
  22. package/dst/speechflow-node-t2t-ollama.js.map +1 -1
  23. package/dst/speechflow-node-t2t-openai.js +1 -0
  24. package/dst/speechflow-node-t2t-openai.js.map +1 -1
  25. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  26. package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  27. package/dst/speechflow-node-t2t-transformers.js +1 -0
  28. package/dst/speechflow-node-t2t-transformers.js.map +1 -1
  29. package/dst/speechflow-node-x2x-filter.js +3 -2
  30. package/dst/speechflow-node-x2x-filter.js.map +1 -1
  31. package/dst/speechflow-node-x2x-trace.js +3 -2
  32. package/dst/speechflow-node-x2x-trace.js.map +1 -1
  33. package/dst/speechflow-node-xio-device.js +1 -0
  34. package/dst/speechflow-node-xio-device.js.map +1 -1
  35. package/dst/speechflow-node-xio-mqtt.js +1 -0
  36. package/dst/speechflow-node-xio-mqtt.js.map +1 -1
  37. package/dst/speechflow-node-xio-websocket.js +2 -0
  38. package/dst/speechflow-node-xio-websocket.js.map +1 -1
  39. package/dst/speechflow-utils.js +2 -0
  40. package/dst/speechflow-utils.js.map +1 -1
  41. package/dst/speechflow.js +18 -22
  42. package/dst/speechflow.js.map +1 -1
  43. package/etc/speechflow.yaml +28 -31
  44. package/etc/stx.conf +14 -0
  45. package/package.json +7 -7
  46. package/src/lib.d.ts +0 -14
  47. package/src/speechflow-node-a2a-gender.ts +34 -42
  48. package/src/speechflow-node-a2a-meter.ts +1 -0
  49. package/src/speechflow-node-a2a-vad.ts +32 -3
  50. package/src/speechflow-node-a2a-wav.ts +1 -0
  51. package/src/speechflow-node-a2t-deepgram.ts +1 -0
  52. package/src/speechflow-node-t2a-elevenlabs.ts +1 -0
  53. package/src/speechflow-node-t2a-kokoro.ts +1 -0
  54. package/src/speechflow-node-t2t-deepl.ts +1 -0
  55. package/src/speechflow-node-t2t-format.ts +1 -0
  56. package/src/speechflow-node-t2t-ollama.ts +1 -0
  57. package/src/speechflow-node-t2t-openai.ts +1 -0
  58. package/src/speechflow-node-t2t-subtitle.ts +1 -0
  59. package/src/speechflow-node-t2t-transformers.ts +1 -0
  60. package/src/speechflow-node-x2x-filter.ts +3 -2
  61. package/src/speechflow-node-x2x-trace.ts +3 -2
  62. package/src/speechflow-node-xio-device.ts +1 -0
  63. package/src/speechflow-node-xio-mqtt.ts +1 -0
  64. package/src/speechflow-node-xio-websocket.ts +2 -0
  65. package/src/speechflow-utils.ts +2 -0
  66. package/src/speechflow.ts +12 -17
@@ -47,12 +47,13 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
47
47
 
48
48
  /* declare node configuration parameters */
49
49
  this.configure({
50
- mode: { type: "string", val: "unplugged", match: /^(?:silenced|unplugged)$/ },
50
+ mode: { type: "string", val: "silenced", match: /^(?:silenced|unplugged)$/ },
51
51
  posSpeechThreshold: { type: "number", val: 0.50 },
52
52
  negSpeechThreshold: { type: "number", val: 0.35 },
53
53
  minSpeechFrames: { type: "number", val: 2 },
54
54
  redemptionFrames: { type: "number", val: 12 },
55
- preSpeechPadFrames: { type: "number", val: 1 }
55
+ preSpeechPadFrames: { type: "number", val: 1 },
56
+ postSpeechTail: { type: "number", val: 1500 }
56
57
  })
57
58
 
58
59
  /* declare node input/output format */
@@ -74,6 +75,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
74
75
  const vadSamplesPerFrame = 512 /* required for VAD v5 */
75
76
 
76
77
  /* establish Voice Activity Detection (VAD) facility */
78
+ let tail = false
79
+ let tailTimer: ReturnType<typeof setTimeout> | null = null
77
80
  this.vad = await RealTimeVAD.new({
78
81
  model: "v5",
79
82
  sampleRate: this.config.audioSampleRate, /* before resampling to 16KHz */
@@ -85,13 +88,38 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
85
88
  preSpeechPadFrames: this.params.preSpeechPadFrames,
86
89
  onSpeechStart: () => {
87
90
  log("info", "VAD: speech start")
91
 + if (this.params.mode === "unplugged") {
92
+ tail = false
93
+ if (tailTimer !== null) {
94
+ clearTimeout(tailTimer)
95
+ tailTimer = null
96
+ }
97
+ }
88
98
  },
89
99
  onSpeechEnd: (audio) => {
90
100
  const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
91
101
  log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
102
 + if (this.params.mode === "unplugged") {
103
+ tail = true
104
+ if (tailTimer !== null)
105
+ clearTimeout(tailTimer)
106
+ tailTimer = setTimeout(() => {
107
+ tail = false
108
+ tailTimer = null
109
+ }, this.params.postSpeechTail)
110
+ }
92
111
  },
93
112
  onVADMisfire: () => {
94
113
  log("info", "VAD: speech end (segment too short)")
114
 + if (this.params.mode === "unplugged") {
115
+ tail = true
116
+ if (tailTimer !== null)
117
+ clearTimeout(tailTimer)
118
+ tailTimer = setTimeout(() => {
119
+ tail = false
120
+ tailTimer = null
121
+ }, this.params.postSpeechTail)
122
+ }
95
123
  },
96
124
  onFrameProcessed: (audio) => {
97
125
  /* annotate the current audio segment */
@@ -99,7 +127,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
99
127
  if (element === undefined || element.type !== "audio-frame")
100
128
  throw new Error("internal error which cannot happen: no more queued element")
101
129
  const segment = element.segmentData[element.segmentIdx++]
102
- segment.isSpeech = (audio.isSpeech > audio.notSpeech)
130
+ segment.isSpeech = (audio.isSpeech > audio.notSpeech) || tail
103
131
 
104
132
  /* annotate the entire audio chunk */
105
133
  if (element.segmentIdx >= element.segmentData.length) {
@@ -124,6 +152,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
124
152
  writableObjectMode: true,
125
153
  readableObjectMode: true,
126
154
  decodeStrings: false,
155
+ highWaterMark: 1,
127
156
 
128
157
  /* receive audio chunk (writable side of stream) */
129
158
  write (chunk: SpeechFlowChunk, encoding, callback) {
@@ -101,6 +101,7 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
101
101
  readableObjectMode: true,
102
102
  writableObjectMode: true,
103
103
  decodeStrings: false,
104
+ highWaterMark: 1,
104
105
  transform (chunk: SpeechFlowChunk, encoding, callback) {
105
106
  if (!Buffer.isBuffer(chunk.payload))
106
107
  callback(new Error("invalid chunk payload type"))
@@ -180,6 +180,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
180
180
  writableObjectMode: true,
181
181
  readableObjectMode: true,
182
182
  decodeStrings: false,
183
+ highWaterMark: 1,
183
184
  write (chunk: SpeechFlowChunk, encoding, callback) {
184
185
  if (chunk.type !== "audio")
185
186
  callback(new Error("expected audio input chunk"))
@@ -128,6 +128,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
128
128
  writableObjectMode: true,
129
129
  readableObjectMode: true,
130
130
  decodeStrings: false,
131
+ highWaterMark: 1,
131
132
  transform (chunk: SpeechFlowChunk, encoding, callback) {
132
133
  if (Buffer.isBuffer(chunk.payload))
133
134
  callback(new Error("invalid chunk payload type"))
@@ -121,6 +121,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
121
121
  writableObjectMode: true,
122
122
  readableObjectMode: true,
123
123
  decodeStrings: false,
124
+ highWaterMark: 1,
124
125
  transform (chunk: SpeechFlowChunk, encoding, callback) {
125
126
  if (Buffer.isBuffer(chunk.payload))
126
127
  callback(new Error("invalid chunk payload type"))
@@ -74,6 +74,7 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
74
74
  readableObjectMode: true,
75
75
  writableObjectMode: true,
76
76
  decodeStrings: false,
77
+ highWaterMark: 1,
77
78
  transform (chunk: SpeechFlowChunk, encoding, callback) {
78
79
  if (Buffer.isBuffer(chunk.payload))
79
80
  callback(new Error("invalid chunk payload type"))
@@ -46,6 +46,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
46
46
  readableObjectMode: true,
47
47
  writableObjectMode: true,
48
48
  decodeStrings: false,
49
+ highWaterMark: 1,
49
50
  transform (chunk: SpeechFlowChunk, encoding, callback) {
50
51
  if (Buffer.isBuffer(chunk.payload))
51
52
  callback(new Error("invalid chunk payload type"))
@@ -222,6 +222,7 @@ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
222
222
  readableObjectMode: true,
223
223
  writableObjectMode: true,
224
224
  decodeStrings: false,
225
+ highWaterMark: 1,
225
226
  transform (chunk: SpeechFlowChunk, encoding, callback) {
226
227
  if (Buffer.isBuffer(chunk.payload))
227
228
  callback(new Error("invalid chunk payload type"))
@@ -203,6 +203,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
203
203
  readableObjectMode: true,
204
204
  writableObjectMode: true,
205
205
  decodeStrings: false,
206
+ highWaterMark: 1,
206
207
  transform (chunk: SpeechFlowChunk, encoding, callback) {
207
208
  if (Buffer.isBuffer(chunk.payload))
208
209
  callback(new Error("invalid chunk payload type"))
@@ -63,6 +63,7 @@ export default class SpeechFlowNodeSubtitle extends SpeechFlowNode {
63
63
  readableObjectMode: true,
64
64
  writableObjectMode: true,
65
65
  decodeStrings: false,
66
+ highWaterMark: 1,
66
67
  transform (chunk: SpeechFlowChunk, encoding, callback) {
67
68
  if (Buffer.isBuffer(chunk.payload))
68
69
  callback(new Error("invalid chunk payload type"))
@@ -200,6 +200,7 @@ export default class SpeechFlowNodeTransformers extends SpeechFlowNode {
200
200
  readableObjectMode: true,
201
201
  writableObjectMode: true,
202
202
  decodeStrings: false,
203
+ highWaterMark: 1,
203
204
  transform (chunk: SpeechFlowChunk, encoding, callback) {
204
205
  if (Buffer.isBuffer(chunk.payload))
205
206
  callback(new Error("invalid chunk payload type"))
@@ -86,12 +86,13 @@ export default class SpeechFlowNodeFilter extends SpeechFlowNode {
86
86
  writableObjectMode: true,
87
87
  readableObjectMode: true,
88
88
  decodeStrings: false,
89
+ highWaterMark: 1,
89
90
  transform (chunk: SpeechFlowChunk, encoding, callback) {
90
91
  let val1: any
91
92
  const val2: any = self.params.val
92
93
  const m = self.params.var.match(/^meta:(.+)$/)
93
94
  if (m !== null)
94
- val1 = chunk.meta.get(m[1])
95
+ val1 = chunk.meta.get(m[1]) ?? ""
95
96
  else if (self.params.key === "payload:length")
96
97
  val1 = chunk.payload.length
97
98
  else if (self.params.key === "payload:text")
@@ -100,7 +101,7 @@ export default class SpeechFlowNodeFilter extends SpeechFlowNode {
100
101
  val1 = chunk.timestampStart.toMillis()
101
102
  else if (self.params.key === "time:end")
102
103
  val1 = chunk.timestampEnd.toMillis()
103
- if (comparison(val1, self.params.ops, val2))
104
+ if (comparison(val1, self.params.op, val2))
104
105
  this.push(chunk)
105
106
  callback()
106
107
  },
@@ -49,6 +49,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
49
49
  writableObjectMode: true,
50
50
  readableObjectMode: true,
51
51
  decodeStrings: false,
52
+ highWaterMark: 1,
52
53
  transform (chunk: SpeechFlowChunk, encoding, callback) {
53
54
  let error: Error | undefined
54
55
  const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
@@ -74,12 +75,12 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
74
75
  }
75
76
  else {
76
77
  if (type === "text")
77
- log("debug", `${type} chunk: type=${chunk.type}` +
78
+ log("debug", `${type} chunk: type=${chunk.type} ` +
78
79
  `kind=${chunk.kind} ` +
79
80
  `start=${fmtTime(chunk.timestampStart)} ` +
80
81
  `end=${fmtTime(chunk.timestampEnd)} ` +
81
82
  `payload-type=String payload-length=${chunk.payload.length} ` +
82
- `payload-encoding=${encoding} payload-content="${chunk.payload.toString()}" ` +
83
+ `payload-content="${chunk.payload.toString()}" ` +
83
84
  `meta=${fmtMeta(chunk.meta)}`)
84
85
  else
85
86
  error = new Error(`${type} chunk: seen String instead of Buffer chunk type`)
@@ -199,6 +199,7 @@ export default class SpeechFlowNodeDevice extends SpeechFlowNode {
199
199
  async close () {
200
200
  /* shutdown PortAudio */
201
201
  if (this.io !== null) {
202
+ this.io.abort()
202
203
  this.io.quit()
203
204
  this.io = null
204
205
  }
@@ -111,6 +111,7 @@ export default class SpeechFlowNodeMQTT extends SpeechFlowNode {
111
111
  writableObjectMode: true,
112
112
  readableObjectMode: true,
113
113
  decodeStrings: false,
114
+ highWaterMark: 1,
114
115
  write (chunk: SpeechFlowChunk, encoding, callback) {
115
116
  if (mode === "r")
116
117
  callback(new Error("write operation on read-only node"))
@@ -114,6 +114,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
114
114
  writableObjectMode: true,
115
115
  readableObjectMode: true,
116
116
  decodeStrings: false,
117
+ highWaterMark: 1,
117
118
  write (chunk: SpeechFlowChunk, encoding, callback) {
118
119
  if (mode === "r")
119
120
  callback(new Error("write operation on read-only node"))
@@ -195,6 +196,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
195
196
  writableObjectMode: true,
196
197
  readableObjectMode: true,
197
198
  decodeStrings: false,
199
+ highWaterMark: 1,
198
200
  write (chunk: SpeechFlowChunk, encoding, callback) {
199
201
  if (mode === "r")
200
202
  callback(new Error("write operation on read-only node"))
@@ -68,6 +68,7 @@ export function createTransformStreamForWritableSide () {
68
68
  readableObjectMode: true,
69
69
  writableObjectMode: true,
70
70
  decodeStrings: false,
71
+ highWaterMark: 1,
71
72
  transform (chunk: SpeechFlowChunk, encoding, callback) {
72
73
  this.push(chunk.payload)
73
74
  callback()
@@ -86,6 +87,7 @@ export function createTransformStreamForReadableSide (type: "text" | "audio", ge
86
87
  readableObjectMode: true,
87
88
  writableObjectMode: true,
88
89
  decodeStrings: false,
90
+ highWaterMark: (type === "audio" ? 19200 : 65536),
89
91
  transform (chunk: Buffer | string, encoding, callback) {
90
92
  const timeZero = getTimeZero()
91
93
  const start = DateTime.now().diff(timeZero)
package/src/speechflow.ts CHANGED
@@ -61,11 +61,11 @@ type wsPeerInfo = {
61
61
  "Usage: $0 " +
62
62
  "[-h|--help] " +
63
63
  "[-V|--version] " +
64
+ "[-S|--status] " +
64
65
  "[-v|--verbose <level>] " +
65
66
  "[-a|--address <ip-address>] " +
66
67
  "[-p|--port <tcp-port>] " +
67
68
  "[-C|--cache <directory>] " +
68
- "[-S|--status] " +
69
69
  "[-e|--expression <expression>] " +
70
70
  "[-f|--file <file>] " +
71
71
  "[-c|--config <id>@<yaml-config-file>] " +
@@ -80,6 +80,14 @@ type wsPeerInfo = {
80
80
  default: false,
81
81
  describe: "show program version information"
82
82
  })
83
+ .option("S", {
84
+ alias: "status",
85
+ type: "boolean",
86
+ array: false,
87
+ coerce,
88
+ default: false,
89
+ describe: "show one-time status of nodes"
90
+ })
83
91
  .option("v", {
84
92
  alias: "log-level",
85
93
  type: "string",
@@ -116,14 +124,6 @@ type wsPeerInfo = {
116
124
  default: path.join(dataDir, "cache"),
117
125
  describe: "directory for cached files (primarily AI model files)"
118
126
  })
119
- .option("S", {
120
- alias: "status",
121
- type: "boolean",
122
- array: false,
123
- coerce,
124
- default: false,
125
- describe: "show one-time status of nodes"
126
- })
127
127
  .option("e", {
128
128
  alias: "expression",
129
129
  type: "string",
@@ -423,6 +423,7 @@ type wsPeerInfo = {
423
423
  }
424
424
 
425
425
  /* graph processing: PASS 3: open nodes */
426
+ const timeZero = DateTime.now()
426
427
  for (const node of graphNodes) {
427
428
  /* connect node events */
428
429
  node.on("log", (level: string, msg: string, data?: any) => {
@@ -434,20 +435,14 @@ type wsPeerInfo = {
434
435
 
435
436
  /* open node */
436
437
  cli!.log("info", `open node <${node.id}>`)
438
+ node.setTimeZero(timeZero)
437
439
  await node.open().catch((err: Error) => {
438
440
  cli!.log("error", `[${node.id}]: ${err.message}`)
439
441
  throw new Error(`failed to open node <${node.id}>`)
440
442
  })
441
443
  }
442
444
 
443
- /* graph processing: PASS 4: set time zero in all nodes */
444
- const timeZero = DateTime.now()
445
- for (const node of graphNodes) {
446
- cli!.log("info", `set time zero in node <${node.id}>`)
447
- node.setTimeZero(timeZero)
448
- }
449
-
450
- /* graph processing: PASS 5: connect node streams */
445
+ /* graph processing: PASS 4: connect node streams */
451
446
  for (const node of graphNodes) {
452
447
  if (node.stream === null)
453
448
  throw new Error(`stream of node <${node.id}> still not initialized`)