speechflow 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.ase/service.log +10272 -0
  2. package/.claude/CLAUDE.md +1 -0
  3. package/AGENTS.md +1 -1
  4. package/CHANGELOG.md +10 -0
  5. package/README.md +10 -37
  6. package/package.json +6 -6
  7. package/speechflow-cli/dst/speechflow-main-graph.js +14 -5
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +7 -0
  10. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +145 -62
  11. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-node-a2t-google.js +10 -4
  13. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +10 -4
  15. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  16. package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.d.ts → speechflow-node-t2t-proofread.d.ts} +1 -1
  17. package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.js → speechflow-node-t2t-proofread.js} +38 -54
  18. package/speechflow-cli/dst/speechflow-node-t2t-proofread.js.map +1 -0
  19. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +44 -11
  20. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
  22. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
  24. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
  25. package/speechflow-cli/dst/speechflow-node-xio-device.js +4 -1
  26. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-xio-exec.js +4 -1
  28. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-util-llm.d.ts +1 -0
  30. package/speechflow-cli/dst/speechflow-util-llm.js +6 -2
  31. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-util-misc.js +28 -14
  33. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow.js.map +1 -1
  35. package/speechflow-cli/etc/oxlint.jsonc +9 -2
  36. package/speechflow-cli/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
  37. package/speechflow-cli/package.json +30 -30
  38. package/speechflow-cli/src/speechflow-main-graph.ts +14 -5
  39. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +156 -66
  40. package/speechflow-cli/src/speechflow-node-a2t-google.ts +10 -4
  41. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +10 -4
  42. package/speechflow-cli/src/{speechflow-node-t2t-spellcheck.ts → speechflow-node-t2t-proofread.ts} +46 -59
  43. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
  44. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
  45. package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -1
  46. package/speechflow-cli/src/speechflow-node-xio-exec.ts +4 -1
  47. package/speechflow-cli/src/speechflow-util-llm.ts +7 -2
  48. package/speechflow-cli/src/speechflow-util-misc.ts +33 -16
  49. package/speechflow-cli/src/speechflow.ts +1 -0
  50. package/speechflow-ui-db/dst/index.js +19 -16
  51. package/speechflow-ui-db/etc/oxlint.jsonc +0 -1
  52. package/speechflow-ui-db/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
  53. package/speechflow-ui-db/package.json +15 -15
  54. package/speechflow-ui-st/dst/index.js +33 -33
  55. package/speechflow-ui-st/etc/oxlint.jsonc +2 -2
  56. package/speechflow-ui-st/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
  57. package/speechflow-ui-st/package.json +15 -15
  58. package/.claude/settings.local.json +0 -3
  59. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +0 -13
  60. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +0 -219
  61. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +0 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +0 -1
  63. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +0 -200
@@ -23,8 +23,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
23
23
  /* internal state */
24
24
  private dg: Deepgram.LiveClient | null = null
25
25
  private closing = false
26
+ private reconfiguring = false
26
27
  private connectionTimeout: ReturnType<typeof setTimeout> | null = null
27
28
  private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
29
+ private metastore: util.TimeStore<Map<string, any>> | null = null
30
+ private suspended = false
28
31
 
29
32
  /* construct node */
30
33
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -39,7 +42,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
39
42
  language: { type: "string", val: "multi", pos: 2 },
40
43
  interim: { type: "boolean", val: false, pos: 3 },
41
44
  endpointing: { type: "number", val: 0, pos: 4 },
42
- keywords: { type: "string", val: "", pos: 5 }
45
+ keywords: { type: "string", val: "", pos: 5 },
46
+ suspended: { type: "boolean", val: false, pos: 6 }
43
47
  })
44
48
 
45
49
  /* sanity check parameters */
@@ -51,43 +55,55 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
51
55
  this.output = "text"
52
56
  }
53
57
 
54
- /* one-time status of node */
55
- async status () {
56
- let balance = 0
58
+ /* receive external request */
59
+ async receiveRequest (params: any[]) {
60
+ if (this.closing)
61
+ throw new Error("deepgram: node already destroyed")
57
62
  try {
58
- const deepgram = Deepgram.createClient(this.params.keyAdm)
59
- const response = await deepgram.manage.getProjects()
60
- if (response !== null && response.error === null && response.result?.projects) {
61
- for (const project of response.result.projects) {
62
- const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id)
63
- if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances)
64
- balance += balanceResponse.result.balances[0]?.amount ?? 0
65
- }
63
+ if (params.length === 2 && params[0] === "suspended") {
64
+ if (typeof params[1] !== "boolean")
65
+ throw new Error("deepgram: invalid suspended argument in external request")
66
+ const suspended = params[1]
67
+ await this.setSuspended(suspended)
68
+ this.sendResponse([ "deepgram", "suspended", suspended ])
66
69
  }
67
- else if (response !== null && response.error !== null)
68
- this.log("warning", `API error fetching projects: ${response.error}`)
70
+ else
71
+ throw new Error("deepgram: invalid arguments in external request")
69
72
  }
70
73
  catch (error) {
71
- this.log("warning", `failed to fetch balance: ${error}`)
74
+ this.log("error", `receive request error: ${error}`)
75
+ throw error
72
76
  }
73
- return { balance: balance.toFixed(2) }
74
77
  }
75
78
 
76
- /* open node */
77
- async open () {
78
- /* sanity check situation */
79
- if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
80
- throw new Error("Deepgram node currently supports PCM-S16LE audio only")
81
-
82
- /* clear destruction flag */
83
- this.closing = false
84
-
85
- /* create queue for results */
86
- this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
87
-
88
- /* create a store for the meta information */
89
- const metastore = new util.TimeStore<Map<string, any>>()
79
+ /* change suspended flag */
80
+ async setSuspended (suspended: boolean) {
81
+ if (this.closing) {
82
+ this.log("warning", "attempted to set suspended flag on destroyed node")
83
+ return
84
+ }
85
+ if (suspended === this.suspended)
86
+ return
87
+ this.log("info", `switching to ${suspended ? "SUSPENDED" : "UNSUSPENDED"} operation`)
88
+ this.suspended = suspended
89
+ if (suspended) {
90
+ /* going suspended -- tear down Deepgram API connection */
91
+ this.reconfiguring = true
92
+ try {
93
+ await this.closeConnection()
94
+ }
95
+ finally {
96
+ this.reconfiguring = false
97
+ }
98
+ }
99
+ else {
100
+ /* going unsuspended -- re-establish Deepgram API connection */
101
+ await this.openConnection()
102
+ }
103
+ }
90
104
 
105
+ /* open Deepgram API connection */
106
+ private async openConnection () {
91
107
  /* configure Deepgram connection options */
92
108
  const interim = this.params.interim as boolean
93
109
  const endpointing = this.params.endpointing as number
@@ -138,7 +154,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
138
154
 
139
155
  /* hook onto Deepgram API events */
140
156
  this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
141
- if (this.closing || this.queue === null)
157
+ if (this.closing || this.queue === null || this.metastore === null)
142
158
  return
143
159
  const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
144
160
  const words = (data.channel?.alternatives[0]?.words ?? []) as
@@ -154,12 +170,12 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
154
170
  `"${text}"`)
155
171
  const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
156
172
  const end = start.plus({ seconds: data.duration })
157
- const metas = metastore.fetch(start, end)
173
+ const metas = this.metastore.fetch(start, end)
158
174
  const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
159
175
  curr.forEach((val, key) => { prev.set(key, val) })
160
176
  return prev
161
177
  }, new Map<string, any>())
162
- metastore.prune(start)
178
+ this.metastore.prune(start)
163
179
  meta.set("words", words.map((word) => {
164
180
  const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
165
181
  const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
@@ -180,14 +196,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
180
196
  })
181
197
  this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
182
198
  this.log("info", "connection close")
183
- if (!this.closing && this.queue !== null)
184
- this.queue.write(null)
185
- })
186
- this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
187
- this.log("error", `error: ${error.message}`)
188
- if (!this.closing && this.queue !== null)
199
+ /* NOTICE: suppress EOF signalling while reconfiguring (mute toggle),
200
+ since the connection is being torn down deliberately and the
201
+ graph must keep running */
202
+ if (!this.closing && !this.reconfiguring && this.queue !== null)
189
203
  this.queue.write(null)
190
- this.emit("error", error)
191
204
  })
192
205
 
193
206
  /* wait for Deepgram API to be available */
@@ -215,6 +228,90 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
215
228
  })
216
229
  })
217
230
 
231
+ /* NOTICE: register permanent Error handler only AFTER the open
232
+ handshake -- during open, the transient .once above is the sole
233
+ Error listener so the caller's promise rejects without a parallel
234
+ stream emission tearing down the graph prematurely. */
235
+ this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
236
+ this.log("warning", `error: ${error.message}`)
237
+ /* NOTICE: do not write null to the queue here -- a transient error
238
+ must not be misinterpreted as end-of-stream by downstream nodes;
239
+ the subsequent Deepgram Close event will signal real EOF. Also
240
+ do not emit("error") on the node itself, since nothing listens
241
+ for it and it would become an uncaughtException tearing down
242
+ the whole graph. Route via the stream instead, where it is
243
+ downgraded to a warning by the graph supervisor. */
244
+ if (!this.closing && this.stream !== null)
245
+ this.stream.emit("error", error)
246
+ })
247
+ }
248
+
249
+ /* close Deepgram API connection */
250
+ private async closeConnection () {
251
+ /* cleanup pending connection timer */
252
+ if (this.connectionTimeout !== null) {
253
+ clearTimeout(this.connectionTimeout)
254
+ this.connectionTimeout = null
255
+ }
256
+
257
+ /* close Deepgram connection and remove listeners */
258
+ if (this.dg !== null) {
259
+ try {
260
+ this.dg.removeAllListeners()
261
+ this.dg.requestClose()
262
+ this.log("info", "connection closed")
263
+ }
264
+ catch (error) {
265
+ this.log("warning", `error during Deepgram cleanup: ${error}`)
266
+ }
267
+ this.dg = null
268
+ }
269
+ }
270
+
271
+ /* one-time status of node */
272
+ async status () {
273
+ let balance = 0
274
+ try {
275
+ const deepgram = Deepgram.createClient(this.params.keyAdm)
276
+ const response = await deepgram.manage.getProjects()
277
+ if (response !== null && response.error === null && response.result?.projects) {
278
+ for (const project of response.result.projects) {
279
+ const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id)
280
+ if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances)
281
+ balance += balanceResponse.result.balances[0]?.amount ?? 0
282
+ }
283
+ }
284
+ else if (response !== null && response.error !== null)
285
+ this.log("warning", `API error fetching projects: ${response.error}`)
286
+ }
287
+ catch (error) {
288
+ this.log("warning", `failed to fetch balance: ${error}`)
289
+ }
290
+ return { balance: balance.toFixed(2) }
291
+ }
292
+
293
+ /* open node */
294
+ async open () {
295
+ /* sanity check situation */
296
+ if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
297
+ throw new Error("Deepgram node currently supports PCM-S16LE audio only")
298
+
299
+ /* clear destruction flag */
300
+ this.closing = false
301
+
302
+ /* create queue for results */
303
+ this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
304
+
305
+ /* create a store for the meta information */
306
+ this.metastore = new util.TimeStore<Map<string, any>>()
307
+
308
+ /* determine initial suspended state from configuration */
309
+ this.suspended = this.params.suspended as boolean
310
+
311
+ /* establish Deepgram API connection (unless starting suspended) */
312
+ if (!this.suspended)
313
+ await this.openConnection()
314
+
218
315
  /* remember opening time to receive time zero offset */
219
316
  this.timeOpen = DateTime.now()
220
317
 
@@ -227,7 +324,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
227
324
  decodeStrings: false,
228
325
  highWaterMark: 1,
229
326
  write (chunk: SpeechFlowChunk, encoding, callback) {
230
- if (self.closing || self.dg === null) {
327
+ if (self.closing) {
231
328
  callback(new Error("stream already destroyed"))
232
329
  return
233
330
  }
@@ -235,11 +332,14 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
235
332
  callback(new Error("expected audio input chunk"))
236
333
  else if (!Buffer.isBuffer(chunk.payload))
237
334
  callback(new Error("expected Buffer input chunk"))
335
+ else if (self.suspended || self.dg === null)
336
+ /* drop audio entirely -- do not forward to Deepgram */
337
+ callback()
238
338
  else {
239
339
  if (chunk.payload.byteLength > 0) {
240
340
  self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
241
- if (chunk.meta.size > 0)
242
- metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
341
+ if (chunk.meta.size > 0 && self.metastore !== null)
342
+ self.metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
243
343
  try {
244
344
  /* send buffer (and intentionally discard all time information) */
245
345
  self.dg.send(chunk.payload.buffer.slice(
@@ -257,17 +357,19 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
257
357
  },
258
358
  async final (callback) {
259
359
  /* short-circuiting in case of own closing */
260
- if (self.closing || self.dg === null) {
360
+ if (self.closing) {
261
361
  callback()
262
362
  return
263
363
  }
264
364
 
265
365
  /* close Deepgram API */
266
- try {
267
- self.dg.requestClose()
268
- }
269
- catch (error) {
270
- self.log("warning", `error closing Deepgram connection: ${error}`)
366
+ if (self.dg !== null) {
367
+ try {
368
+ self.dg.requestClose()
369
+ }
370
+ catch (error) {
371
+ self.log("warning", `error closing Deepgram connection: ${error}`)
372
+ }
271
373
  }
272
374
 
273
375
  /* await all read operations */
@@ -307,34 +409,22 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
307
409
  /* indicate closing first to stop all async operations */
308
410
  this.closing = true
309
411
 
310
- /* cleanup all timers */
311
- if (this.connectionTimeout !== null) {
312
- clearTimeout(this.connectionTimeout)
313
- this.connectionTimeout = null
314
- }
315
-
316
412
  /* shutdown stream */
317
413
  if (this.stream !== null) {
318
414
  await util.destroyStream(this.stream)
319
415
  this.stream = null
320
416
  }
321
417
 
322
- /* close Deepgram connection and remove listeners */
323
- if (this.dg !== null) {
324
- try {
325
- this.dg.removeAllListeners()
326
- this.dg.requestClose()
327
- }
328
- catch (error) {
329
- this.log("warning", `error during Deepgram cleanup: ${error}`)
330
- }
331
- this.dg = null
332
- }
418
+ /* close Deepgram API connection */
419
+ await this.closeConnection()
333
420
 
334
421
  /* signal EOF to any pending read operations */
335
422
  if (this.queue !== null) {
336
423
  this.queue.write(null)
337
424
  this.queue = null
338
425
  }
426
+
427
+ /* discard meta information store */
428
+ this.metastore = null
339
429
  }
340
430
  }
@@ -190,10 +190,16 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
190
190
  }
191
191
  })
192
192
  this.recognizeStream.on("error", (error: Error) => {
193
- this.log("error", `error: ${error.message}`)
194
- if (!this.closing && this.queue !== null)
195
- this.queue.write(null)
196
- this.emit("error", error)
193
+ this.log("warning", `error: ${error.message}`)
194
+ /* NOTICE: do not write null to the queue here -- a transient error
195
+ must not be misinterpreted as end-of-stream by downstream nodes;
196
+ the subsequent recognize stream end event will signal real EOF.
197
+ Also do not emit("error") on the node itself, since nothing
198
+ listens for it and it would become an uncaughtException tearing
199
+ down the whole graph. Route via the stream instead, where it is
200
+ downgraded to a warning by the graph supervisor. */
201
+ if (!this.closing && this.stream !== null)
202
+ this.stream.emit("error", error)
197
203
  })
198
204
  this.recognizeStream.on("end", () => {
199
205
  this.log("info", "stream ended")
@@ -145,10 +145,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
145
145
  this.queue.write(null)
146
146
  })
147
147
  this.ws.on("error", (err) => {
148
- this.log("error", `WebSocket connection error: ${err}`)
149
- if (!this.closing && this.queue !== null)
150
- this.queue.write(null)
151
- this.emit("error", err)
148
+ this.log("warning", `WebSocket connection error: ${err}`)
149
+ /* NOTICE: do not write null to the queue here -- a transient error
150
+ must not be misinterpreted as end-of-stream by downstream nodes;
151
+ the subsequent WebSocket close event will signal real EOF. Also
152
+ do not emit("error") on the node itself, since nothing listens
153
+ for it and it would become an uncaughtException tearing down
154
+ the whole graph. Route via the stream instead, where it is
155
+ downgraded to a warning by the graph supervisor. */
156
+ if (!this.closing && this.stream !== null)
157
+ this.stream.emit("error", err)
152
158
  })
153
159
 
154
160
  /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
@@ -13,79 +13,65 @@ import * as util from "./speechflow-util"
13
13
  import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
14
14
 
15
15
  /* internal utility types */
16
- type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
17
- type Config = { [ key: string ]: ConfigEntry }
16
+ type PromptParts = {
17
+ systemPrompt: string,
18
+ chat: LLMCompleteMessage[]
19
+ }
20
+ type Config = { [ key: string ]: PromptParts }
18
21
 
19
- /* SpeechFlow node for LLM-based text-to-text spellchecking */
20
- export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
22
+ /* SpeechFlow node for LLM-based text-to-text proofreading */
23
+ export default class SpeechFlowNodeT2TProofread extends SpeechFlowNode {
21
24
  /* declare official node name */
22
- public static name = "t2t-spellcheck"
25
+ public static name = "t2t-proofread"
23
26
 
24
27
  /* internal state */
25
28
  private llm: LLM | null = null
26
29
 
27
- /* internal LLM setup */
30
+ /* internal LLM setup: prompt parts per language */
28
31
  private setup: Config = {
29
- /* English (EN) spellchecking */
32
+ /* English (EN) proofreading */
30
33
  "en": {
31
34
  systemPrompt:
32
- "You are a proofreader and spellchecker for English.\n" +
35
+ "You are a strict text corrector for English.\n" +
36
+ "Correct spelling, punctuation and grammar.\n" +
33
37
  "Output only the corrected text.\n" +
34
- "Do NOT use markdown.\n" +
35
- "Do NOT give any explanations.\n" +
36
- "Do NOT give any introduction.\n" +
37
- "Do NOT give any comments.\n" +
38
- "Do NOT give any preamble.\n" +
39
- "Do NOT give any prolog.\n" +
40
- "Do NOT give any epilog.\n" +
41
- "Do NOT change the grammar.\n" +
42
- "Do NOT use synonyms for words.\n" +
43
- "Keep all words.\n" +
44
- "Fill in missing commas.\n" +
45
- "Fill in missing points.\n" +
46
- "Fill in missing question marks.\n" +
47
- "Fill in missing hyphens.\n" +
48
- "Focus ONLY on the word spelling.\n" +
49
38
  "The text you have to correct is:\n",
50
39
  chat: [
51
- { role: "user", content: "I luve my wyfe" },
40
+ { role: "user", content: "I luve my wyfe." },
52
41
  { role: "assistant", content: "I love my wife." },
53
42
  { role: "user", content: "The weether is wunderfull!" },
54
43
  { role: "assistant", content: "The weather is wonderful!" },
55
- { role: "user", content: "The life awesome but I'm hungry." },
56
- { role: "assistant", content: "The life is awesome, but I'm hungry." }
44
+ { role: "user", content: "Hello how are you today" },
45
+ { role: "assistant", content: "Hello, how are you today?" },
46
+ { role: "user", content: "I went to the store and bought some milk eggs and bread" },
47
+ { role: "assistant", content: "I went to the store and bought some milk, eggs, and bread." },
48
+ { role: "user", content: "She don't likes apples." },
49
+ { role: "assistant", content: "She doesn't like apples." },
50
+ { role: "user", content: "Yesterday I go to the park." },
51
+ { role: "assistant", content: "Yesterday I went to the park." }
57
52
  ]
58
53
  },
59
54
 
60
- /* German (DE) spellchecking */
55
+ /* German (DE) proofreading */
61
56
  "de": {
62
57
  systemPrompt:
63
- "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
58
+ "Du bist ein strikter Textkorrektor für Deutsch.\n" +
59
+ "Korrigiere die Rechtschreibung, die Zeichensetzung und die Grammatik.\n" +
64
60
  "Gib nur den korrigierten Text aus.\n" +
65
- "Benutze KEIN Markdown.\n" +
66
- "Gib KEINE Erklärungen.\n" +
67
- "Gib KEINE Einleitung.\n" +
68
- "Gib KEINE Kommentare.\n" +
69
- "Gib KEINE Präambel.\n" +
70
- "Gib KEINEN Prolog.\n" +
71
- "Gib KEINEN Epilog.\n" +
72
- "Ändere NICHT die Grammatik.\n" +
73
- "Verwende KEINE Synonyme für Wörter.\n" +
74
- "Behalte alle Wörter bei.\n" +
75
- "Füge fehlende Kommas ein.\n" +
76
- "Füge fehlende Punkte ein.\n" +
77
- "Füge fehlende Fragezeichen ein.\n" +
78
- "Füge fehlende Bindestriche ein.\n" +
79
- "Füge fehlende Gedankenstriche ein.\n" +
80
- "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
81
61
  "Der von dir zu korrigierende Text ist:\n",
82
62
  chat: [
83
- { role: "user", content: "Ich ljebe meine Frao" },
63
+ { role: "user", content: "Ich ljebe meine Frao." },
84
64
  { role: "assistant", content: "Ich liebe meine Frau." },
85
65
  { role: "user", content: "Die Wedter ist wunderschoen." },
86
66
  { role: "assistant", content: "Das Wetter ist wunderschön." },
87
- { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
88
- { role: "assistant", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
67
+ { role: "user", content: "Hallo wie geht es dir heute" },
68
+ { role: "assistant", content: "Hallo, wie geht es dir heute?" },
69
+ { role: "user", content: "Ich bin in den Laden gegangen und habe Milch Eier und Brot gekauft" },
70
+ { role: "assistant", content: "Ich bin in den Laden gegangen und habe Milch, Eier und Brot gekauft." },
71
+ { role: "user", content: "Er gehen nach Hause." },
72
+ { role: "assistant", content: "Er geht nach Hause." },
73
+ { role: "user", content: "Gestern ich gehe in den Park." },
74
+ { role: "assistant", content: "Gestern ging ich in den Park." }
89
75
  ]
90
76
  }
91
77
  }
@@ -96,17 +82,13 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
96
82
 
97
83
  /* declare node configuration parameters */
98
84
  this.configure({
99
- lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
100
- provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
101
- api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
102
- model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
103
- key: { type: "string", val: "", match: /^.*$/ }
85
+ lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
86
+ provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
87
+ api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
88
+ model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
89
+ key: { type: "string", val: "", match: /^.*$/ }
104
90
  })
105
91
 
106
- /* tell effective mode */
107
- this.log("info", `spellchecking language "${this.params.lang}" ` +
108
- `via ${this.params.provider} LLM (model: ${this.params.model})`)
109
-
110
92
  /* declare node input/output format */
111
93
  this.input = "text"
112
94
  this.output = "text"
@@ -114,6 +96,10 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
114
96
 
115
97
  /* open node */
116
98
  async open () {
99
+ /* tell effective mode */
100
+ this.log("info", `proofreading language "${this.params.lang}" ` +
101
+ `via ${this.params.provider} LLM (model: ${this.params.model})`)
102
+
117
103
  /* instantiate LLM */
118
104
  this.llm = new LLM({
119
105
  provider: this.params.provider,
@@ -127,12 +113,13 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
127
113
  })
128
114
  await this.llm.open()
129
115
 
130
- /* provide text-to-text spellchecking */
116
+ /* provide text-to-text proofreading */
131
117
  const llm = this.llm
132
- const spellcheck = async (text: string) => {
118
+ const proofread = async (text: string) => {
133
119
  const cfg = this.setup[this.params.lang]
134
120
  if (!cfg)
135
121
  throw new Error(`unsupported language: ${this.params.lang}`)
122
+ this.log("info", `PROMPT "${cfg.systemPrompt}", chat: ${JSON.stringify(cfg.chat)}`)
136
123
  return llm.complete({
137
124
  system: cfg.systemPrompt,
138
125
  messages: cfg.chat,
@@ -154,7 +141,7 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
154
141
  callback()
155
142
  }
156
143
  else {
157
- spellcheck(chunk.payload).then((payload) => {
144
+ proofread(chunk.payload).then((payload) => {
158
145
  const chunkNew = chunk.clone()
159
146
  chunkNew.payload = payload
160
147
  this.push(chunkNew)
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
83
83
  this.configure({
84
84
  provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
85
85
  api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
86
- model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
86
+ model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
87
87
  key: { type: "string", val: "", match: /^.*$/ },
88
88
  lang: { type: "string", pos: 0, val: "en", match: /^(?:en|de)$/ },
89
89
  size: { type: "number", pos: 1, val: 4, match: (n: number) => n >= 1 && n <= 20 },
@@ -109,7 +109,7 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
109
109
  dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
110
110
  provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
111
111
  api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
112
- model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
112
+ model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
113
113
  key: { type: "string", val: "", match: /^.*$/ }
114
114
  })
115
115
 
@@ -192,7 +192,10 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
192
192
 
193
193
  /* pass-through PortAudio errors */
194
194
  this.io!.on("error", (err) => {
195
- this.emit("error", err)
195
+ /* NOTICE: do not emit("error") on the node itself, since nothing
196
+ listens for it and it would become an uncaughtException tearing
197
+ down the whole graph. Route via the stream instead, where it is
198
+ handled by the graph supervisor. */
196
199
  this.stream?.emit("error", err)
197
200
  })
198
201
 
@@ -95,7 +95,10 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
95
95
  /* handle subprocess errors */
96
96
  this.subprocess.on("error", (err) => {
97
97
  this.log("error", `subprocess error: ${err.message}`)
98
- this.emit("error", err)
98
+ /* NOTICE: do not emit("error") on the node itself, since nothing
99
+ listens for it and it would become an uncaughtException tearing
100
+ down the whole graph. Route via the stream instead, where it is
101
+ handled by the graph supervisor. */
99
102
  if (this.stream !== null && !this.stream.destroyed)
100
103
  this.stream.emit("error", err)
101
104
  })
@@ -31,6 +31,7 @@ export type LLMConfig = {
31
31
  temperature?: number
32
32
  maxTokens?: number
33
33
  cacheDir?: string
34
+ thinking?: boolean
34
35
  }
35
36
  export type LLMCompleteOptions = {
36
37
  system?: string
@@ -64,6 +65,7 @@ export class LLM extends EventEmitter {
64
65
  temperature: 0.7,
65
66
  maxTokens: 1024,
66
67
  cacheDir: "",
68
+ thinking: false,
67
69
  ...config
68
70
  } as Required<LLMConfig>
69
71
 
@@ -255,7 +257,8 @@ export class LLM extends EventEmitter {
255
257
  max_tokens: this.config.maxTokens,
256
258
  temperature: this.config.temperature,
257
259
  system: systemMessage?.content,
258
- messages: chatMessages as Anthropic.MessageParam[]
260
+ messages: chatMessages as Anthropic.MessageParam[],
261
+ ...(!this.config.thinking ? { thinking: { type: "disabled" } } : {})
259
262
  }).catch((err) => {
260
263
  throw new Error(`failed to perform Anthropic chat completion: ${err}`, { cause: err })
261
264
  })
@@ -284,7 +287,8 @@ export class LLM extends EventEmitter {
284
287
  config: {
285
288
  maxOutputTokens: this.config.maxTokens,
286
289
  temperature: this.config.temperature,
287
- ...(systemInstruction ? { systemInstruction } : {})
290
+ ...(systemInstruction ? { systemInstruction } : {}),
291
+ ...(!this.config.thinking ? { thinkingConfig: { thinkingBudget: 0 } } : {})
288
292
  }
289
293
  }).catch((err) => {
290
294
  throw new Error(`failed to perform Google chat completion: ${err}`, { cause: err })
@@ -303,6 +307,7 @@ export class LLM extends EventEmitter {
303
307
  model: this.config.model,
304
308
  messages,
305
309
  keep_alive: "10m",
310
+ think: this.config.thinking,
306
311
  options: {
307
312
  num_predict: this.config.maxTokens,
308
313
  temperature: this.config.temperature