speechflow 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ase/service.log +10272 -0
- package/.claude/CLAUDE.md +1 -0
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +10 -0
- package/README.md +10 -37
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-graph.js +14 -5
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +7 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +145 -62
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +10 -4
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +10 -4
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.d.ts → speechflow-node-t2t-proofread.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.js → speechflow-node-t2t-proofread.js} +38 -54
- package/speechflow-cli/dst/speechflow-node-t2t-proofread.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +44 -11
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +4 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +4 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +6 -2
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.js +28 -14
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +9 -2
- package/speechflow-cli/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
- package/speechflow-cli/package.json +30 -30
- package/speechflow-cli/src/speechflow-main-graph.ts +14 -5
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +156 -66
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +10 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +10 -4
- package/speechflow-cli/src/{speechflow-node-t2t-spellcheck.ts → speechflow-node-t2t-proofread.ts} +46 -59
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -1
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +4 -1
- package/speechflow-cli/src/speechflow-util-llm.ts +7 -2
- package/speechflow-cli/src/speechflow-util-misc.ts +33 -16
- package/speechflow-cli/src/speechflow.ts +1 -0
- package/speechflow-ui-db/dst/index.js +19 -16
- package/speechflow-ui-db/etc/oxlint.jsonc +0 -1
- package/speechflow-ui-db/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
- package/speechflow-ui-db/package.json +15 -15
- package/speechflow-ui-st/dst/index.js +33 -33
- package/speechflow-ui-st/etc/oxlint.jsonc +2 -2
- package/speechflow-ui-st/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.59.3.patch} +1 -1
- package/speechflow-ui-st/package.json +15 -15
- package/.claude/settings.local.json +0 -3
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +0 -13
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +0 -219
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +0 -200
|
@@ -23,8 +23,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
23
23
|
/* internal state */
|
|
24
24
|
private dg: Deepgram.LiveClient | null = null
|
|
25
25
|
private closing = false
|
|
26
|
+
private reconfiguring = false
|
|
26
27
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
27
28
|
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
29
|
+
private metastore: util.TimeStore<Map<string, any>> | null = null
|
|
30
|
+
private suspended = false
|
|
28
31
|
|
|
29
32
|
/* construct node */
|
|
30
33
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -39,7 +42,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
39
42
|
language: { type: "string", val: "multi", pos: 2 },
|
|
40
43
|
interim: { type: "boolean", val: false, pos: 3 },
|
|
41
44
|
endpointing: { type: "number", val: 0, pos: 4 },
|
|
42
|
-
keywords: { type: "string", val: "", pos: 5 }
|
|
45
|
+
keywords: { type: "string", val: "", pos: 5 },
|
|
46
|
+
suspended: { type: "boolean", val: false, pos: 6 }
|
|
43
47
|
})
|
|
44
48
|
|
|
45
49
|
/* sanity check parameters */
|
|
@@ -51,43 +55,55 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
51
55
|
this.output = "text"
|
|
52
56
|
}
|
|
53
57
|
|
|
54
|
-
/*
|
|
55
|
-
async
|
|
56
|
-
|
|
58
|
+
/* receive external request */
|
|
59
|
+
async receiveRequest (params: any[]) {
|
|
60
|
+
if (this.closing)
|
|
61
|
+
throw new Error("deepgram: node already destroyed")
|
|
57
62
|
try {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
balance += balanceResponse.result.balances[0]?.amount ?? 0
|
|
65
|
-
}
|
|
63
|
+
if (params.length === 2 && params[0] === "suspended") {
|
|
64
|
+
if (typeof params[1] !== "boolean")
|
|
65
|
+
throw new Error("deepgram: invalid suspended argument in external request")
|
|
66
|
+
const suspended = params[1]
|
|
67
|
+
await this.setSuspended(suspended)
|
|
68
|
+
this.sendResponse([ "deepgram", "suspended", suspended ])
|
|
66
69
|
}
|
|
67
|
-
else
|
|
68
|
-
|
|
70
|
+
else
|
|
71
|
+
throw new Error("deepgram: invalid arguments in external request")
|
|
69
72
|
}
|
|
70
73
|
catch (error) {
|
|
71
|
-
this.log("
|
|
74
|
+
this.log("error", `receive request error: ${error}`)
|
|
75
|
+
throw error
|
|
72
76
|
}
|
|
73
|
-
return { balance: balance.toFixed(2) }
|
|
74
77
|
}
|
|
75
78
|
|
|
76
|
-
/*
|
|
77
|
-
async
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
79
|
+
/* change suspended flag */
|
|
80
|
+
async setSuspended (suspended: boolean) {
|
|
81
|
+
if (this.closing) {
|
|
82
|
+
this.log("warning", "attempted to set suspended flag on destroyed node")
|
|
83
|
+
return
|
|
84
|
+
}
|
|
85
|
+
if (suspended === this.suspended)
|
|
86
|
+
return
|
|
87
|
+
this.log("info", `switching to ${suspended ? "SUSPENDED" : "UNSUSPENDED"} operation`)
|
|
88
|
+
this.suspended = suspended
|
|
89
|
+
if (suspended) {
|
|
90
|
+
/* going suspended -- tear down Deepgram API connection */
|
|
91
|
+
this.reconfiguring = true
|
|
92
|
+
try {
|
|
93
|
+
await this.closeConnection()
|
|
94
|
+
}
|
|
95
|
+
finally {
|
|
96
|
+
this.reconfiguring = false
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
else {
|
|
100
|
+
/* going unsuspended -- re-establish Deepgram API connection */
|
|
101
|
+
await this.openConnection()
|
|
102
|
+
}
|
|
103
|
+
}
|
|
90
104
|
|
|
105
|
+
/* open Deepgram API connection */
|
|
106
|
+
private async openConnection () {
|
|
91
107
|
/* configure Deepgram connection options */
|
|
92
108
|
const interim = this.params.interim as boolean
|
|
93
109
|
const endpointing = this.params.endpointing as number
|
|
@@ -138,7 +154,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
138
154
|
|
|
139
155
|
/* hook onto Deepgram API events */
|
|
140
156
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
141
|
-
if (this.closing || this.queue === null)
|
|
157
|
+
if (this.closing || this.queue === null || this.metastore === null)
|
|
142
158
|
return
|
|
143
159
|
const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
|
|
144
160
|
const words = (data.channel?.alternatives[0]?.words ?? []) as
|
|
@@ -154,12 +170,12 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
154
170
|
`"${text}"`)
|
|
155
171
|
const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
|
|
156
172
|
const end = start.plus({ seconds: data.duration })
|
|
157
|
-
const metas = metastore.fetch(start, end)
|
|
173
|
+
const metas = this.metastore.fetch(start, end)
|
|
158
174
|
const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
|
|
159
175
|
curr.forEach((val, key) => { prev.set(key, val) })
|
|
160
176
|
return prev
|
|
161
177
|
}, new Map<string, any>())
|
|
162
|
-
metastore.prune(start)
|
|
178
|
+
this.metastore.prune(start)
|
|
163
179
|
meta.set("words", words.map((word) => {
|
|
164
180
|
const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
|
|
165
181
|
const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
|
|
@@ -180,14 +196,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
180
196
|
})
|
|
181
197
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
|
|
182
198
|
this.log("info", "connection close")
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
this.log("error", `error: ${error.message}`)
|
|
188
|
-
if (!this.closing && this.queue !== null)
|
|
199
|
+
/* NOTICE: suppress EOF signalling while reconfiguring (mute toggle),
|
|
200
|
+
since the connection is being torn down deliberately and the
|
|
201
|
+
graph must keep running */
|
|
202
|
+
if (!this.closing && !this.reconfiguring && this.queue !== null)
|
|
189
203
|
this.queue.write(null)
|
|
190
|
-
this.emit("error", error)
|
|
191
204
|
})
|
|
192
205
|
|
|
193
206
|
/* wait for Deepgram API to be available */
|
|
@@ -215,6 +228,90 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
215
228
|
})
|
|
216
229
|
})
|
|
217
230
|
|
|
231
|
+
/* NOTICE: register permanent Error handler only AFTER the open
|
|
232
|
+
handshake -- during open, the transient .once above is the sole
|
|
233
|
+
Error listener so the caller's promise rejects without a parallel
|
|
234
|
+
stream emission tearing down the graph prematurely. */
|
|
235
|
+
this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
|
|
236
|
+
this.log("warning", `error: ${error.message}`)
|
|
237
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
238
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
239
|
+
the subsequent Deepgram Close event will signal real EOF. Also
|
|
240
|
+
do not emit("error") on the node itself, since nothing listens
|
|
241
|
+
for it and it would become an uncaughtException tearing down
|
|
242
|
+
the whole graph. Route via the stream instead, where it is
|
|
243
|
+
downgraded to a warning by the graph supervisor. */
|
|
244
|
+
if (!this.closing && this.stream !== null)
|
|
245
|
+
this.stream.emit("error", error)
|
|
246
|
+
})
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* close Deepgram API connection */
|
|
250
|
+
private async closeConnection () {
|
|
251
|
+
/* cleanup pending connection timer */
|
|
252
|
+
if (this.connectionTimeout !== null) {
|
|
253
|
+
clearTimeout(this.connectionTimeout)
|
|
254
|
+
this.connectionTimeout = null
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/* close Deepgram connection and remove listeners */
|
|
258
|
+
if (this.dg !== null) {
|
|
259
|
+
try {
|
|
260
|
+
this.dg.removeAllListeners()
|
|
261
|
+
this.dg.requestClose()
|
|
262
|
+
this.log("info", "connection closed")
|
|
263
|
+
}
|
|
264
|
+
catch (error) {
|
|
265
|
+
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
266
|
+
}
|
|
267
|
+
this.dg = null
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/* one-time status of node */
|
|
272
|
+
async status () {
|
|
273
|
+
let balance = 0
|
|
274
|
+
try {
|
|
275
|
+
const deepgram = Deepgram.createClient(this.params.keyAdm)
|
|
276
|
+
const response = await deepgram.manage.getProjects()
|
|
277
|
+
if (response !== null && response.error === null && response.result?.projects) {
|
|
278
|
+
for (const project of response.result.projects) {
|
|
279
|
+
const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id)
|
|
280
|
+
if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances)
|
|
281
|
+
balance += balanceResponse.result.balances[0]?.amount ?? 0
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
else if (response !== null && response.error !== null)
|
|
285
|
+
this.log("warning", `API error fetching projects: ${response.error}`)
|
|
286
|
+
}
|
|
287
|
+
catch (error) {
|
|
288
|
+
this.log("warning", `failed to fetch balance: ${error}`)
|
|
289
|
+
}
|
|
290
|
+
return { balance: balance.toFixed(2) }
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/* open node */
|
|
294
|
+
async open () {
|
|
295
|
+
/* sanity check situation */
|
|
296
|
+
if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
|
|
297
|
+
throw new Error("Deepgram node currently supports PCM-S16LE audio only")
|
|
298
|
+
|
|
299
|
+
/* clear destruction flag */
|
|
300
|
+
this.closing = false
|
|
301
|
+
|
|
302
|
+
/* create queue for results */
|
|
303
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
304
|
+
|
|
305
|
+
/* create a store for the meta information */
|
|
306
|
+
this.metastore = new util.TimeStore<Map<string, any>>()
|
|
307
|
+
|
|
308
|
+
/* determine initial suspended state from configuration */
|
|
309
|
+
this.suspended = this.params.suspended as boolean
|
|
310
|
+
|
|
311
|
+
/* establish Deepgram API connection (unless starting suspended) */
|
|
312
|
+
if (!this.suspended)
|
|
313
|
+
await this.openConnection()
|
|
314
|
+
|
|
218
315
|
/* remember opening time to receive time zero offset */
|
|
219
316
|
this.timeOpen = DateTime.now()
|
|
220
317
|
|
|
@@ -227,7 +324,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
227
324
|
decodeStrings: false,
|
|
228
325
|
highWaterMark: 1,
|
|
229
326
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
230
|
-
if (self.closing
|
|
327
|
+
if (self.closing) {
|
|
231
328
|
callback(new Error("stream already destroyed"))
|
|
232
329
|
return
|
|
233
330
|
}
|
|
@@ -235,11 +332,14 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
235
332
|
callback(new Error("expected audio input chunk"))
|
|
236
333
|
else if (!Buffer.isBuffer(chunk.payload))
|
|
237
334
|
callback(new Error("expected Buffer input chunk"))
|
|
335
|
+
else if (self.suspended || self.dg === null)
|
|
336
|
+
/* drop audio entirely -- do not forward to Deepgram */
|
|
337
|
+
callback()
|
|
238
338
|
else {
|
|
239
339
|
if (chunk.payload.byteLength > 0) {
|
|
240
340
|
self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
241
|
-
if (chunk.meta.size > 0)
|
|
242
|
-
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
341
|
+
if (chunk.meta.size > 0 && self.metastore !== null)
|
|
342
|
+
self.metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
243
343
|
try {
|
|
244
344
|
/* send buffer (and intentionally discard all time information) */
|
|
245
345
|
self.dg.send(chunk.payload.buffer.slice(
|
|
@@ -257,17 +357,19 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
257
357
|
},
|
|
258
358
|
async final (callback) {
|
|
259
359
|
/* short-circuiting in case of own closing */
|
|
260
|
-
if (self.closing
|
|
360
|
+
if (self.closing) {
|
|
261
361
|
callback()
|
|
262
362
|
return
|
|
263
363
|
}
|
|
264
364
|
|
|
265
365
|
/* close Deepgram API */
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
366
|
+
if (self.dg !== null) {
|
|
367
|
+
try {
|
|
368
|
+
self.dg.requestClose()
|
|
369
|
+
}
|
|
370
|
+
catch (error) {
|
|
371
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
372
|
+
}
|
|
271
373
|
}
|
|
272
374
|
|
|
273
375
|
/* await all read operations */
|
|
@@ -307,34 +409,22 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
307
409
|
/* indicate closing first to stop all async operations */
|
|
308
410
|
this.closing = true
|
|
309
411
|
|
|
310
|
-
/* cleanup all timers */
|
|
311
|
-
if (this.connectionTimeout !== null) {
|
|
312
|
-
clearTimeout(this.connectionTimeout)
|
|
313
|
-
this.connectionTimeout = null
|
|
314
|
-
}
|
|
315
|
-
|
|
316
412
|
/* shutdown stream */
|
|
317
413
|
if (this.stream !== null) {
|
|
318
414
|
await util.destroyStream(this.stream)
|
|
319
415
|
this.stream = null
|
|
320
416
|
}
|
|
321
417
|
|
|
322
|
-
/* close Deepgram connection
|
|
323
|
-
|
|
324
|
-
try {
|
|
325
|
-
this.dg.removeAllListeners()
|
|
326
|
-
this.dg.requestClose()
|
|
327
|
-
}
|
|
328
|
-
catch (error) {
|
|
329
|
-
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
330
|
-
}
|
|
331
|
-
this.dg = null
|
|
332
|
-
}
|
|
418
|
+
/* close Deepgram API connection */
|
|
419
|
+
await this.closeConnection()
|
|
333
420
|
|
|
334
421
|
/* signal EOF to any pending read operations */
|
|
335
422
|
if (this.queue !== null) {
|
|
336
423
|
this.queue.write(null)
|
|
337
424
|
this.queue = null
|
|
338
425
|
}
|
|
426
|
+
|
|
427
|
+
/* discard meta information store */
|
|
428
|
+
this.metastore = null
|
|
339
429
|
}
|
|
340
430
|
}
|
|
@@ -190,10 +190,16 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
190
190
|
}
|
|
191
191
|
})
|
|
192
192
|
this.recognizeStream.on("error", (error: Error) => {
|
|
193
|
-
this.log("
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
193
|
+
this.log("warning", `error: ${error.message}`)
|
|
194
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
195
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
196
|
+
the subsequent recognize stream end event will signal real EOF.
|
|
197
|
+
Also do not emit("error") on the node itself, since nothing
|
|
198
|
+
listens for it and it would become an uncaughtException tearing
|
|
199
|
+
down the whole graph. Route via the stream instead, where it is
|
|
200
|
+
downgraded to a warning by the graph supervisor. */
|
|
201
|
+
if (!this.closing && this.stream !== null)
|
|
202
|
+
this.stream.emit("error", error)
|
|
197
203
|
})
|
|
198
204
|
this.recognizeStream.on("end", () => {
|
|
199
205
|
this.log("info", "stream ended")
|
|
@@ -145,10 +145,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
145
145
|
this.queue.write(null)
|
|
146
146
|
})
|
|
147
147
|
this.ws.on("error", (err) => {
|
|
148
|
-
this.log("
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
148
|
+
this.log("warning", `WebSocket connection error: ${err}`)
|
|
149
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
150
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
151
|
+
the subsequent WebSocket close event will signal real EOF. Also
|
|
152
|
+
do not emit("error") on the node itself, since nothing listens
|
|
153
|
+
for it and it would become an uncaughtException tearing down
|
|
154
|
+
the whole graph. Route via the stream instead, where it is
|
|
155
|
+
downgraded to a warning by the graph supervisor. */
|
|
156
|
+
if (!this.closing && this.stream !== null)
|
|
157
|
+
this.stream.emit("error", err)
|
|
152
158
|
})
|
|
153
159
|
|
|
154
160
|
/* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
|
package/speechflow-cli/src/{speechflow-node-t2t-spellcheck.ts → speechflow-node-t2t-proofread.ts}
RENAMED
|
@@ -13,79 +13,65 @@ import * as util from "./speechflow-util"
|
|
|
13
13
|
import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
|
|
14
14
|
|
|
15
15
|
/* internal utility types */
|
|
16
|
-
type
|
|
17
|
-
|
|
16
|
+
type PromptParts = {
|
|
17
|
+
systemPrompt: string,
|
|
18
|
+
chat: LLMCompleteMessage[]
|
|
19
|
+
}
|
|
20
|
+
type Config = { [ key: string ]: PromptParts }
|
|
18
21
|
|
|
19
|
-
/* SpeechFlow node for LLM-based text-to-text
|
|
20
|
-
export default class
|
|
22
|
+
/* SpeechFlow node for LLM-based text-to-text proofreading */
|
|
23
|
+
export default class SpeechFlowNodeT2TProofread extends SpeechFlowNode {
|
|
21
24
|
/* declare official node name */
|
|
22
|
-
public static name = "t2t-
|
|
25
|
+
public static name = "t2t-proofread"
|
|
23
26
|
|
|
24
27
|
/* internal state */
|
|
25
28
|
private llm: LLM | null = null
|
|
26
29
|
|
|
27
|
-
/* internal LLM setup */
|
|
30
|
+
/* internal LLM setup: prompt parts per language */
|
|
28
31
|
private setup: Config = {
|
|
29
|
-
/* English (EN)
|
|
32
|
+
/* English (EN) proofreading */
|
|
30
33
|
"en": {
|
|
31
34
|
systemPrompt:
|
|
32
|
-
"You are a
|
|
35
|
+
"You are a strict text corrector for English.\n" +
|
|
36
|
+
"Correct spelling, punctuation and grammar.\n" +
|
|
33
37
|
"Output only the corrected text.\n" +
|
|
34
|
-
"Do NOT use markdown.\n" +
|
|
35
|
-
"Do NOT give any explanations.\n" +
|
|
36
|
-
"Do NOT give any introduction.\n" +
|
|
37
|
-
"Do NOT give any comments.\n" +
|
|
38
|
-
"Do NOT give any preamble.\n" +
|
|
39
|
-
"Do NOT give any prolog.\n" +
|
|
40
|
-
"Do NOT give any epilog.\n" +
|
|
41
|
-
"Do NOT change the grammar.\n" +
|
|
42
|
-
"Do NOT use synonyms for words.\n" +
|
|
43
|
-
"Keep all words.\n" +
|
|
44
|
-
"Fill in missing commas.\n" +
|
|
45
|
-
"Fill in missing points.\n" +
|
|
46
|
-
"Fill in missing question marks.\n" +
|
|
47
|
-
"Fill in missing hyphens.\n" +
|
|
48
|
-
"Focus ONLY on the word spelling.\n" +
|
|
49
38
|
"The text you have to correct is:\n",
|
|
50
39
|
chat: [
|
|
51
|
-
{ role: "user", content: "I luve my wyfe" },
|
|
40
|
+
{ role: "user", content: "I luve my wyfe." },
|
|
52
41
|
{ role: "assistant", content: "I love my wife." },
|
|
53
42
|
{ role: "user", content: "The weether is wunderfull!" },
|
|
54
43
|
{ role: "assistant", content: "The weather is wonderful!" },
|
|
55
|
-
{ role: "user", content: "
|
|
56
|
-
{ role: "assistant", content: "
|
|
44
|
+
{ role: "user", content: "Hello how are you today" },
|
|
45
|
+
{ role: "assistant", content: "Hello, how are you today?" },
|
|
46
|
+
{ role: "user", content: "I went to the store and bought some milk eggs and bread" },
|
|
47
|
+
{ role: "assistant", content: "I went to the store and bought some milk, eggs, and bread." },
|
|
48
|
+
{ role: "user", content: "She don't likes apples." },
|
|
49
|
+
{ role: "assistant", content: "She doesn't like apples." },
|
|
50
|
+
{ role: "user", content: "Yesterday I go to the park." },
|
|
51
|
+
{ role: "assistant", content: "Yesterday I went to the park." }
|
|
57
52
|
]
|
|
58
53
|
},
|
|
59
54
|
|
|
60
|
-
/* German (DE)
|
|
55
|
+
/* German (DE) proofreading */
|
|
61
56
|
"de": {
|
|
62
57
|
systemPrompt:
|
|
63
|
-
"Du bist ein
|
|
58
|
+
"Du bist ein strikter Textkorrektor für Deutsch.\n" +
|
|
59
|
+
"Korrigiere die Rechtschreibung, die Zeichensetzung und die Grammatik.\n" +
|
|
64
60
|
"Gib nur den korrigierten Text aus.\n" +
|
|
65
|
-
"Benutze KEIN Markdown.\n" +
|
|
66
|
-
"Gib KEINE Erklärungen.\n" +
|
|
67
|
-
"Gib KEINE Einleitung.\n" +
|
|
68
|
-
"Gib KEINE Kommentare.\n" +
|
|
69
|
-
"Gib KEINE Präambel.\n" +
|
|
70
|
-
"Gib KEINEN Prolog.\n" +
|
|
71
|
-
"Gib KEINEN Epilog.\n" +
|
|
72
|
-
"Ändere NICHT die Grammatik.\n" +
|
|
73
|
-
"Verwende KEINE Synonyme für Wörter.\n" +
|
|
74
|
-
"Behalte alle Wörter bei.\n" +
|
|
75
|
-
"Füge fehlende Kommas ein.\n" +
|
|
76
|
-
"Füge fehlende Punkte ein.\n" +
|
|
77
|
-
"Füge fehlende Fragezeichen ein.\n" +
|
|
78
|
-
"Füge fehlende Bindestriche ein.\n" +
|
|
79
|
-
"Füge fehlende Gedankenstriche ein.\n" +
|
|
80
|
-
"Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
|
|
81
61
|
"Der von dir zu korrigierende Text ist:\n",
|
|
82
62
|
chat: [
|
|
83
|
-
{ role: "user", content: "Ich ljebe meine Frao" },
|
|
63
|
+
{ role: "user", content: "Ich ljebe meine Frao." },
|
|
84
64
|
{ role: "assistant", content: "Ich liebe meine Frau." },
|
|
85
65
|
{ role: "user", content: "Die Wedter ist wunderschoen." },
|
|
86
66
|
{ role: "assistant", content: "Das Wetter ist wunderschön." },
|
|
87
|
-
{ role: "user", content: "
|
|
88
|
-
{ role: "assistant", content: "
|
|
67
|
+
{ role: "user", content: "Hallo wie geht es dir heute" },
|
|
68
|
+
{ role: "assistant", content: "Hallo, wie geht es dir heute?" },
|
|
69
|
+
{ role: "user", content: "Ich bin in den Laden gegangen und habe Milch Eier und Brot gekauft" },
|
|
70
|
+
{ role: "assistant", content: "Ich bin in den Laden gegangen und habe Milch, Eier und Brot gekauft." },
|
|
71
|
+
{ role: "user", content: "Er gehen nach Hause." },
|
|
72
|
+
{ role: "assistant", content: "Er geht nach Hause." },
|
|
73
|
+
{ role: "user", content: "Gestern ich gehe in den Park." },
|
|
74
|
+
{ role: "assistant", content: "Gestern ging ich in den Park." }
|
|
89
75
|
]
|
|
90
76
|
}
|
|
91
77
|
}
|
|
@@ -96,17 +82,13 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
96
82
|
|
|
97
83
|
/* declare node configuration parameters */
|
|
98
84
|
this.configure({
|
|
99
|
-
lang:
|
|
100
|
-
provider:
|
|
101
|
-
api:
|
|
102
|
-
model:
|
|
103
|
-
key:
|
|
85
|
+
lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
|
|
86
|
+
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
87
|
+
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
88
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
89
|
+
key: { type: "string", val: "", match: /^.*$/ }
|
|
104
90
|
})
|
|
105
91
|
|
|
106
|
-
/* tell effective mode */
|
|
107
|
-
this.log("info", `spellchecking language "${this.params.lang}" ` +
|
|
108
|
-
`via ${this.params.provider} LLM (model: ${this.params.model})`)
|
|
109
|
-
|
|
110
92
|
/* declare node input/output format */
|
|
111
93
|
this.input = "text"
|
|
112
94
|
this.output = "text"
|
|
@@ -114,6 +96,10 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
114
96
|
|
|
115
97
|
/* open node */
|
|
116
98
|
async open () {
|
|
99
|
+
/* tell effective mode */
|
|
100
|
+
this.log("info", `proofreading language "${this.params.lang}" ` +
|
|
101
|
+
`via ${this.params.provider} LLM (model: ${this.params.model})`)
|
|
102
|
+
|
|
117
103
|
/* instantiate LLM */
|
|
118
104
|
this.llm = new LLM({
|
|
119
105
|
provider: this.params.provider,
|
|
@@ -127,12 +113,13 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
127
113
|
})
|
|
128
114
|
await this.llm.open()
|
|
129
115
|
|
|
130
|
-
/* provide text-to-text
|
|
116
|
+
/* provide text-to-text proofreading */
|
|
131
117
|
const llm = this.llm
|
|
132
|
-
const
|
|
118
|
+
const proofread = async (text: string) => {
|
|
133
119
|
const cfg = this.setup[this.params.lang]
|
|
134
120
|
if (!cfg)
|
|
135
121
|
throw new Error(`unsupported language: ${this.params.lang}`)
|
|
122
|
+
this.log("info", `PROMPT "${cfg.systemPrompt}", chat: ${JSON.stringify(cfg.chat)}`)
|
|
136
123
|
return llm.complete({
|
|
137
124
|
system: cfg.systemPrompt,
|
|
138
125
|
messages: cfg.chat,
|
|
@@ -154,7 +141,7 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
154
141
|
callback()
|
|
155
142
|
}
|
|
156
143
|
else {
|
|
157
|
-
|
|
144
|
+
proofread(chunk.payload).then((payload) => {
|
|
158
145
|
const chunkNew = chunk.clone()
|
|
159
146
|
chunkNew.payload = payload
|
|
160
147
|
this.push(chunkNew)
|
|
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
|
|
|
83
83
|
this.configure({
|
|
84
84
|
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
85
85
|
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
86
|
-
model: { type: "string", val: "
|
|
86
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
87
87
|
key: { type: "string", val: "", match: /^.*$/ },
|
|
88
88
|
lang: { type: "string", pos: 0, val: "en", match: /^(?:en|de)$/ },
|
|
89
89
|
size: { type: "number", pos: 1, val: 4, match: (n: number) => n >= 1 && n <= 20 },
|
|
@@ -109,7 +109,7 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
|
|
|
109
109
|
dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
|
|
110
110
|
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
111
111
|
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
112
|
-
model: { type: "string", val: "
|
|
112
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
113
113
|
key: { type: "string", val: "", match: /^.*$/ }
|
|
114
114
|
})
|
|
115
115
|
|
|
@@ -192,7 +192,10 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
192
192
|
|
|
193
193
|
/* pass-through PortAudio errors */
|
|
194
194
|
this.io!.on("error", (err) => {
|
|
195
|
-
|
|
195
|
+
/* NOTICE: do not emit("error") on the node itself, since nothing
|
|
196
|
+
listens for it and it would become an uncaughtException tearing
|
|
197
|
+
down the whole graph. Route via the stream instead, where it is
|
|
198
|
+
handled by the graph supervisor. */
|
|
196
199
|
this.stream?.emit("error", err)
|
|
197
200
|
})
|
|
198
201
|
|
|
@@ -95,7 +95,10 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
|
|
|
95
95
|
/* handle subprocess errors */
|
|
96
96
|
this.subprocess.on("error", (err) => {
|
|
97
97
|
this.log("error", `subprocess error: ${err.message}`)
|
|
98
|
-
|
|
98
|
+
/* NOTICE: do not emit("error") on the node itself, since nothing
|
|
99
|
+
listens for it and it would become an uncaughtException tearing
|
|
100
|
+
down the whole graph. Route via the stream instead, where it is
|
|
101
|
+
handled by the graph supervisor. */
|
|
99
102
|
if (this.stream !== null && !this.stream.destroyed)
|
|
100
103
|
this.stream.emit("error", err)
|
|
101
104
|
})
|
|
@@ -31,6 +31,7 @@ export type LLMConfig = {
|
|
|
31
31
|
temperature?: number
|
|
32
32
|
maxTokens?: number
|
|
33
33
|
cacheDir?: string
|
|
34
|
+
thinking?: boolean
|
|
34
35
|
}
|
|
35
36
|
export type LLMCompleteOptions = {
|
|
36
37
|
system?: string
|
|
@@ -64,6 +65,7 @@ export class LLM extends EventEmitter {
|
|
|
64
65
|
temperature: 0.7,
|
|
65
66
|
maxTokens: 1024,
|
|
66
67
|
cacheDir: "",
|
|
68
|
+
thinking: false,
|
|
67
69
|
...config
|
|
68
70
|
} as Required<LLMConfig>
|
|
69
71
|
|
|
@@ -255,7 +257,8 @@ export class LLM extends EventEmitter {
|
|
|
255
257
|
max_tokens: this.config.maxTokens,
|
|
256
258
|
temperature: this.config.temperature,
|
|
257
259
|
system: systemMessage?.content,
|
|
258
|
-
messages: chatMessages as Anthropic.MessageParam[]
|
|
260
|
+
messages: chatMessages as Anthropic.MessageParam[],
|
|
261
|
+
...(!this.config.thinking ? { thinking: { type: "disabled" } } : {})
|
|
259
262
|
}).catch((err) => {
|
|
260
263
|
throw new Error(`failed to perform Anthropic chat completion: ${err}`, { cause: err })
|
|
261
264
|
})
|
|
@@ -284,7 +287,8 @@ export class LLM extends EventEmitter {
|
|
|
284
287
|
config: {
|
|
285
288
|
maxOutputTokens: this.config.maxTokens,
|
|
286
289
|
temperature: this.config.temperature,
|
|
287
|
-
...(systemInstruction ? { systemInstruction } : {})
|
|
290
|
+
...(systemInstruction ? { systemInstruction } : {}),
|
|
291
|
+
...(!this.config.thinking ? { thinkingConfig: { thinkingBudget: 0 } } : {})
|
|
288
292
|
}
|
|
289
293
|
}).catch((err) => {
|
|
290
294
|
throw new Error(`failed to perform Google chat completion: ${err}`, { cause: err })
|
|
@@ -303,6 +307,7 @@ export class LLM extends EventEmitter {
|
|
|
303
307
|
model: this.config.model,
|
|
304
308
|
messages,
|
|
305
309
|
keep_alive: "10m",
|
|
310
|
+
think: this.config.thinking,
|
|
306
311
|
options: {
|
|
307
312
|
num_predict: this.config.maxTokens,
|
|
308
313
|
temperature: this.config.temperature
|