speechflow 2.3.1 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ase/service.log +10357 -0
- package/.ase/service.yaml +1 -0
- package/.claude/CLAUDE.md +1 -0
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +25 -1
- package/README.md +10 -37
- package/package.json +8 -8
- package/speechflow-cli/dst/speechflow-main-api.js +8 -1
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +14 -5
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +5 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +7 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +145 -62
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +10 -4
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +10 -4
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +3 -0
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.d.ts → speechflow-node-t2t-proofread.d.ts} +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-spellcheck.js → speechflow-node-t2t-proofread.js} +53 -61
- package/speechflow-cli/dst/speechflow-node-t2t-proofread.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +64 -14
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +4 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +4 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +7 -3
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.js +28 -14
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +10 -2
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.60.0.patch} +1 -1
- package/speechflow-cli/package.json +34 -31
- package/speechflow-cli/src/speechflow-main-api.ts +8 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +14 -5
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +6 -1
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +156 -66
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +10 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +10 -4
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +3 -0
- package/speechflow-cli/src/{speechflow-node-t2t-spellcheck.ts → speechflow-node-t2t-proofread.ts} +61 -66
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +21 -3
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -1
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +4 -1
- package/speechflow-cli/src/speechflow-util-llm.ts +8 -3
- package/speechflow-cli/src/speechflow-util-misc.ts +33 -16
- package/speechflow-cli/src/speechflow.ts +1 -0
- package/speechflow-ui-db/dst/index.js +19 -16
- package/speechflow-ui-db/etc/oxlint.jsonc +0 -1
- package/speechflow-ui-db/etc/stx.conf +2 -2
- package/speechflow-ui-db/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.60.0.patch} +1 -1
- package/speechflow-ui-db/package.json +16 -16
- package/speechflow-ui-st/dst/index.css +1 -1
- package/speechflow-ui-st/dst/index.js +33 -33
- package/speechflow-ui-st/etc/oxlint.jsonc +2 -2
- package/speechflow-ui-st/etc/stx.conf +2 -2
- package/speechflow-ui-st/package.d/{@typescript-eslint+typescript-estree+8.57.2.patch → @typescript-eslint+typescript-estree+8.60.0.patch} +1 -1
- package/speechflow-ui-st/package.json +16 -16
- package/speechflow-ui-st/src/app.vue +3 -3
- package/.claude/settings.local.json +0 -3
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +0 -13
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +0 -219
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +0 -200
|
@@ -23,8 +23,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
23
23
|
/* internal state */
|
|
24
24
|
private dg: Deepgram.LiveClient | null = null
|
|
25
25
|
private closing = false
|
|
26
|
+
private reconfiguring = false
|
|
26
27
|
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
27
28
|
private queue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
|
|
29
|
+
private metastore: util.TimeStore<Map<string, any>> | null = null
|
|
30
|
+
private suspended = false
|
|
28
31
|
|
|
29
32
|
/* construct node */
|
|
30
33
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -39,7 +42,8 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
39
42
|
language: { type: "string", val: "multi", pos: 2 },
|
|
40
43
|
interim: { type: "boolean", val: false, pos: 3 },
|
|
41
44
|
endpointing: { type: "number", val: 0, pos: 4 },
|
|
42
|
-
keywords: { type: "string", val: "", pos: 5 }
|
|
45
|
+
keywords: { type: "string", val: "", pos: 5 },
|
|
46
|
+
suspended: { type: "boolean", val: false, pos: 6 }
|
|
43
47
|
})
|
|
44
48
|
|
|
45
49
|
/* sanity check parameters */
|
|
@@ -51,43 +55,55 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
51
55
|
this.output = "text"
|
|
52
56
|
}
|
|
53
57
|
|
|
54
|
-
/*
|
|
55
|
-
async
|
|
56
|
-
|
|
58
|
+
/* receive external request */
|
|
59
|
+
async receiveRequest (params: any[]) {
|
|
60
|
+
if (this.closing)
|
|
61
|
+
throw new Error("deepgram: node already destroyed")
|
|
57
62
|
try {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
balance += balanceResponse.result.balances[0]?.amount ?? 0
|
|
65
|
-
}
|
|
63
|
+
if (params.length === 2 && params[0] === "suspended") {
|
|
64
|
+
if (typeof params[1] !== "boolean")
|
|
65
|
+
throw new Error("deepgram: invalid suspended argument in external request")
|
|
66
|
+
const suspended = params[1]
|
|
67
|
+
await this.setSuspended(suspended)
|
|
68
|
+
this.sendResponse([ "deepgram", "suspended", suspended ])
|
|
66
69
|
}
|
|
67
|
-
else
|
|
68
|
-
|
|
70
|
+
else
|
|
71
|
+
throw new Error("deepgram: invalid arguments in external request")
|
|
69
72
|
}
|
|
70
73
|
catch (error) {
|
|
71
|
-
this.log("
|
|
74
|
+
this.log("error", `receive request error: ${error}`)
|
|
75
|
+
throw error
|
|
72
76
|
}
|
|
73
|
-
return { balance: balance.toFixed(2) }
|
|
74
77
|
}
|
|
75
78
|
|
|
76
|
-
/*
|
|
77
|
-
async
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
79
|
+
/* change suspended flag */
|
|
80
|
+
async setSuspended (suspended: boolean) {
|
|
81
|
+
if (this.closing) {
|
|
82
|
+
this.log("warning", "attempted to set suspended flag on destroyed node")
|
|
83
|
+
return
|
|
84
|
+
}
|
|
85
|
+
if (suspended === this.suspended)
|
|
86
|
+
return
|
|
87
|
+
this.log("info", `switching to ${suspended ? "SUSPENDED" : "UNSUSPENDED"} operation`)
|
|
88
|
+
this.suspended = suspended
|
|
89
|
+
if (suspended) {
|
|
90
|
+
/* going suspended -- tear down Deepgram API connection */
|
|
91
|
+
this.reconfiguring = true
|
|
92
|
+
try {
|
|
93
|
+
await this.closeConnection()
|
|
94
|
+
}
|
|
95
|
+
finally {
|
|
96
|
+
this.reconfiguring = false
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
else {
|
|
100
|
+
/* going unsuspended -- re-establish Deepgram API connection */
|
|
101
|
+
await this.openConnection()
|
|
102
|
+
}
|
|
103
|
+
}
|
|
90
104
|
|
|
105
|
+
/* open Deepgram API connection */
|
|
106
|
+
private async openConnection () {
|
|
91
107
|
/* configure Deepgram connection options */
|
|
92
108
|
const interim = this.params.interim as boolean
|
|
93
109
|
const endpointing = this.params.endpointing as number
|
|
@@ -138,7 +154,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
138
154
|
|
|
139
155
|
/* hook onto Deepgram API events */
|
|
140
156
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
141
|
-
if (this.closing || this.queue === null)
|
|
157
|
+
if (this.closing || this.queue === null || this.metastore === null)
|
|
142
158
|
return
|
|
143
159
|
const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
|
|
144
160
|
const words = (data.channel?.alternatives[0]?.words ?? []) as
|
|
@@ -154,12 +170,12 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
154
170
|
`"${text}"`)
|
|
155
171
|
const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
|
|
156
172
|
const end = start.plus({ seconds: data.duration })
|
|
157
|
-
const metas = metastore.fetch(start, end)
|
|
173
|
+
const metas = this.metastore.fetch(start, end)
|
|
158
174
|
const meta = metas.toReversed().reduce((prev: Map<string, any>, curr: Map<string, any>) => {
|
|
159
175
|
curr.forEach((val, key) => { prev.set(key, val) })
|
|
160
176
|
return prev
|
|
161
177
|
}, new Map<string, any>())
|
|
162
|
-
metastore.prune(start)
|
|
178
|
+
this.metastore.prune(start)
|
|
163
179
|
meta.set("words", words.map((word) => {
|
|
164
180
|
const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
|
|
165
181
|
const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
|
|
@@ -180,14 +196,11 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
180
196
|
})
|
|
181
197
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
|
|
182
198
|
this.log("info", "connection close")
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
this.log("error", `error: ${error.message}`)
|
|
188
|
-
if (!this.closing && this.queue !== null)
|
|
199
|
+
/* NOTICE: suppress EOF signalling while reconfiguring (mute toggle),
|
|
200
|
+
since the connection is being torn down deliberately and the
|
|
201
|
+
graph must keep running */
|
|
202
|
+
if (!this.closing && !this.reconfiguring && this.queue !== null)
|
|
189
203
|
this.queue.write(null)
|
|
190
|
-
this.emit("error", error)
|
|
191
204
|
})
|
|
192
205
|
|
|
193
206
|
/* wait for Deepgram API to be available */
|
|
@@ -215,6 +228,90 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
215
228
|
})
|
|
216
229
|
})
|
|
217
230
|
|
|
231
|
+
/* NOTICE: register permanent Error handler only AFTER the open
|
|
232
|
+
handshake -- during open, the transient .once above is the sole
|
|
233
|
+
Error listener so the caller's promise rejects without a parallel
|
|
234
|
+
stream emission tearing down the graph prematurely. */
|
|
235
|
+
this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
|
|
236
|
+
this.log("warning", `error: ${error.message}`)
|
|
237
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
238
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
239
|
+
the subsequent Deepgram Close event will signal real EOF. Also
|
|
240
|
+
do not emit("error") on the node itself, since nothing listens
|
|
241
|
+
for it and it would become an uncaughtException tearing down
|
|
242
|
+
the whole graph. Route via the stream instead, where it is
|
|
243
|
+
downgraded to a warning by the graph supervisor. */
|
|
244
|
+
if (!this.closing && this.stream !== null)
|
|
245
|
+
this.stream.emit("error", error)
|
|
246
|
+
})
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* close Deepgram API connection */
|
|
250
|
+
private async closeConnection () {
|
|
251
|
+
/* cleanup pending connection timer */
|
|
252
|
+
if (this.connectionTimeout !== null) {
|
|
253
|
+
clearTimeout(this.connectionTimeout)
|
|
254
|
+
this.connectionTimeout = null
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/* close Deepgram connection and remove listeners */
|
|
258
|
+
if (this.dg !== null) {
|
|
259
|
+
try {
|
|
260
|
+
this.dg.removeAllListeners()
|
|
261
|
+
this.dg.requestClose()
|
|
262
|
+
this.log("info", "connection closed")
|
|
263
|
+
}
|
|
264
|
+
catch (error) {
|
|
265
|
+
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
266
|
+
}
|
|
267
|
+
this.dg = null
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/* one-time status of node */
|
|
272
|
+
async status () {
|
|
273
|
+
let balance = 0
|
|
274
|
+
try {
|
|
275
|
+
const deepgram = Deepgram.createClient(this.params.keyAdm)
|
|
276
|
+
const response = await deepgram.manage.getProjects()
|
|
277
|
+
if (response !== null && response.error === null && response.result?.projects) {
|
|
278
|
+
for (const project of response.result.projects) {
|
|
279
|
+
const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id)
|
|
280
|
+
if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances)
|
|
281
|
+
balance += balanceResponse.result.balances[0]?.amount ?? 0
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
else if (response !== null && response.error !== null)
|
|
285
|
+
this.log("warning", `API error fetching projects: ${response.error}`)
|
|
286
|
+
}
|
|
287
|
+
catch (error) {
|
|
288
|
+
this.log("warning", `failed to fetch balance: ${error}`)
|
|
289
|
+
}
|
|
290
|
+
return { balance: balance.toFixed(2) }
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/* open node */
|
|
294
|
+
async open () {
|
|
295
|
+
/* sanity check situation */
|
|
296
|
+
if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
|
|
297
|
+
throw new Error("Deepgram node currently supports PCM-S16LE audio only")
|
|
298
|
+
|
|
299
|
+
/* clear destruction flag */
|
|
300
|
+
this.closing = false
|
|
301
|
+
|
|
302
|
+
/* create queue for results */
|
|
303
|
+
this.queue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
304
|
+
|
|
305
|
+
/* create a store for the meta information */
|
|
306
|
+
this.metastore = new util.TimeStore<Map<string, any>>()
|
|
307
|
+
|
|
308
|
+
/* determine initial suspended state from configuration */
|
|
309
|
+
this.suspended = this.params.suspended as boolean
|
|
310
|
+
|
|
311
|
+
/* establish Deepgram API connection (unless starting suspended) */
|
|
312
|
+
if (!this.suspended)
|
|
313
|
+
await this.openConnection()
|
|
314
|
+
|
|
218
315
|
/* remember opening time to receive time zero offset */
|
|
219
316
|
this.timeOpen = DateTime.now()
|
|
220
317
|
|
|
@@ -227,7 +324,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
227
324
|
decodeStrings: false,
|
|
228
325
|
highWaterMark: 1,
|
|
229
326
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
230
|
-
if (self.closing
|
|
327
|
+
if (self.closing) {
|
|
231
328
|
callback(new Error("stream already destroyed"))
|
|
232
329
|
return
|
|
233
330
|
}
|
|
@@ -235,11 +332,14 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
235
332
|
callback(new Error("expected audio input chunk"))
|
|
236
333
|
else if (!Buffer.isBuffer(chunk.payload))
|
|
237
334
|
callback(new Error("expected Buffer input chunk"))
|
|
335
|
+
else if (self.suspended || self.dg === null)
|
|
336
|
+
/* drop audio entirely -- do not forward to Deepgram */
|
|
337
|
+
callback()
|
|
238
338
|
else {
|
|
239
339
|
if (chunk.payload.byteLength > 0) {
|
|
240
340
|
self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
241
|
-
if (chunk.meta.size > 0)
|
|
242
|
-
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
341
|
+
if (chunk.meta.size > 0 && self.metastore !== null)
|
|
342
|
+
self.metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
243
343
|
try {
|
|
244
344
|
/* send buffer (and intentionally discard all time information) */
|
|
245
345
|
self.dg.send(chunk.payload.buffer.slice(
|
|
@@ -257,17 +357,19 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
257
357
|
},
|
|
258
358
|
async final (callback) {
|
|
259
359
|
/* short-circuiting in case of own closing */
|
|
260
|
-
if (self.closing
|
|
360
|
+
if (self.closing) {
|
|
261
361
|
callback()
|
|
262
362
|
return
|
|
263
363
|
}
|
|
264
364
|
|
|
265
365
|
/* close Deepgram API */
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
366
|
+
if (self.dg !== null) {
|
|
367
|
+
try {
|
|
368
|
+
self.dg.requestClose()
|
|
369
|
+
}
|
|
370
|
+
catch (error) {
|
|
371
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
372
|
+
}
|
|
271
373
|
}
|
|
272
374
|
|
|
273
375
|
/* await all read operations */
|
|
@@ -307,34 +409,22 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
307
409
|
/* indicate closing first to stop all async operations */
|
|
308
410
|
this.closing = true
|
|
309
411
|
|
|
310
|
-
/* cleanup all timers */
|
|
311
|
-
if (this.connectionTimeout !== null) {
|
|
312
|
-
clearTimeout(this.connectionTimeout)
|
|
313
|
-
this.connectionTimeout = null
|
|
314
|
-
}
|
|
315
|
-
|
|
316
412
|
/* shutdown stream */
|
|
317
413
|
if (this.stream !== null) {
|
|
318
414
|
await util.destroyStream(this.stream)
|
|
319
415
|
this.stream = null
|
|
320
416
|
}
|
|
321
417
|
|
|
322
|
-
/* close Deepgram connection
|
|
323
|
-
|
|
324
|
-
try {
|
|
325
|
-
this.dg.removeAllListeners()
|
|
326
|
-
this.dg.requestClose()
|
|
327
|
-
}
|
|
328
|
-
catch (error) {
|
|
329
|
-
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
330
|
-
}
|
|
331
|
-
this.dg = null
|
|
332
|
-
}
|
|
418
|
+
/* close Deepgram API connection */
|
|
419
|
+
await this.closeConnection()
|
|
333
420
|
|
|
334
421
|
/* signal EOF to any pending read operations */
|
|
335
422
|
if (this.queue !== null) {
|
|
336
423
|
this.queue.write(null)
|
|
337
424
|
this.queue = null
|
|
338
425
|
}
|
|
426
|
+
|
|
427
|
+
/* discard meta information store */
|
|
428
|
+
this.metastore = null
|
|
339
429
|
}
|
|
340
430
|
}
|
|
@@ -190,10 +190,16 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
190
190
|
}
|
|
191
191
|
})
|
|
192
192
|
this.recognizeStream.on("error", (error: Error) => {
|
|
193
|
-
this.log("
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
193
|
+
this.log("warning", `error: ${error.message}`)
|
|
194
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
195
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
196
|
+
the subsequent recognize stream end event will signal real EOF.
|
|
197
|
+
Also do not emit("error") on the node itself, since nothing
|
|
198
|
+
listens for it and it would become an uncaughtException tearing
|
|
199
|
+
down the whole graph. Route via the stream instead, where it is
|
|
200
|
+
downgraded to a warning by the graph supervisor. */
|
|
201
|
+
if (!this.closing && this.stream !== null)
|
|
202
|
+
this.stream.emit("error", error)
|
|
197
203
|
})
|
|
198
204
|
this.recognizeStream.on("end", () => {
|
|
199
205
|
this.log("info", "stream ended")
|
|
@@ -145,10 +145,16 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
145
145
|
this.queue.write(null)
|
|
146
146
|
})
|
|
147
147
|
this.ws.on("error", (err) => {
|
|
148
|
-
this.log("
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
148
|
+
this.log("warning", `WebSocket connection error: ${err}`)
|
|
149
|
+
/* NOTICE: do not write null to the queue here -- a transient error
|
|
150
|
+
must not be misinterpreted as end-of-stream by downstream nodes;
|
|
151
|
+
the subsequent WebSocket close event will signal real EOF. Also
|
|
152
|
+
do not emit("error") on the node itself, since nothing listens
|
|
153
|
+
for it and it would become an uncaughtException tearing down
|
|
154
|
+
the whole graph. Route via the stream instead, where it is
|
|
155
|
+
downgraded to a warning by the graph supervisor. */
|
|
156
|
+
if (!this.closing && this.stream !== null)
|
|
157
|
+
this.stream.emit("error", err)
|
|
152
158
|
})
|
|
153
159
|
|
|
154
160
|
/* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
|
|
@@ -86,6 +86,7 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
|
|
|
86
86
|
})
|
|
87
87
|
|
|
88
88
|
/* establish a transform stream and connect it to Google Translate */
|
|
89
|
+
const self = this
|
|
89
90
|
this.stream = new Stream.Transform({
|
|
90
91
|
readableObjectMode: true,
|
|
91
92
|
writableObjectMode: true,
|
|
@@ -99,7 +100,9 @@ export default class SpeechFlowNodeT2TGoogle extends SpeechFlowNode {
|
|
|
99
100
|
callback()
|
|
100
101
|
}
|
|
101
102
|
else {
|
|
103
|
+
self.log("info", `receive text (${chunk.kind}): "${chunk.payload}"`)
|
|
102
104
|
translate(chunk.payload).then((payload) => {
|
|
105
|
+
self.log("info", `send text (${chunk.kind}): "${payload}"`)
|
|
103
106
|
const chunkNew = chunk.clone()
|
|
104
107
|
chunkNew.payload = payload
|
|
105
108
|
this.push(chunkNew)
|
package/speechflow-cli/src/{speechflow-node-t2t-spellcheck.ts → speechflow-node-t2t-proofread.ts}
RENAMED
|
@@ -13,79 +13,65 @@ import * as util from "./speechflow-util"
|
|
|
13
13
|
import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
|
|
14
14
|
|
|
15
15
|
/* internal utility types */
|
|
16
|
-
type
|
|
17
|
-
|
|
16
|
+
type PromptParts = {
|
|
17
|
+
systemPrompt: string,
|
|
18
|
+
chat: LLMCompleteMessage[]
|
|
19
|
+
}
|
|
20
|
+
type Config = { [ key: string ]: PromptParts }
|
|
18
21
|
|
|
19
|
-
/* SpeechFlow node for LLM-based text-to-text
|
|
20
|
-
export default class
|
|
22
|
+
/* SpeechFlow node for LLM-based text-to-text proofreading */
|
|
23
|
+
export default class SpeechFlowNodeT2TProofread extends SpeechFlowNode {
|
|
21
24
|
/* declare official node name */
|
|
22
|
-
public static name = "t2t-
|
|
25
|
+
public static name = "t2t-proofread"
|
|
23
26
|
|
|
24
27
|
/* internal state */
|
|
25
28
|
private llm: LLM | null = null
|
|
26
29
|
|
|
27
|
-
/* internal LLM setup */
|
|
30
|
+
/* internal LLM setup: prompt parts per language */
|
|
28
31
|
private setup: Config = {
|
|
29
|
-
/* English (EN)
|
|
32
|
+
/* English (EN) proofreading */
|
|
30
33
|
"en": {
|
|
31
34
|
systemPrompt:
|
|
32
|
-
"You are a
|
|
35
|
+
"You are a strict text corrector for English.\n" +
|
|
36
|
+
"Correct spelling, punctuation and grammar.\n" +
|
|
33
37
|
"Output only the corrected text.\n" +
|
|
34
|
-
"Do NOT use markdown.\n" +
|
|
35
|
-
"Do NOT give any explanations.\n" +
|
|
36
|
-
"Do NOT give any introduction.\n" +
|
|
37
|
-
"Do NOT give any comments.\n" +
|
|
38
|
-
"Do NOT give any preamble.\n" +
|
|
39
|
-
"Do NOT give any prolog.\n" +
|
|
40
|
-
"Do NOT give any epilog.\n" +
|
|
41
|
-
"Do NOT change the grammar.\n" +
|
|
42
|
-
"Do NOT use synonyms for words.\n" +
|
|
43
|
-
"Keep all words.\n" +
|
|
44
|
-
"Fill in missing commas.\n" +
|
|
45
|
-
"Fill in missing points.\n" +
|
|
46
|
-
"Fill in missing question marks.\n" +
|
|
47
|
-
"Fill in missing hyphens.\n" +
|
|
48
|
-
"Focus ONLY on the word spelling.\n" +
|
|
49
38
|
"The text you have to correct is:\n",
|
|
50
39
|
chat: [
|
|
51
|
-
{ role: "user", content: "I luve my wyfe" },
|
|
40
|
+
{ role: "user", content: "I luve my wyfe." },
|
|
52
41
|
{ role: "assistant", content: "I love my wife." },
|
|
53
42
|
{ role: "user", content: "The weether is wunderfull!" },
|
|
54
43
|
{ role: "assistant", content: "The weather is wonderful!" },
|
|
55
|
-
{ role: "user", content: "
|
|
56
|
-
{ role: "assistant", content: "
|
|
44
|
+
{ role: "user", content: "Hello how are you today" },
|
|
45
|
+
{ role: "assistant", content: "Hello, how are you today?" },
|
|
46
|
+
{ role: "user", content: "I went to the store and bought some milk eggs and bread" },
|
|
47
|
+
{ role: "assistant", content: "I went to the store and bought some milk, eggs, and bread." },
|
|
48
|
+
{ role: "user", content: "She don't likes apples." },
|
|
49
|
+
{ role: "assistant", content: "She doesn't like apples." },
|
|
50
|
+
{ role: "user", content: "Yesterday I go to the park." },
|
|
51
|
+
{ role: "assistant", content: "Yesterday I went to the park." }
|
|
57
52
|
]
|
|
58
53
|
},
|
|
59
54
|
|
|
60
|
-
/* German (DE)
|
|
55
|
+
/* German (DE) proofreading */
|
|
61
56
|
"de": {
|
|
62
57
|
systemPrompt:
|
|
63
|
-
"Du bist ein
|
|
58
|
+
"Du bist ein strikter Textkorrektor für Deutsch.\n" +
|
|
59
|
+
"Korrigiere die Rechtschreibung, die Zeichensetzung und die Grammatik.\n" +
|
|
64
60
|
"Gib nur den korrigierten Text aus.\n" +
|
|
65
|
-
"Benutze KEIN Markdown.\n" +
|
|
66
|
-
"Gib KEINE Erklärungen.\n" +
|
|
67
|
-
"Gib KEINE Einleitung.\n" +
|
|
68
|
-
"Gib KEINE Kommentare.\n" +
|
|
69
|
-
"Gib KEINE Präambel.\n" +
|
|
70
|
-
"Gib KEINEN Prolog.\n" +
|
|
71
|
-
"Gib KEINEN Epilog.\n" +
|
|
72
|
-
"Ändere NICHT die Grammatik.\n" +
|
|
73
|
-
"Verwende KEINE Synonyme für Wörter.\n" +
|
|
74
|
-
"Behalte alle Wörter bei.\n" +
|
|
75
|
-
"Füge fehlende Kommas ein.\n" +
|
|
76
|
-
"Füge fehlende Punkte ein.\n" +
|
|
77
|
-
"Füge fehlende Fragezeichen ein.\n" +
|
|
78
|
-
"Füge fehlende Bindestriche ein.\n" +
|
|
79
|
-
"Füge fehlende Gedankenstriche ein.\n" +
|
|
80
|
-
"Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
|
|
81
61
|
"Der von dir zu korrigierende Text ist:\n",
|
|
82
62
|
chat: [
|
|
83
|
-
{ role: "user", content: "Ich ljebe meine Frao" },
|
|
63
|
+
{ role: "user", content: "Ich ljebe meine Frao." },
|
|
84
64
|
{ role: "assistant", content: "Ich liebe meine Frau." },
|
|
85
65
|
{ role: "user", content: "Die Wedter ist wunderschoen." },
|
|
86
66
|
{ role: "assistant", content: "Das Wetter ist wunderschön." },
|
|
87
|
-
{ role: "user", content: "
|
|
88
|
-
{ role: "assistant", content: "
|
|
67
|
+
{ role: "user", content: "Hallo wie geht es dir heute" },
|
|
68
|
+
{ role: "assistant", content: "Hallo, wie geht es dir heute?" },
|
|
69
|
+
{ role: "user", content: "Ich bin in den Laden gegangen und habe Milch Eier und Brot gekauft" },
|
|
70
|
+
{ role: "assistant", content: "Ich bin in den Laden gegangen und habe Milch, Eier und Brot gekauft." },
|
|
71
|
+
{ role: "user", content: "Er gehen nach Hause." },
|
|
72
|
+
{ role: "assistant", content: "Er geht nach Hause." },
|
|
73
|
+
{ role: "user", content: "Gestern ich gehe in den Park." },
|
|
74
|
+
{ role: "assistant", content: "Gestern ging ich in den Park." }
|
|
89
75
|
]
|
|
90
76
|
}
|
|
91
77
|
}
|
|
@@ -96,17 +82,13 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
96
82
|
|
|
97
83
|
/* declare node configuration parameters */
|
|
98
84
|
this.configure({
|
|
99
|
-
lang:
|
|
100
|
-
provider:
|
|
101
|
-
api:
|
|
102
|
-
model:
|
|
103
|
-
key:
|
|
85
|
+
lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
|
|
86
|
+
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
87
|
+
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
88
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
89
|
+
key: { type: "string", val: "", match: /^.*$/ }
|
|
104
90
|
})
|
|
105
91
|
|
|
106
|
-
/* tell effective mode */
|
|
107
|
-
this.log("info", `spellchecking language "${this.params.lang}" ` +
|
|
108
|
-
`via ${this.params.provider} LLM (model: ${this.params.model})`)
|
|
109
|
-
|
|
110
92
|
/* declare node input/output format */
|
|
111
93
|
this.input = "text"
|
|
112
94
|
this.output = "text"
|
|
@@ -114,6 +96,10 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
114
96
|
|
|
115
97
|
/* open node */
|
|
116
98
|
async open () {
|
|
99
|
+
/* tell effective mode */
|
|
100
|
+
this.log("info", `proofreading language "${this.params.lang}" ` +
|
|
101
|
+
`via ${this.params.provider} LLM (model: ${this.params.model})`)
|
|
102
|
+
|
|
117
103
|
/* instantiate LLM */
|
|
118
104
|
this.llm = new LLM({
|
|
119
105
|
provider: this.params.provider,
|
|
@@ -127,17 +113,20 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
127
113
|
})
|
|
128
114
|
await this.llm.open()
|
|
129
115
|
|
|
130
|
-
/* provide text-to-text
|
|
116
|
+
/* provide text-to-text proofreading */
|
|
131
117
|
const llm = this.llm
|
|
132
|
-
const
|
|
118
|
+
const proofread = async (text: string) => {
|
|
133
119
|
const cfg = this.setup[this.params.lang]
|
|
134
120
|
if (!cfg)
|
|
135
121
|
throw new Error(`unsupported language: ${this.params.lang}`)
|
|
136
|
-
|
|
122
|
+
this.log("info", `input: "${text}"`)
|
|
123
|
+
const output = await llm.complete({
|
|
137
124
|
system: cfg.systemPrompt,
|
|
138
125
|
messages: cfg.chat,
|
|
139
126
|
prompt: text
|
|
140
127
|
})
|
|
128
|
+
this.log("info", `output: "${output}"`)
|
|
129
|
+
return output
|
|
141
130
|
}
|
|
142
131
|
|
|
143
132
|
/* establish a transform stream and connect it to LLM */
|
|
@@ -154,14 +143,20 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
|
|
|
154
143
|
callback()
|
|
155
144
|
}
|
|
156
145
|
else {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
146
|
+
if (chunk.kind === "final") {
|
|
147
|
+
proofread(chunk.payload).then((payload) => {
|
|
148
|
+
const chunkNew = chunk.clone()
|
|
149
|
+
chunkNew.payload = payload
|
|
150
|
+
this.push(chunkNew)
|
|
151
|
+
callback()
|
|
152
|
+
}).catch((error: unknown) => {
|
|
153
|
+
callback(util.ensureError(error))
|
|
154
|
+
})
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
this.push(chunk)
|
|
161
158
|
callback()
|
|
162
|
-
}
|
|
163
|
-
callback(util.ensureError(error))
|
|
164
|
-
})
|
|
159
|
+
}
|
|
165
160
|
}
|
|
166
161
|
},
|
|
167
162
|
final (callback) {
|
|
@@ -324,12 +324,30 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
324
324
|
return
|
|
325
325
|
}
|
|
326
326
|
if (element.chunk.kind === "intermediate") {
|
|
327
|
-
|
|
328
|
-
|
|
327
|
+
/* the trailing element is a (speculative) intermediate chunk:
|
|
328
|
+
if the newly arrived chunk carries the very same intermediate
|
|
329
|
+
payload (Deepgram re-sends identical intermediates), treat it
|
|
330
|
+
as a no-op to avoid churning the queue (delete+append) and
|
|
331
|
+
re-emitting an unchanged preview */
|
|
332
|
+
if (chunk.kind === "intermediate"
|
|
333
|
+
&& (element.chunk.payload as string) === (chunk.payload as string)) {
|
|
334
|
+
self.lastChunkTime = Date.now()
|
|
335
|
+
callback()
|
|
336
|
+
return
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/* remove the trailing intermediate silently, so the
|
|
340
|
+
transient half-mutated queue (intermediate deleted, but
|
|
341
|
+
replacement not yet appended) does NOT emit a "write"
|
|
342
|
+
event and trigger a premature flush that would emit a
|
|
343
|
+
truncated preview and oscillate the dashboard preview */
|
|
344
|
+
self.queue.silently(() => {
|
|
345
|
+
self.queueRecv.walk(-1)
|
|
346
|
+
self.queueRecv.delete()
|
|
347
|
+
})
|
|
329
348
|
}
|
|
330
349
|
}
|
|
331
350
|
}
|
|
332
|
-
previewedPayload = ""
|
|
333
351
|
self.queueRecv.append({ type: "text-frame", chunk, complete: false })
|
|
334
352
|
self.lastChunkTime = Date.now()
|
|
335
353
|
callback()
|
|
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
|
|
|
83
83
|
this.configure({
|
|
84
84
|
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
85
85
|
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
86
|
-
model: { type: "string", val: "
|
|
86
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
87
87
|
key: { type: "string", val: "", match: /^.*$/ },
|
|
88
88
|
lang: { type: "string", pos: 0, val: "en", match: /^(?:en|de)$/ },
|
|
89
89
|
size: { type: "number", pos: 1, val: 4, match: (n: number) => n >= 1 && n <= 20 },
|
|
@@ -109,7 +109,7 @@ export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
|
|
|
109
109
|
dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
|
|
110
110
|
provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
|
|
111
111
|
api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
|
|
112
|
-
model: { type: "string", val: "
|
|
112
|
+
model: { type: "string", val: "gemma4:e4b", match: /^.+$/ },
|
|
113
113
|
key: { type: "string", val: "", match: /^.*$/ }
|
|
114
114
|
})
|
|
115
115
|
|