speechflow 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/package.json +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +10 -9
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +17 -2
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +7 -8
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +5 -5
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +5 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -4
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +8 -3
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +6 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +3 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +9 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +3 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +159 -56
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +4 -4
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +6 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +14 -4
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +11 -9
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +12 -2
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +4 -0
- package/speechflow-cli/dst/speechflow-util-queue.js +41 -0
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +12 -9
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +23 -7
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +7 -8
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +6 -6
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +6 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -6
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +12 -3
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +8 -1
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +3 -1
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +1 -0
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +9 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +214 -61
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +5 -5
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +7 -1
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +14 -4
- package/speechflow-cli/src/speechflow-util-audio.ts +12 -10
- package/speechflow-cli/src/speechflow-util-llm.ts +13 -2
- package/speechflow-cli/src/speechflow-util-queue.ts +43 -0
- package/speechflow-ui-db/dst/index.js +3 -3
|
@@ -35,6 +35,66 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
35
35
|
private queueRecv = this.queue.pointerUse("recv")
|
|
36
36
|
private closing = false
|
|
37
37
|
private workingOffTimer: ReturnType<typeof setTimeout> | null = null
|
|
38
|
+
private lastChunkTime = 0
|
|
39
|
+
|
|
40
|
+
/* known abbreviations from English and German (lowercased),
|
|
41
|
+
which should NOT be treated as sentence boundaries */
|
|
42
|
+
private static abbreviations = new Set([
|
|
43
|
+
"prof", "dr", "mr", "mrs", "ms", "jr", "sr", "st",
|
|
44
|
+
"vs", "etc", "ca", "bzw", "bspw", "usw", "sog", "ggf", "evtl"
|
|
45
|
+
])
|
|
46
|
+
|
|
47
|
+
/*  find the first valid sentence boundary in text

    Scans for the first sentence-ending punctuation mark ("...", "…", ".",
    "?" or "!") which is not part of an abbreviation and which is followed
    either by whitespace plus further non-whitespace text or by the end
    of the text. Optional closing quotes/parentheses/brackets directly
    following the punctuation mark are kept with the sentence.

    Returns the sentence (including punctuation and closing characters)
    and the remaining text (without the separating whitespace, and empty
    if nothing but whitespace follows), or null if no boundary is found.  */
private static findSentenceBoundary (text: string): { sentence: string, rest: string } | null {
    for (let i = 0; i < text.length; i++) {
        /*  match sentence-ending punctuation (including ellipsis "..." and "…")  */
        const pm = /^(\.\.\.|\u2026|\.|\?|!)/.exec(text.slice(i, i + 3))
        if (!pm)
            continue
        const firstPunctPos = i
        i += pm[1].length - 1

        /*  extract the word preceding the punctuation mark
            (start directly before the mark: for a mark at position 0 the
            loop is skipped and the word is empty, instead of substring()
            swapping its arguments and yielding the mark itself)  */
        let j = firstPunctPos - 1
        while (j >= 0) {
            /*  handle surrogate pairs (for characters outside the BMP)  */
            if (j > 0 && /[\uDC00-\uDFFF]/.test(text[j])) {
                if (!/^\p{L}$/u.test(text[j - 1] + text[j]))
                    break
                j -= 2
            }
            else {
                if (!/^\p{L}$/u.test(text[j]))
                    break
                j--
            }
        }
        const precedingWord = text.substring(j + 1, firstPunctPos)

        /*  skip abbreviations (only relevant for periods)  */
        if (pm[1] === ".") {
            /*  skip single-letter abbreviations (handles "U.S.", "e.g.", "i.e.", etc.)  */
            if (precedingWord.length === 1 && /^\p{L}$/u.test(precedingWord))
                continue

            /*  skip known multi-letter abbreviations (case-insensitive matching)  */
            if (SpeechFlowNodeT2TSentence.abbreviations.has(precedingWord.toLowerCase()))
                continue
        }

        /*  return what follows the punctuation mark
            (also skip over optional closing quotes/parentheses/brackets);
            the rest has to start with a non-whitespace character, as
            otherwise two or more trailing whitespace characters would be
            reported as a whitespace-only rest instead of as end of text  */
        const after = text.substring(i + 1)
        const m = after.match(/^(["\u201D\u2019)\]]*)\s+(\S[\s\S]*)$/)
        if (m !== null)
            return { sentence: text.substring(0, i + 1 + m[1].length), rest: m[2] }

        /*  found a punctuation at end of text (possibly with trailing closing chars and whitespace)  */
        if (/^["\u201D\u2019)\]]*\s*$/.test(after))
            return { sentence: text.substring(0, i + 1) + after.replace(/\s+$/, ""), rest: "" }
    }
    return null
}
|
|
38
98
|
|
|
39
99
|
/* construct node */
|
|
40
100
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -53,7 +113,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
53
113
|
|
|
54
114
|
/* concatenate two payloads with proper whitespacing */
|
|
55
115
|
private concatPayload (s1: string, s2: string) {
|
|
56
|
-
if (!(
|
|
116
|
+
if (!(/\s+$/.test(s1) || /^\s+/.test(s2)))
|
|
57
117
|
return `${s1} ${s2}`
|
|
58
118
|
else
|
|
59
119
|
return `${s1}${s2}`
|
|
@@ -64,24 +124,12 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
64
124
|
/* clear destruction flag */
|
|
65
125
|
this.closing = false
|
|
66
126
|
|
|
67
|
-
/* work off queued text frames */
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
/* control working off round */
|
|
74
|
-
if (workingOff)
|
|
75
|
-
return
|
|
76
|
-
workingOff = true
|
|
77
|
-
if (this.workingOffTimer !== null) {
|
|
78
|
-
clearTimeout(this.workingOffTimer)
|
|
79
|
-
this.workingOffTimer = null
|
|
80
|
-
}
|
|
81
|
-
this.queue.off("write", workOffQueue)
|
|
82
|
-
|
|
83
|
-
/* try to work off one or more chunks */
|
|
84
|
-
while (!this.closing) {
|
|
127
|
+
/* work off queued text frames (inner processing) */
|
|
128
|
+
const workOffQueueInner = (): boolean => {
|
|
129
|
+
const maxIterations = 50
|
|
130
|
+
let iterations = 0
|
|
131
|
+
while (!this.closing && iterations < maxIterations) {
|
|
132
|
+
iterations++
|
|
85
133
|
const element = this.queueSplit.peek()
|
|
86
134
|
if (element === undefined)
|
|
87
135
|
break
|
|
@@ -91,46 +139,49 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
91
139
|
}
|
|
92
140
|
|
|
93
141
|
/* skip elements already completed */
|
|
94
|
-
if (element.type === "text-frame"
|
|
142
|
+
if (element.type === "text-frame"
|
|
143
|
+
&& element.chunk.kind === "final"
|
|
144
|
+
&& element.complete === true) {
|
|
95
145
|
this.queueSplit.walk(+1)
|
|
96
146
|
continue
|
|
97
147
|
}
|
|
98
148
|
|
|
99
149
|
/* perform sentence splitting on input chunk */
|
|
100
150
|
if (element.chunk.kind === "final") {
|
|
151
|
+
element.chunk = element.chunk.clone()
|
|
101
152
|
const chunk = element.chunk
|
|
102
153
|
const payload = chunk.payload as string
|
|
103
|
-
const
|
|
104
|
-
if (
|
|
154
|
+
const boundary = SpeechFlowNodeT2TSentence.findSentenceBoundary(payload)
|
|
155
|
+
if (boundary !== null) {
|
|
105
156
|
/* contains a sentence */
|
|
106
|
-
const
|
|
107
|
-
if (rest !==
|
|
157
|
+
const { sentence, rest } = boundary
|
|
158
|
+
if (rest !== "") {
|
|
108
159
|
/* contains more than a sentence */
|
|
109
160
|
const chunk2 = chunk.clone()
|
|
110
161
|
const duration = Duration.fromMillis(
|
|
111
162
|
chunk.timestampEnd.minus(chunk.timestampStart).toMillis() *
|
|
112
|
-
(sentence.length / payload.length))
|
|
163
|
+
(sentence.length / Math.max(payload.length, 1)))
|
|
113
164
|
chunk2.timestampStart = chunk.timestampStart.plus(duration)
|
|
114
165
|
chunk.timestampEnd = chunk2.timestampStart
|
|
115
166
|
chunk.payload = sentence
|
|
116
167
|
chunk2.payload = rest
|
|
117
168
|
element.complete = true
|
|
118
|
-
this.queue.
|
|
119
|
-
this.queueSplit.touch()
|
|
120
|
-
this.queue.silent(false)
|
|
169
|
+
this.queue.silently(() => { this.queueSplit.touch() })
|
|
121
170
|
this.queueSplit.walk(+1)
|
|
122
171
|
this.queueSplit.insert({ type: "text-frame", chunk: chunk2, complete: false })
|
|
123
172
|
}
|
|
124
173
|
else {
|
|
125
174
|
/* contains just the sentence */
|
|
126
175
|
element.complete = true
|
|
127
|
-
this.queue.
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
176
|
+
const position = this.queue.silently(() =>
|
|
177
|
+
this.queueSplit.silently(() => {
|
|
178
|
+
const pos = this.queueSplit.position()
|
|
179
|
+
this.queueSplit.walk(+1)
|
|
180
|
+
return pos
|
|
181
|
+
})
|
|
182
|
+
)
|
|
183
|
+
if (position < this.queue.elements.length)
|
|
184
|
+
this.queueSplit.touch(position)
|
|
134
185
|
}
|
|
135
186
|
}
|
|
136
187
|
else {
|
|
@@ -151,21 +202,52 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
151
202
|
}
|
|
152
203
|
if (element2.chunk.kind === "final") {
|
|
153
204
|
/* merge into following chunk */
|
|
205
|
+
element2.chunk = element2.chunk.clone()
|
|
154
206
|
element2.chunk.timestampStart = element.chunk.timestampStart
|
|
155
207
|
element2.chunk.payload = this.concatPayload(element.chunk.payload as string,
|
|
156
208
|
element2.chunk.payload as string)
|
|
157
209
|
|
|
158
210
|
/* remove current element and touch now current element */
|
|
159
|
-
this.queue.
|
|
160
|
-
this.queueSplit.delete()
|
|
161
|
-
this.queue.silent(false)
|
|
211
|
+
this.queue.silently(() => { this.queueSplit.delete() })
|
|
162
212
|
this.queueSplit.touch()
|
|
163
213
|
}
|
|
164
|
-
else
|
|
165
|
-
|
|
214
|
+
else {
|
|
215
|
+
/* following chunk is intermediate (speculative):
|
|
216
|
+
check timeout to flush incomplete sentence fragment */
|
|
217
|
+
if (this.lastChunkTime > 0
|
|
218
|
+
&& (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
|
|
219
|
+
element.complete = true
|
|
220
|
+
const position2 = this.queue.silently(() =>
|
|
221
|
+
this.queueSplit.silently(() => {
|
|
222
|
+
const pos = this.queueSplit.position()
|
|
223
|
+
this.queueSplit.walk(+1)
|
|
224
|
+
return pos
|
|
225
|
+
})
|
|
226
|
+
)
|
|
227
|
+
if (position2 < this.queue.elements.length)
|
|
228
|
+
this.queueSplit.touch(position2)
|
|
229
|
+
}
|
|
230
|
+
else
|
|
231
|
+
break
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
else if (this.lastChunkTime > 0
|
|
235
|
+
&& (Date.now() - this.lastChunkTime) >= (this.params.timeout as number)) {
|
|
236
|
+
/* no following chunk yet, but timeout expired:
|
|
237
|
+
flush incomplete sentence fragment */
|
|
238
|
+
element.complete = true
|
|
239
|
+
const position = this.queue.silently(() =>
|
|
240
|
+
this.queueSplit.silently(() => {
|
|
241
|
+
const pos = this.queueSplit.position()
|
|
242
|
+
this.queueSplit.walk(+1)
|
|
243
|
+
return pos
|
|
244
|
+
})
|
|
245
|
+
)
|
|
246
|
+
if (position < this.queue.elements.length)
|
|
247
|
+
this.queueSplit.touch(position)
|
|
166
248
|
}
|
|
167
249
|
else {
|
|
168
|
-
/* no following chunk yet */
|
|
250
|
+
/* no following chunk yet, still within timeout */
|
|
169
251
|
break
|
|
170
252
|
}
|
|
171
253
|
}
|
|
@@ -173,18 +255,48 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
173
255
|
else
|
|
174
256
|
break
|
|
175
257
|
}
|
|
258
|
+
return (!this.closing && iterations >= maxIterations)
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
/* work off queued text frames (outer processing) */
|
|
262
|
+
let workingOff = false
|
|
263
|
+
const workOffQueue = async () => {
|
|
264
|
+
if (this.closing)
|
|
265
|
+
return
|
|
266
|
+
|
|
267
|
+
/* control working off round */
|
|
268
|
+
if (workingOff)
|
|
269
|
+
return
|
|
270
|
+
workingOff = true
|
|
271
|
+
if (this.workingOffTimer !== null) {
|
|
272
|
+
clearTimeout(this.workingOffTimer)
|
|
273
|
+
this.workingOffTimer = null
|
|
274
|
+
}
|
|
275
|
+
this.queue.off("write", workOffQueue)
|
|
176
276
|
|
|
177
|
-
/*
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
277
|
+
/* try to work off one or more chunks */
|
|
278
|
+
let hasMore = false
|
|
279
|
+
try {
|
|
280
|
+
hasMore = workOffQueueInner()
|
|
281
|
+
}
|
|
282
|
+
catch (error) {
|
|
283
|
+
this.log("error", `sentence splitting error: ${error}`)
|
|
284
|
+
}
|
|
285
|
+
finally {
|
|
286
|
+
/* re-initiate working off round (if still not destroyed) */
|
|
287
|
+
workingOff = false
|
|
288
|
+
if (!this.closing) {
|
|
289
|
+
this.workingOffTimer = setTimeout(workOffQueue, hasMore ? 0 : 100)
|
|
290
|
+
this.queue.once("write", workOffQueue)
|
|
291
|
+
}
|
|
181
292
|
}
|
|
182
|
-
workingOff = false
|
|
183
293
|
}
|
|
184
294
|
this.queue.once("write", workOffQueue)
|
|
185
295
|
|
|
186
296
|
/* provide Duplex stream and internally attach to classifier */
|
|
187
|
-
let
|
|
297
|
+
let previewedPayload = ""
|
|
298
|
+
let flushListenerRegistered = false
|
|
299
|
+
let eofPushed = false
|
|
188
300
|
const self = this
|
|
189
301
|
this.stream = new Stream.Duplex({
|
|
190
302
|
writableObjectMode: true,
|
|
@@ -217,8 +329,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
217
329
|
}
|
|
218
330
|
}
|
|
219
331
|
}
|
|
220
|
-
|
|
332
|
+
previewedPayload = ""
|
|
221
333
|
self.queueRecv.append({ type: "text-frame", chunk, complete: false })
|
|
334
|
+
self.lastChunkTime = Date.now()
|
|
222
335
|
callback()
|
|
223
336
|
}
|
|
224
337
|
},
|
|
@@ -229,6 +342,20 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
229
342
|
callback()
|
|
230
343
|
return
|
|
231
344
|
}
|
|
345
|
+
|
|
346
|
+
/* promote any trailing intermediate chunk to final
|
|
347
|
+
(no replacement will ever arrive, so treat it as final) */
|
|
348
|
+
const recvPos = self.queueRecv.position()
|
|
349
|
+
if (recvPos > 0) {
|
|
350
|
+
const element = self.queueRecv.peek(recvPos - 1)
|
|
351
|
+
if (element
|
|
352
|
+
&& element.type === "text-frame"
|
|
353
|
+
&& element.chunk.kind === "intermediate") {
|
|
354
|
+
element.chunk = element.chunk.clone()
|
|
355
|
+
element.chunk.kind = "final"
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
232
359
|
/* signal end of file */
|
|
233
360
|
self.queueRecv.append({ type: "text-eof" })
|
|
234
361
|
callback()
|
|
@@ -236,17 +363,27 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
236
363
|
|
|
237
364
|
/* send text chunk(s) (readable side of stream) */
|
|
238
365
|
read (_size) {
|
|
366
|
+
/* idempotently push EOF to readable side */
|
|
367
|
+
const pushNull = () => {
|
|
368
|
+
if (eofPushed)
|
|
369
|
+
return
|
|
370
|
+
eofPushed = true
|
|
371
|
+
this.push(null)
|
|
372
|
+
}
|
|
373
|
+
|
|
239
374
|
/* flush pending text chunks */
|
|
240
375
|
const flushPendingChunks = () => {
|
|
376
|
+
flushListenerRegistered = false
|
|
241
377
|
if (self.closing) {
|
|
242
|
-
|
|
378
|
+
pushNull()
|
|
243
379
|
return
|
|
244
380
|
}
|
|
245
381
|
const element = self.queueSend.peek()
|
|
246
382
|
if (element !== undefined
|
|
247
383
|
&& element.type === "text-eof") {
|
|
248
|
-
|
|
384
|
+
pushNull()
|
|
249
385
|
self.queueSend.walk(+1)
|
|
386
|
+
self.queue.trim()
|
|
250
387
|
}
|
|
251
388
|
else if (element !== undefined
|
|
252
389
|
&& element.type === "text-frame"
|
|
@@ -258,7 +395,7 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
258
395
|
if (nextElement === undefined)
|
|
259
396
|
break
|
|
260
397
|
else if (nextElement.type === "text-eof") {
|
|
261
|
-
|
|
398
|
+
pushNull()
|
|
262
399
|
self.queueSend.walk(+1)
|
|
263
400
|
eofSeen = true
|
|
264
401
|
break
|
|
@@ -266,20 +403,22 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
266
403
|
else if (nextElement.type === "text-frame"
|
|
267
404
|
&& nextElement.complete !== true)
|
|
268
405
|
break
|
|
269
|
-
self.log("info", `send text
|
|
406
|
+
self.log("info", `send text/complete (${nextElement.chunk.kind}): ${JSON.stringify(nextElement.chunk.payload)} pos=${self.queueSend.position()}`)
|
|
270
407
|
this.push(nextElement.chunk)
|
|
271
408
|
self.queueSend.walk(+1)
|
|
272
|
-
self.queue.trim()
|
|
273
409
|
}
|
|
410
|
+
previewedPayload = ""
|
|
411
|
+
self.queue.trim()
|
|
274
412
|
|
|
275
413
|
/* wait for more data (unless end-of-stream was reached) */
|
|
276
|
-
if (!eofSeen && !self.closing)
|
|
414
|
+
if (!eofSeen && !self.closing && !flushListenerRegistered) {
|
|
415
|
+
flushListenerRegistered = true
|
|
277
416
|
self.queue.once("write", flushPendingChunks)
|
|
417
|
+
}
|
|
278
418
|
}
|
|
279
419
|
else if (element !== undefined
|
|
280
420
|
&& element.type === "text-frame"
|
|
281
421
|
&& element.complete === false
|
|
282
|
-
&& !previewed
|
|
283
422
|
&& self.params.interim === true) {
|
|
284
423
|
/* merge together all still queued elements and
|
|
285
424
|
send this out as an intermediate chunk as preview */
|
|
@@ -293,17 +432,30 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
293
432
|
break
|
|
294
433
|
previewChunk.payload = self.concatPayload(
|
|
295
434
|
previewChunk.payload as string, element2.chunk.payload as string)
|
|
435
|
+
previewChunk.timestampEnd = element2.chunk.timestampEnd
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
/* send preview only if payload actually changed */
|
|
439
|
+
if ((previewChunk.payload as string) !== previewedPayload) {
|
|
440
|
+
this.push(previewChunk)
|
|
441
|
+
self.log("info", `send text/preview (intermediate): ${JSON.stringify(previewChunk.payload)}`)
|
|
442
|
+
previewedPayload = previewChunk.payload as string
|
|
296
443
|
}
|
|
297
|
-
this.push(previewChunk)
|
|
298
|
-
self.log("info", `send text 2 (intermediate): ${JSON.stringify(previewChunk.payload)}`)
|
|
299
|
-
previewed = true
|
|
300
444
|
|
|
301
445
|
/* wait for more data */
|
|
302
|
-
if (!self.closing)
|
|
446
|
+
if (!self.closing && !flushListenerRegistered) {
|
|
447
|
+
flushListenerRegistered = true
|
|
303
448
|
self.queue.once("write", flushPendingChunks)
|
|
449
|
+
}
|
|
304
450
|
}
|
|
305
|
-
else if (!self.closing)
|
|
451
|
+
else if (!self.closing && !flushListenerRegistered) {
|
|
452
|
+
flushListenerRegistered = true
|
|
306
453
|
self.queue.once("write", flushPendingChunks)
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
if (flushListenerRegistered) {
|
|
457
|
+
self.queue.removeListener("write", flushPendingChunks)
|
|
458
|
+
flushListenerRegistered = false
|
|
307
459
|
}
|
|
308
460
|
flushPendingChunks()
|
|
309
461
|
}
|
|
@@ -321,8 +473,9 @@ export default class SpeechFlowNodeT2TSentence extends SpeechFlowNode {
|
|
|
321
473
|
this.workingOffTimer = null
|
|
322
474
|
}
|
|
323
475
|
|
|
324
|
-
/* remove any pending event listeners */
|
|
476
|
+
/* remove any pending event listeners and clear queue */
|
|
325
477
|
this.queue.removeAllListeners("write")
|
|
478
|
+
this.queue.clear()
|
|
326
479
|
|
|
327
480
|
/* shutdown stream */
|
|
328
481
|
if (this.stream !== null) {
|
|
@@ -96,7 +96,7 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
|
|
|
96
96
|
this.subprocess.on("error", (err) => {
|
|
97
97
|
this.log("error", `subprocess error: ${err.message}`)
|
|
98
98
|
this.emit("error", err)
|
|
99
|
-
if (this.stream !== null)
|
|
99
|
+
if (this.stream !== null && !this.stream.destroyed)
|
|
100
100
|
this.stream.emit("error", err)
|
|
101
101
|
})
|
|
102
102
|
|
|
@@ -172,6 +172,10 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
|
|
|
172
172
|
})
|
|
173
173
|
}
|
|
174
174
|
|
|
175
|
+
/* remove event listeners to prevent errors during kill sequence */
|
|
176
|
+
this.subprocess.removeAllListeners("error")
|
|
177
|
+
this.subprocess.removeAllListeners("exit")
|
|
178
|
+
|
|
175
179
|
/* wait for subprocess to exit gracefully */
|
|
176
180
|
const ac2 = new AbortController()
|
|
177
181
|
await Promise.race([
|
|
@@ -208,10 +212,6 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
|
|
|
208
212
|
this.log("error", "subprocess did not terminate even after SIGKILL")
|
|
209
213
|
})
|
|
210
214
|
|
|
211
|
-
/* remove event listeners to prevent memory leaks */
|
|
212
|
-
this.subprocess.removeAllListeners("error")
|
|
213
|
-
this.subprocess.removeAllListeners("exit")
|
|
214
|
-
|
|
215
215
|
/* clear subprocess reference */
|
|
216
216
|
this.subprocess = null
|
|
217
217
|
}
|
|
@@ -46,6 +46,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
|
|
|
46
46
|
private rtpSequence = 0
|
|
47
47
|
private rtpTimestamp = 0
|
|
48
48
|
private rtpSSRC = 0
|
|
49
|
+
private rtpMarkerNext = true
|
|
49
50
|
private maxConnections = 10
|
|
50
51
|
|
|
51
52
|
/* Opus codec configuration: 48kHz, mono, 16-bit */
|
|
@@ -177,7 +178,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
|
|
|
177
178
|
padding: false,
|
|
178
179
|
paddingSize: 0,
|
|
179
180
|
extension: false,
|
|
180
|
-
marker:
|
|
181
|
+
marker: this.rtpMarkerNext,
|
|
181
182
|
payloadType: 111, /* Opus payload type */
|
|
182
183
|
sequenceNumber: this.rtpSequence++ & 0xFFFF,
|
|
183
184
|
timestamp: this.rtpTimestamp,
|
|
@@ -186,6 +187,9 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
|
|
|
186
187
|
extensions: []
|
|
187
188
|
})
|
|
188
189
|
|
|
190
|
+
/* clear marker (set only on first packet of a talkspurt per RFC 3551) */
|
|
191
|
+
this.rtpMarkerNext = false
|
|
192
|
+
|
|
189
193
|
/* build RTP packet */
|
|
190
194
|
const rtpPacket = new RtpPacket(rtpHeader, opusPacket)
|
|
191
195
|
|
|
@@ -365,6 +369,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
|
|
|
365
369
|
this.rtpSequence = Math.floor(Math.random() * 0x10000)
|
|
366
370
|
this.rtpTimestamp = Math.floor(Math.random() * 0x100000000) >>> 0
|
|
367
371
|
this.rtpSSRC = Math.floor(Math.random() * 0x100000000) >>> 0
|
|
372
|
+
this.rtpMarkerNext = true
|
|
368
373
|
|
|
369
374
|
/* setup chunk queue for incoming audio */
|
|
370
375
|
this.chunkQueue = new util.AsyncQueue<SpeechFlowChunk>()
|
|
@@ -482,6 +487,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
|
|
|
482
487
|
}
|
|
483
488
|
if (self.peerConnections.size === 0) {
|
|
484
489
|
/* silently drop if no viewers connected */
|
|
490
|
+
self.rtpMarkerNext = true
|
|
485
491
|
callback()
|
|
486
492
|
return
|
|
487
493
|
}
|
|
@@ -102,8 +102,13 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
102
102
|
buffer = Buffer.from(data)
|
|
103
103
|
else
|
|
104
104
|
buffer = Buffer.concat(data)
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
try {
|
|
106
|
+
const chunk = util.streamChunkDecode(buffer)
|
|
107
|
+
this.chunkQueue?.write(chunk)
|
|
108
|
+
}
|
|
109
|
+
catch (_err: unknown) {
|
|
110
|
+
this.log("warning", `received invalid CBOR chunk on URL ${this.params.listen} from peer ${peer}`)
|
|
111
|
+
}
|
|
107
112
|
})
|
|
108
113
|
})
|
|
109
114
|
this.server.on("error", (error) => {
|
|
@@ -219,8 +224,13 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
|
|
|
219
224
|
return
|
|
220
225
|
}
|
|
221
226
|
const buffer = Buffer.from(ev.data)
|
|
222
|
-
|
|
223
|
-
|
|
227
|
+
try {
|
|
228
|
+
const chunk = util.streamChunkDecode(buffer)
|
|
229
|
+
this.chunkQueue?.write(chunk)
|
|
230
|
+
}
|
|
231
|
+
catch (_err: unknown) {
|
|
232
|
+
this.log("warning", `received invalid CBOR chunk from URL ${this.params.connect}`)
|
|
233
|
+
}
|
|
224
234
|
})
|
|
225
235
|
this.client.binaryType = "arraybuffer"
|
|
226
236
|
const self = this
|
|
@@ -262,16 +262,9 @@ export class WebAudio {
|
|
|
262
262
|
for (let i = 0; i < int16Array.length; i++)
|
|
263
263
|
float32Data[i] = int16Array[i] / 32768.0
|
|
264
264
|
|
|
265
|
-
/*
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
type: "start-capture",
|
|
269
|
-
chunkId,
|
|
270
|
-
expectedSamples: int16Array.length
|
|
271
|
-
})
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
/* wait for capture-ready acknowledgment before sending data */
|
|
265
|
+
/* register capture-ready handler first (before posting start-capture,
|
|
266
|
+
to avoid a race where capture-ready arrives before the listener
|
|
267
|
+
is in place) */
|
|
275
268
|
const readyHandler = (event: MessageEvent) => {
|
|
276
269
|
const { type: msgType, chunkId: msgChunkId } = event.data ?? {}
|
|
277
270
|
if (msgType === "capture-ready" && msgChunkId === chunkId) {
|
|
@@ -287,6 +280,15 @@ export class WebAudio {
|
|
|
287
280
|
}
|
|
288
281
|
if (this.captureNode !== null)
|
|
289
282
|
this.captureNode.port.addEventListener("message", readyHandler)
|
|
283
|
+
|
|
284
|
+
/* start capture after handler is registered */
|
|
285
|
+
if (this.captureNode !== null) {
|
|
286
|
+
this.captureNode.port.postMessage({
|
|
287
|
+
type: "start-capture",
|
|
288
|
+
chunkId,
|
|
289
|
+
expectedSamples: int16Array.length
|
|
290
|
+
})
|
|
291
|
+
}
|
|
290
292
|
}
|
|
291
293
|
catch (error) {
|
|
292
294
|
clearTimeout(timeout)
|
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
/* standard dependencies */
|
|
8
8
|
import EventEmitter from "node:events"
|
|
9
9
|
|
|
10
|
+
/* internal dependencies */
|
|
11
|
+
import * as util from "./speechflow-util-misc"
|
|
12
|
+
|
|
10
13
|
/* external dependencies */
|
|
11
14
|
import OpenAI from "openai"
|
|
12
15
|
import Anthropic from "@anthropic-ai/sdk"
|
|
@@ -353,8 +356,16 @@ export class LLM extends EventEmitter {
|
|
|
353
356
|
this.ollama?.abort()
|
|
354
357
|
this.ollama = null
|
|
355
358
|
}
|
|
356
|
-
else if (this.config.provider === "transformers") {
|
|
357
|
-
|
|
359
|
+
else if (this.config.provider === "transformers" && this.transformer !== null) {
|
|
360
|
+
const ac = new AbortController()
|
|
361
|
+
await Promise.race([
|
|
362
|
+
this.transformer.dispose(),
|
|
363
|
+
util.timeout(5000, "transformer dispose timeout", ac.signal)
|
|
364
|
+
]).finally(() => {
|
|
365
|
+
ac.abort()
|
|
366
|
+
}).catch((error) => {
|
|
367
|
+
this.log("warning", `error during transformer cleanup: ${error}`)
|
|
368
|
+
})
|
|
358
369
|
this.transformer = null
|
|
359
370
|
}
|
|
360
371
|
this.initialized = false
|