speechflow 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* internal types */

/* host -> "source" worklet: one chunk of interleaved PCM samples to play out */
interface InputChunkMessage {
    type: "input-chunk"
    chunkId: string
    data: { pcmData: Float32Array, channels: number }
}

/* host -> "capture" worklet: start recording a chunk of the given sample count */
interface StartCaptureMessage {
    type: "start-capture"
    chunkId: string
    expectedSamples: number
}

/* every message a worklet can receive from the host (discriminated by "type") */
type WorkletMessage = InputChunkMessage | StartCaptureMessage

/* "source" worklet -> host: playback of the identified chunk has begun */
interface ChunkStartedMessage {
    type: "chunk-started"
    chunkId: string
}

/* "capture" worklet -> host: the recorded interleaved samples of one chunk */
interface CaptureCompleteMessage {
    type: "capture-complete"
    chunkId: string
    data: number[]
}

/* queue entry of the "source" worklet: samples plus the chunk they belong to */
interface ChunkData {
    data: Float32Array
    chunkId: string
}
|
|
32
|
+
|
|
33
|
+
/* audio source node */

/**
 * Audio worklet processor which plays out PCM chunks received over its
 * message port.
 *
 * Chunks arrive as "input-chunk" messages carrying interleaved Float32
 * samples and are queued. Each render quantum takes the next frames of the
 * current chunk, de-interleaves them into the planar output channels and
 * zero-fills whatever is left of the quantum. A "chunk-started" message is
 * posted back when a queued chunk becomes the current one. A following
 * chunk is only picked up on the next `process()` call.
 */
class AudioSourceProcessor extends AudioWorkletProcessor {
    /* internal state */
    private pendingData: ChunkData[] = []
    private currentChunk: ChunkData | null = null
    private currentOffset = 0

    /* node construction */
    constructor() {
        super()

        /* receive input chunks */
        this.port.addEventListener("message", (event: MessageEvent<WorkletMessage>) => {
            const message = event.data
            if (message.type === "input-chunk")
                this.pendingData.push({ data: message.data.pcmData, chunkId: message.chunkId })
        })
    }

    /**
     * Render one audio quantum.
     *
     * @param inputs     unused (the node is a pure source)
     * @param outputs    planar output buffers; only `outputs[0]` is written
     * @param parameters unused
     * @returns always `true` to keep the processor alive
     */
    process(
        inputs: Float32Array[][], /* unused */
        outputs: Float32Array[][],
        parameters: Record<string, Float32Array> /* unused */
    ): boolean {
        /* determine output */
        const output = outputs[0]
        if (!output || output.length === 0)
            return true
        const frameCount = output[0].length
        const channelCount = output.length

        /* get current chunk if we don't have one */
        if (this.currentChunk === null && this.pendingData.length > 0) {
            this.currentChunk = this.pendingData.shift()!
            this.currentOffset = 0

            /* signal chunk start */
            const message: ChunkStartedMessage = {
                type: "chunk-started",
                chunkId: this.currentChunk.chunkId
            }
            this.port.postMessage(message)
        }

        /* process input */
        if (this.currentChunk) {
            /* number of frames in the chunk; rounded up so that a chunk whose
               length is not a multiple of the channel count still yields an
               integer frame count (the missing samples of the final frame
               are emitted as zero below) */
            const samplesPerChannel = Math.ceil(this.currentChunk.data.length / channelCount)
            const remainingFrames = samplesPerChannel - this.currentOffset
            const framesToProcess = Math.min(frameCount, remainingFrames)

            /* copy data from current chunk (interleaved to planar) */
            for (let frame = 0; frame < framesToProcess; frame++) {
                for (let ch = 0; ch < channelCount; ch++) {
                    const interleavedIndex = (this.currentOffset + frame) * channelCount + ch
                    output[ch][frame] = this.currentChunk.data[interleavedIndex] ?? 0
                }
            }

            /* zero-pad remaining output if needed */
            if (framesToProcess < frameCount)
                for (let ch = 0; ch < channelCount; ch++)
                    output[ch].fill(0, framesToProcess)

            /* check if current chunk is finished */
            this.currentOffset += framesToProcess
            if (this.currentOffset >= samplesPerChannel) {
                this.currentChunk = null
                this.currentOffset = 0
            }
        }
        else {
            /* output silence when no input */
            for (let ch = 0; ch < channelCount; ch++)
                output[ch].fill(0)
        }
        return true
    }
}
|
|
113
|
+
|
|
114
|
+
/* audio capture node */

/**
 * Audio worklet processor which records its input on request.
 *
 * A "start-capture" message registers a capture for a chunk id together
 * with the number of interleaved samples to collect. Every render quantum
 * is appended (planar to interleaved) to all active captures. Once a
 * capture holds at least the expected number of samples, exactly that many
 * are posted back in a "capture-complete" message and the capture is
 * dropped.
 */
class AudioCaptureProcessor extends AudioWorkletProcessor {
    /* internal state: running captures, keyed by chunk id */
    private activeCaptures = new Map<string, { data: number[], expectedSamples: number }>()

    /* node construction */
    constructor() {
        super()

        /* receive start of capturing command */
        this.port.addEventListener("message", (event: MessageEvent<WorkletMessage>) => {
            const request = event.data
            if (request.type !== "start-capture")
                return
            this.activeCaptures.set(request.chunkId, {
                data: [],
                expectedSamples: request.expectedSamples
            })
        })
    }

    /**
     * Consume one audio quantum.
     *
     * @param inputs     planar input buffers; only `inputs[0]` is read
     * @param outputs    unused (the node is a pure sink)
     * @param parameters unused
     * @returns always `true` to keep the processor alive
     */
    process(
        inputs: Float32Array[][],
        outputs: Float32Array[][], /* unused */
        parameters: Record<string, Float32Array> /* unused */
    ): boolean {
        /* determine input (nothing to do without input or active captures) */
        const input = inputs[0]
        if (!input || input.length === 0 || this.activeCaptures.size === 0)
            return true
        const channelCount = input.length
        const frameCount = input[0].length

        /* feed the quantum into every active capture */
        this.activeCaptures.forEach((capture, chunkId) => {
            /* convert planar to interleaved */
            for (let frame = 0; frame < frameCount; frame++)
                for (let ch = 0; ch < channelCount; ch++)
                    capture.data.push(input[ch][frame])

            /* still collecting? */
            if (capture.data.length < capture.expectedSamples)
                return

            /* send back captured data (trimmed to the requested size) */
            const message: CaptureCompleteMessage = {
                type: "capture-complete",
                chunkId,
                data: capture.data.slice(0, capture.expectedSamples)
            }
            this.port.postMessage(message)
            this.activeCaptures.delete(chunkId)
        })
        return true
    }
}
|
|
169
|
+
|
|
170
|
+
/* register both processors under the names "source" and "capture" */
|
|
171
|
+
registerProcessor("source", AudioSourceProcessor)
|
|
172
|
+
registerProcessor("capture", AudioCaptureProcessor)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import path from "node:path"
|
|
9
|
+
|
|
10
|
+
/* external dependencies */
|
|
11
|
+
import { AudioContext, AudioWorkletNode } from "node-web-audio-api"
|
|
12
|
+
|
|
13
|
+
/**
 * Wrapper around an `AudioContext` with a "source" and a "capture" audio
 * worklet node for pushing Int16 PCM chunks through a Web Audio graph.
 *
 * `setup()` loads the worklet module and creates both nodes; `process()`
 * feeds one chunk into the source node and resolves with the samples the
 * capture node recorded; `destroy()` tears everything down.
 *
 * NOTE(review): this class does not connect `sourceNode` to `captureNode`;
 * presumably the caller wires the graph between them - confirm against callers.
 */
export class WebAudio {
    /* internal state */
    public audioContext: AudioContext
    public sourceNode: AudioWorkletNode | null = null
    public captureNode: AudioWorkletNode | null = null
    private pendingPromises = new Map<string, {
        resolve: (value: Int16Array) => void
        reject: (error: Error) => void
        timeout: ReturnType<typeof setTimeout>
    }>()

    /**
     * Construct object.
     *
     * @param sampleRate sample rate the audio context is created with
     * @param channels   channel count of the interleaved PCM data
     */
    constructor(
        public sampleRate: number,
        public channels: number
    ) {
        /* create new audio context */
        this.audioContext = new AudioContext({
            sampleRate,
            latencyHint: "interactive"
        })
    }

    /**
     * Setup object: resume the context, load the worklet module and
     * create the source and capture nodes.
     */
    public async setup (): Promise<void> {
        /* ensure audio context is not suspended */
        if (this.audioContext.state === "suspended")
            await this.audioContext.resume()

        /* add audio worklet module */
        const url = path.resolve(__dirname, "speechflow-utils-audio-wt.js")
        await this.audioContext.audioWorklet.addModule(url)

        /* create source node */
        const sourceNode = new AudioWorkletNode(this.audioContext, "source", {
            numberOfInputs: 0,
            numberOfOutputs: 1,
            outputChannelCount: [ this.channels ]
        })
        this.sourceNode = sourceNode

        /* create capture node */
        const captureNode = new AudioWorkletNode(this.audioContext, "capture", {
            numberOfInputs: 1,
            numberOfOutputs: 0
        })
        this.captureNode = captureNode

        /* resolve the pending promise of a chunk once its capture completed */
        captureNode.port.addEventListener("message", (event) => {
            const { type, chunkId, data } = event.data ?? {}
            if (type !== "capture-complete")
                return
            const promise = this.pendingPromises.get(chunkId)
            if (promise === undefined)
                return
            clearTimeout(promise.timeout)
            this.pendingPromises.delete(chunkId)

            /* convert captured Float32 samples back to clamped Int16 */
            const int16Data = new Int16Array(data.length)
            for (let i = 0; i < data.length; i++)
                int16Data[i] = Math.max(-32768, Math.min(32767, Math.round(data[i] * 32767)))
            promise.resolve(int16Data)
        })

        /* start ports (required when listening via addEventListener) */
        sourceNode.port.start()
        captureNode.port.start()
    }

    /**
     * Process single audio chunk.
     *
     * The chunk is converted to Float32, a capture of the same sample
     * count is requested from the capture node and, 5ms later, the chunk
     * is sent to the source node.
     *
     * @param int16Array interleaved Int16 PCM samples
     * @returns the captured samples, converted back to Int16
     * @throws Error (as promise rejection) if the nodes are not set up,
     *         if sending fails, if no result arrives within the chunk
     *         duration plus 250ms, or if the object is destroyed meanwhile
     */
    public async process (int16Array: Int16Array): Promise<Int16Array> {
        /* fail fast instead of waiting for an inevitable timeout */
        const captureNode = this.captureNode
        if (this.sourceNode === null || captureNode === null)
            throw new Error("failed to process chunk: audio nodes not set up")

        const chunkId = `chunk_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`
        return new Promise<Int16Array>((resolve, reject) => {
            /* give up if the capture does not complete in time */
            const timeout = setTimeout(() => {
                this.pendingPromises.delete(chunkId)
                reject(new Error("processing timeout"))
            }, (int16Array.length / this.audioContext.sampleRate) * 1000 + 250)
            this.pendingPromises.set(chunkId, { resolve, reject, timeout })

            /* common failure path for the immediate and the deferred send */
            const fail = (error: unknown) => {
                clearTimeout(timeout)
                this.pendingPromises.delete(chunkId)
                reject(new Error(`failed to process chunk: ${error}`))
            }

            try {
                /* convert Int16 samples to Float32 */
                const float32Data = new Float32Array(int16Array.length)
                for (let i = 0; i < int16Array.length; i++)
                    float32Data[i] = int16Array[i] / 32768.0

                /* start capture first */
                captureNode.port.postMessage({
                    type: "start-capture",
                    chunkId,
                    expectedSamples: int16Array.length
                })

                /* small delay to ensure capture is ready before sending data */
                setTimeout(() => {
                    /* the outer try/catch no longer covers this callback,
                       so errors have to be routed into the promise here */
                    try {
                        /* send input to source node (buffer is transferred) */
                        this.sourceNode?.port.postMessage({
                            type: "input-chunk",
                            chunkId,
                            data: { pcmData: float32Data, channels: this.channels }
                        }, [ float32Data.buffer ])
                    }
                    catch (error) {
                        fail(error)
                    }
                }, 5)
            }
            catch (error) {
                fail(error)
            }
        })
    }

    /**
     * Destroy object: reject all pending promises, disconnect both nodes
     * and close the audio context. Safe to call more than once.
     */
    public async destroy (): Promise<void> {
        /* reject all pending promises */
        try {
            this.pendingPromises.forEach(({ reject, timeout }) => {
                clearTimeout(timeout)
                reject(new Error("WebAudio destroyed"))
            })
            this.pendingPromises.clear()
        }
        catch (_err) {
            /* ignored - cleanup during shutdown */
        }

        /* disconnect nodes */
        if (this.sourceNode !== null) {
            this.sourceNode.disconnect()
            this.sourceNode = null
        }
        if (this.captureNode !== null) {
            this.captureNode.disconnect()
            this.captureNode = null
        }

        /* stop context (closing an already closed context would reject) */
        if (this.audioContext.state !== "closed")
            await this.audioContext.close()
    }
}
|
|
@@ -10,7 +10,7 @@ import { EventEmitter } from "node:events"
|
|
|
10
10
|
|
|
11
11
|
/* external dependencies */
|
|
12
12
|
import { DateTime, Duration } from "luxon"
|
|
13
|
-
import CBOR
|
|
13
|
+
import * as CBOR from "cbor2"
|
|
14
14
|
import * as IntervalTree from "node-interval-tree"
|
|
15
15
|
|
|
16
16
|
/* internal dependencies */
|
|
@@ -86,6 +86,31 @@ export function convertF32ToBuf (arr: Float32Array) {
|
|
|
86
86
|
return Buffer.from(int16Array.buffer)
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
/* helper function: convert Buffer in PCM/I16 to Int16Array
   (decodes 16-bit signed samples with the requested byte order;
   throws if the buffer does not hold a whole number of samples) */
export function convertBufToI16 (buf: Buffer, littleEndian = true) {
    const byteCount = buf.length
    if (byteCount % 2 !== 0)
        throw new Error("buffer length must be even for 16-bit samples")
    const samples = new Int16Array(byteCount / 2)
    for (let offset = 0; offset < byteCount; offset += 2)
        samples[offset / 2] = littleEndian ? buf.readInt16LE(offset) : buf.readInt16BE(offset)
    return samples
}
|
|
99
|
+
|
|
100
|
+
/* helper function: convert Int16Array in PCM/I16 to Buffer
   (encodes each sample as 16-bit signed integer with the requested byte order) */
export function convertI16ToBuf (arr: Int16Array, littleEndian = true) {
    const sampleCount = arr.length
    if (sampleCount === 0)
        return Buffer.alloc(0)
    const out = Buffer.allocUnsafe(sampleCount * 2)
    if (littleEndian)
        arr.forEach((sample, idx) => { out.writeInt16LE(sample, idx * 2) })
    else
        arr.forEach((sample, idx) => { out.writeInt16BE(sample, idx * 2) })
    return out
}
|
|
113
|
+
|
|
89
114
|
/* create a Duplex/Transform stream which has
|
|
90
115
|
object-mode on Writable side and buffer/string-mode on Readable side */
|
|
91
116
|
export function createTransformStreamForWritableSide () {
|
|
@@ -209,25 +234,16 @@ export class SingleQueue<T> extends EventEmitter {
|
|
|
209
234
|
}
|
|
210
235
|
read () {
|
|
211
236
|
return new Promise<T>((resolve, reject) => {
|
|
212
|
-
const consume = () =>
|
|
213
|
-
|
|
214
|
-
|
|
237
|
+
const consume = () =>
|
|
238
|
+
this.queue.length > 0 ? this.queue.pop()! : null
|
|
239
|
+
const tryToConsume = () => {
|
|
240
|
+
const item = consume()
|
|
241
|
+
if (item !== null)
|
|
242
|
+
resolve(item)
|
|
215
243
|
else
|
|
216
|
-
|
|
217
|
-
}
|
|
218
|
-
let item = consume()
|
|
219
|
-
if (item !== null)
|
|
220
|
-
resolve(item)
|
|
221
|
-
else {
|
|
222
|
-
const tryToConsume = () => {
|
|
223
|
-
item = consume()
|
|
224
|
-
if (item !== null)
|
|
225
|
-
resolve(item)
|
|
226
|
-
else
|
|
227
|
-
this.once("dequeue", tryToConsume)
|
|
228
|
-
}
|
|
229
|
-
this.once("dequeue", tryToConsume)
|
|
244
|
+
this.once("dequeue", tryToConsume)
|
|
230
245
|
}
|
|
246
|
+
tryToConsume()
|
|
231
247
|
})
|
|
232
248
|
}
|
|
233
249
|
}
|
|
@@ -256,22 +272,16 @@ export class DoubleQueue<T0, T1> extends EventEmitter {
|
|
|
256
272
|
const item1 = this.queue1.pop() as T1
|
|
257
273
|
return [ item0, item1 ]
|
|
258
274
|
}
|
|
259
|
-
|
|
260
|
-
return null
|
|
275
|
+
return null
|
|
261
276
|
}
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if (items !== null)
|
|
269
|
-
resolve(items)
|
|
270
|
-
else
|
|
271
|
-
this.once("dequeue", tryToConsume)
|
|
272
|
-
}
|
|
273
|
-
this.once("dequeue", tryToConsume)
|
|
277
|
+
const tryToConsume = () => {
|
|
278
|
+
const items = consume()
|
|
279
|
+
if (items !== null)
|
|
280
|
+
resolve(items)
|
|
281
|
+
else
|
|
282
|
+
this.once("dequeue", tryToConsume)
|
|
274
283
|
}
|
|
284
|
+
tryToConsume()
|
|
275
285
|
})
|
|
276
286
|
}
|
|
277
287
|
}
|
|
@@ -520,3 +530,86 @@ export class TimeStore<T> extends EventEmitter {
|
|
|
520
530
|
this.tree = new IntervalTree.IntervalTree<TimeStoreInterval<T>>()
|
|
521
531
|
}
|
|
522
532
|
}
|
|
533
|
+
|
|
534
|
+
/* asynchronous queue: values written are handed to waiting readers in
   arrival order, or buffered until a reader shows up; "null" is a regular
   value and is also what waiting readers receive on destruction */
export class AsyncQueue<T> {
    private queue: Array<T | null> = []
    private resolvers: ((v: T | null) => void)[] = []

    /* hand value to the longest-waiting reader, else buffer it */
    write (v: T | null) {
        const waiting = this.resolvers.shift()
        if (waiting === undefined) {
            this.queue.push(v)
            return
        }
        waiting(v)
    }

    /* take the oldest buffered value, else wait for the next write */
    async read () {
        if (this.queue.length === 0)
            return new Promise<T | null>((resolve) => { this.resolvers.push(resolve) })
        return this.queue.shift()!
    }

    /* release all waiting readers with "null" and drop buffered values */
    destroy () {
        this.resolvers.forEach((waiting) => { waiting(null) })
        this.resolvers = []
        this.queue = []
    }
}
|
|
558
|
+
|
|
559
|
+
/**
 * Process Int16Array in fixed-size segments.
 *
 * The array is cut into consecutive segments of `segmentSize` samples and
 * each one is passed (as a copy) to `processor`, one after the other. The
 * results are written back into `data` in place. A trailing partial
 * segment is zero-padded to `segmentSize` before processing and only its
 * leading part is written back.
 *
 * @param data        samples to process; modified in place
 * @param segmentSize number of samples per segment (positive integer)
 * @param processor   asynchronous per-segment transformation
 * @returns the same `data` array, after all segments were processed
 * @throws RangeError (as promise rejection) if `segmentSize` is not a
 *         positive integer
 */
export async function processInt16ArrayInSegments (
    data: Int16Array<ArrayBuffer>,
    segmentSize: number,
    processor: (segment: Int16Array<ArrayBuffer>) => Promise<Int16Array<ArrayBuffer>>
): Promise<Int16Array<ArrayBuffer>> {
    /* a size of zero or below would never advance the loop below and
       a fractional size would yield overlapping segments */
    if (!Number.isInteger(segmentSize) || segmentSize <= 0)
        throw new RangeError("segment size must be a positive integer")

    /* process full segments */
    let i = 0
    while ((i + segmentSize) <= data.length) {
        const segment = data.slice(i, i + segmentSize)
        const result = await processor(segment)

        /* never write past the segment: an oversized result would
           otherwise clobber the not yet processed input behind it */
        data.set(result.subarray(0, segmentSize), i)
        i += segmentSize
    }

    /* process final partial segment if it exists */
    if (i < data.length) {
        const len = data.length - i

        /* a fresh typed array is zero-filled, which provides the padding */
        const segment = new Int16Array(segmentSize)
        segment.set(data.subarray(i), 0)
        const result = await processor(segment)
        data.set(result.subarray(0, len), i)
    }
    return data
}
|
|
585
|
+
|
|
586
|
+
/* cached regular expression class: compiles each pattern string at most
   once and hands out the same RegExp object on later requests */
export class CachedRegExp {
    private cache = new Map<string, RegExp>()

    /* return the (possibly cached) RegExp for a pattern,
       or null if the pattern is not a valid regular expression */
    compile (pattern: string): RegExp | null {
        const known = this.cache.get(pattern)
        if (known !== undefined)
            return known
        let compiled: RegExp
        try {
            compiled = new RegExp(pattern)
        }
        catch (_error) {
            /* invalid patterns are reported as null and not cached */
            return null
        }
        this.cache.set(pattern, compiled)
        return compiled
    }

    /* forget all compiled patterns */
    clear (): void {
        this.cache.clear()
    }

    /* number of currently cached patterns */
    size (): number {
        return this.cache.size
    }
}
|
|
608
|
+
|
|
609
|
+
/* helper functions for linear/decibel conversions */
|
|
610
|
+
export function lin2dB (x: number): number {
    /* clamp to a tiny positive floor, so that the logarithm
       stays finite for zero and negative input */
    const floored = Math.max(x, 1e-12)
    return Math.log10(floored) * 20
}
|
|
613
|
+
export function dB2lin (db: number): number {
    /* inverse of the 20*log10 amplitude scale */
    const exponent = db / 20
    return 10 ** exponent
}
|