vocal-stack 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +269 -0
- package/dist/flow/index.cjs +571 -0
- package/dist/flow/index.cjs.map +1 -0
- package/dist/flow/index.d.cts +337 -0
- package/dist/flow/index.d.ts +337 -0
- package/dist/flow/index.js +559 -0
- package/dist/flow/index.js.map +1 -0
- package/dist/index.cjs +1026 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +32 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +1003 -0
- package/dist/index.js.map +1 -0
- package/dist/monitor/index.cjs +291 -0
- package/dist/monitor/index.cjs.map +1 -0
- package/dist/monitor/index.d.cts +122 -0
- package/dist/monitor/index.d.ts +122 -0
- package/dist/monitor/index.js +286 -0
- package/dist/monitor/index.js.map +1 -0
- package/dist/sanitizer/index.cjs +190 -0
- package/dist/sanitizer/index.cjs.map +1 -0
- package/dist/sanitizer/index.d.cts +83 -0
- package/dist/sanitizer/index.d.ts +83 -0
- package/dist/sanitizer/index.js +186 -0
- package/dist/sanitizer/index.js.map +1 -0
- package/package.json +90 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 vocal-stack contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# vocal-stack
|
|
2
|
+
|
|
3
|
+
> High-performance utility library for Voice AI agents
|
|
4
|
+
|
|
5
|
+
**vocal-stack** solves the "last mile" challenges when building production-ready voice AI agents: text sanitization for TTS, latency management with smart filler injection, and performance monitoring.
|
|
6
|
+
|
|
7
|
+
Platform-agnostic • Streaming-first • TypeScript strict • 90%+ test coverage
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
### 🧹 **Text Sanitizer**
|
|
14
|
+
Transform LLM output into TTS-optimized strings
|
|
15
|
+
- Strip markdown, URLs, code blocks, and complex punctuation
|
|
16
|
+
- Plugin-based system for extensibility
|
|
17
|
+
- Streaming and sync APIs
|
|
18
|
+
|
|
19
|
+
### ⚡ **Flow Control**
|
|
20
|
+
Manage latency with intelligent filler injection
|
|
21
|
+
- Detect stream stalls (default 700ms threshold)
|
|
22
|
+
- Inject filler phrases ("um", "let me think") only before the first chunk arrives
|
|
23
|
+
- Handle barge-in with state machine and buffer management
|
|
24
|
+
- Dual API: a high-level stream wrapper plus a low-level event-based manager
|
|
25
|
+
|
|
26
|
+
### 📊 **Latency Monitoring**
|
|
27
|
+
Track and profile voice agent performance
|
|
28
|
+
- Measure time to first token (TTFT) and total duration
|
|
29
|
+
- Calculate percentiles (p50, p95, p99)
|
|
30
|
+
- Export metrics (JSON, CSV)
|
|
31
|
+
- Real-time monitoring with callbacks
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npm install vocal-stack
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
yarn add vocal-stack
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pnpm add vocal-stack
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Requirements**: Node.js 18+
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
### Text Sanitization
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
import { sanitizeForSpeech } from 'vocal-stack';
|
|
57
|
+
|
|
58
|
+
const markdown = '## Hello World\nCheck out [this link](https://example.com)';
|
|
59
|
+
const speakable = sanitizeForSpeech(markdown);
|
|
60
|
+
// Output: "Hello World Check out this link"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Flow Control
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
import { withFlowControl } from 'vocal-stack';
|
|
67
|
+
|
|
68
|
+
for await (const chunk of withFlowControl(llmStream)) {
|
|
69
|
+
sendToTTS(chunk);
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Latency Monitoring
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
import { VoiceAuditor } from 'vocal-stack';
|
|
77
|
+
|
|
78
|
+
const auditor = new VoiceAuditor();
|
|
79
|
+
|
|
80
|
+
for await (const chunk of auditor.track('request-123', llmStream)) {
|
|
81
|
+
sendToTTS(chunk);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
console.log(auditor.getSummary()); // { avgTimeToFirstToken: 150, ... }
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Composable Architecture
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
import { SpeechSanitizer, FlowController, VoiceAuditor } from 'vocal-stack';
|
|
91
|
+
|
|
92
|
+
const sanitizer = new SpeechSanitizer({ rules: ['markdown', 'urls'] });
|
|
93
|
+
const flowController = new FlowController({
|
|
94
|
+
stallThresholdMs: 700,
|
|
95
|
+
onFillerInjected: (filler) => sendToTTS(filler),
|
|
96
|
+
});
|
|
97
|
+
const auditor = new VoiceAuditor({ enableRealtime: true });
|
|
98
|
+
|
|
99
|
+
// Compose: LLM Stream → Sanitizer → Flow Controller → Monitor → TTS
|
|
100
|
+
async function processVoiceStream(llmStream: AsyncIterable<string>) {
|
|
101
|
+
const sanitized = sanitizer.sanitizeStream(llmStream);
|
|
102
|
+
const controlled = flowController.wrap(sanitized);
|
|
103
|
+
const monitored = auditor.track('req-123', controlled);
|
|
104
|
+
|
|
105
|
+
for await (const chunk of monitored) {
|
|
106
|
+
await sendToTTS(chunk);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
console.log('Performance:', auditor.getSummary());
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## API Overview
|
|
114
|
+
|
|
115
|
+
### Sanitizer Module
|
|
116
|
+
|
|
117
|
+
**High-Level API:**
|
|
118
|
+
```typescript
|
|
119
|
+
import { sanitizeForSpeech } from 'vocal-stack';
|
|
120
|
+
|
|
121
|
+
const clean = sanitizeForSpeech(text); // Quick one-liner
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Class-Based API:**
|
|
125
|
+
```typescript
|
|
126
|
+
import { SpeechSanitizer } from 'vocal-stack';
|
|
127
|
+
|
|
128
|
+
const sanitizer = new SpeechSanitizer({
|
|
129
|
+
rules: ['markdown', 'urls', 'code-blocks', 'punctuation'],
|
|
130
|
+
customReplacements: new Map([['https://', 'link']]),
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
// Sync
|
|
134
|
+
const result = sanitizer.sanitize(text);
|
|
135
|
+
|
|
136
|
+
// Streaming
|
|
137
|
+
for await (const chunk of sanitizer.sanitizeStream(llmStream)) {
|
|
138
|
+
console.log(chunk);
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Flow Module
|
|
143
|
+
|
|
144
|
+
**High-Level API:**
|
|
145
|
+
```typescript
|
|
146
|
+
import { FlowController, withFlowControl } from 'vocal-stack';
|
|
147
|
+
|
|
148
|
+
// Convenience function
|
|
149
|
+
for await (const chunk of withFlowControl(llmStream)) {
|
|
150
|
+
sendToTTS(chunk);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// Class-based with configuration
|
|
154
|
+
const controller = new FlowController({
|
|
155
|
+
stallThresholdMs: 700,
|
|
156
|
+
fillerPhrases: ['um', 'let me think'],
|
|
157
|
+
enableFillers: true,
|
|
158
|
+
onFillerInjected: (filler) => sendToTTS(filler),
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
for await (const chunk of controller.wrap(llmStream)) {
|
|
162
|
+
sendToTTS(chunk);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Barge-in support
|
|
166
|
+
controller.interrupt();
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Low-Level API:**
|
|
170
|
+
```typescript
|
|
171
|
+
import { FlowManager } from 'vocal-stack';
|
|
172
|
+
|
|
173
|
+
const manager = new FlowManager({ stallThresholdMs: 700 });
|
|
174
|
+
|
|
175
|
+
manager.on((event) => {
|
|
176
|
+
switch (event.type) {
|
|
177
|
+
case 'stall-detected':
|
|
178
|
+
console.log(`Stalled for ${event.durationMs}ms`);
|
|
179
|
+
break;
|
|
180
|
+
case 'filler-injected':
|
|
181
|
+
sendToTTS(event.filler);
|
|
182
|
+
break;
|
|
183
|
+
}
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
manager.start();
|
|
187
|
+
for await (const chunk of llmStream) {
|
|
188
|
+
manager.processChunk(chunk);
|
|
189
|
+
sendToTTS(chunk);
|
|
190
|
+
}
|
|
191
|
+
manager.complete();
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Monitor Module
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
import { VoiceAuditor } from 'vocal-stack';
|
|
198
|
+
|
|
199
|
+
const auditor = new VoiceAuditor({
|
|
200
|
+
enableRealtime: true,
|
|
201
|
+
onMetric: (metric) => console.log(metric),
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
// Automatic tracking with stream wrapper
|
|
205
|
+
for await (const chunk of auditor.track('req-123', llmStream)) {
|
|
206
|
+
sendToTTS(chunk);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Get statistics
|
|
210
|
+
const summary = auditor.getSummary();
|
|
211
|
+
console.log(summary);
|
|
212
|
+
// {
|
|
213
|
+
// count: 10,
|
|
214
|
+
// avgTimeToFirstToken: 150,
|
|
215
|
+
// p50TimeToFirstToken: 120,
|
|
216
|
+
// p95TimeToFirstToken: 300,
|
|
217
|
+
// ...
|
|
218
|
+
// }
|
|
219
|
+
|
|
220
|
+
// Export data
|
|
221
|
+
const json = auditor.export('json');
|
|
222
|
+
const csv = auditor.export('csv');
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Tree-Shakeable Imports
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
// Import only what you need
|
|
231
|
+
import { SpeechSanitizer } from 'vocal-stack/sanitizer';
|
|
232
|
+
import { FlowController } from 'vocal-stack/flow';
|
|
233
|
+
import { VoiceAuditor } from 'vocal-stack/monitor';
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Architecture
|
|
239
|
+
|
|
240
|
+
vocal-stack is built with three independent, composable modules:
|
|
241
|
+
|
|
242
|
+
```
|
|
243
|
+
LLM Stream → Sanitizer → Flow Controller → Monitor → TTS
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
- **Sanitizer**: Cleans text for TTS
|
|
247
|
+
- **Flow Controller**: Manages latency and injects fillers
|
|
248
|
+
- **Monitor**: Tracks performance metrics
|
|
249
|
+
|
|
250
|
+
Each module works standalone or together. Use only what you need.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Documentation
|
|
255
|
+
|
|
256
|
+
- API Reference (coming soon)
|
|
257
|
+
- Examples in `./examples/`
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## License
|
|
262
|
+
|
|
263
|
+
MIT
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Contributing
|
|
268
|
+
|
|
269
|
+
Contributions welcome! Please open an issue or PR.
|