vocal-stack 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 vocal-stack contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,269 @@
1
+ # vocal-stack
2
+
3
+ > High-performance utility library for Voice AI agents
4
+
5
+ **vocal-stack** solves the "last mile" challenges when building production-ready voice AI agents: text sanitization for TTS, latency management with smart filler injection, and performance monitoring.
6
+
7
+ Platform-agnostic • Streaming-first • TypeScript strict • 90%+ test coverage
8
+
9
+ ---
10
+
11
+ ## Features
12
+
13
+ ### 🧹 **Text Sanitizer**
14
+ Transform LLM output into TTS-optimized strings
15
+ - Strip markdown, URLs, code blocks, complex punctuation
16
+ - Plugin-based system for extensibility
17
+ - Streaming and sync APIs
18
+
19
+ ### ⚡ **Flow Control**
20
+ Manage latency with intelligent filler injection
21
+ - Detect stream stalls (default 700ms threshold)
22
+ - Inject filler phrases ("um", "let me think") only before the first chunk arrives
23
+ - Handle barge-in with state machine and buffer management
24
+ - Dual API: high-level stream wrapper + low-level event-based manager
25
+
26
+ ### 📊 **Latency Monitoring**
27
+ Track and profile voice agent performance
28
+ - Measure time to first token (TTFT) and total duration
29
+ - Calculate percentiles (p50, p95, p99)
30
+ - Export metrics (JSON, CSV)
31
+ - Real-time monitoring with callbacks
32
+
33
+ ---
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ npm install vocal-stack
39
+ ```
40
+
41
+ ```bash
42
+ yarn add vocal-stack
43
+ ```
44
+
45
+ ```bash
46
+ pnpm add vocal-stack
47
+ ```
48
+
49
+ **Requirements**: Node.js 18+
50
+
51
+ ## Quick Start
52
+
53
+ ### Text Sanitization
54
+
55
+ ```typescript
56
+ import { sanitizeForSpeech } from 'vocal-stack';
57
+
58
+ const markdown = '## Hello World\nCheck out [this link](https://example.com)';
59
+ const speakable = sanitizeForSpeech(markdown);
60
+ // Output: "Hello World Check out this link"
61
+ ```
62
+
63
+ ### Flow Control
64
+
65
+ ```typescript
66
+ import { withFlowControl } from 'vocal-stack';
67
+
68
+ for await (const chunk of withFlowControl(llmStream)) {
69
+ sendToTTS(chunk);
70
+ }
71
+ ```
72
+
73
+ ### Latency Monitoring
74
+
75
+ ```typescript
76
+ import { VoiceAuditor } from 'vocal-stack';
77
+
78
+ const auditor = new VoiceAuditor();
79
+
80
+ for await (const chunk of auditor.track('request-123', llmStream)) {
81
+ sendToTTS(chunk);
82
+ }
83
+
84
+ console.log(auditor.getSummary()); // { avgTimeToFirstToken: 150, ... }
85
+ ```
86
+
87
+ ### Composable Architecture
88
+
89
+ ```typescript
90
+ import { SpeechSanitizer, FlowController, VoiceAuditor } from 'vocal-stack';
91
+
92
+ const sanitizer = new SpeechSanitizer({ rules: ['markdown', 'urls'] });
93
+ const flowController = new FlowController({
94
+ stallThresholdMs: 700,
95
+ onFillerInjected: (filler) => sendToTTS(filler),
96
+ });
97
+ const auditor = new VoiceAuditor({ enableRealtime: true });
98
+
99
+ // Compose: LLM → Sanitize → Flow Control → Monitor → TTS
100
+ async function processVoiceStream(llmStream: AsyncIterable<string>) {
101
+ const sanitized = sanitizer.sanitizeStream(llmStream);
102
+ const controlled = flowController.wrap(sanitized);
103
+ const monitored = auditor.track('req-123', controlled);
104
+
105
+ for await (const chunk of monitored) {
106
+ await sendToTTS(chunk);
107
+ }
108
+
109
+ console.log('Performance:', auditor.getSummary());
110
+ }
111
+ ```
112
+
113
+ ## API Overview
114
+
115
+ ### Sanitizer Module
116
+
117
+ **High-Level API:**
118
+ ```typescript
119
+ import { sanitizeForSpeech } from 'vocal-stack';
120
+
121
+ const clean = sanitizeForSpeech(text); // Quick one-liner
122
+ ```
123
+
124
+ **Class-Based API:**
125
+ ```typescript
126
+ import { SpeechSanitizer } from 'vocal-stack';
127
+
128
+ const sanitizer = new SpeechSanitizer({
129
+ rules: ['markdown', 'urls', 'code-blocks', 'punctuation'],
130
+ customReplacements: new Map([['https://', 'link']]),
131
+ });
132
+
133
+ // Sync
134
+ const result = sanitizer.sanitize(text);
135
+
136
+ // Streaming
137
+ for await (const chunk of sanitizer.sanitizeStream(llmStream)) {
138
+ console.log(chunk);
139
+ }
140
+ ```
141
+
142
+ ### Flow Module
143
+
144
+ **High-Level API:**
145
+ ```typescript
146
+ import { FlowController, withFlowControl } from 'vocal-stack';
147
+
148
+ // Convenience function
149
+ for await (const chunk of withFlowControl(llmStream)) {
150
+ sendToTTS(chunk);
151
+ }
152
+
153
+ // Class-based with configuration
154
+ const controller = new FlowController({
155
+ stallThresholdMs: 700,
156
+ fillerPhrases: ['um', 'let me think'],
157
+ enableFillers: true,
158
+ onFillerInjected: (filler) => sendToTTS(filler),
159
+ });
160
+
161
+ for await (const chunk of controller.wrap(llmStream)) {
162
+ sendToTTS(chunk);
163
+ }
164
+
165
+ // Barge-in support
166
+ controller.interrupt();
167
+ ```
168
+
169
+ **Low-Level API:**
170
+ ```typescript
171
+ import { FlowManager } from 'vocal-stack';
172
+
173
+ const manager = new FlowManager({ stallThresholdMs: 700 });
174
+
175
+ manager.on((event) => {
176
+ switch (event.type) {
177
+ case 'stall-detected':
178
+ console.log(`Stalled for ${event.durationMs}ms`);
179
+ break;
180
+ case 'filler-injected':
181
+ sendToTTS(event.filler);
182
+ break;
183
+ }
184
+ });
185
+
186
+ manager.start();
187
+ for await (const chunk of llmStream) {
188
+ manager.processChunk(chunk);
189
+ sendToTTS(chunk);
190
+ }
191
+ manager.complete();
192
+ ```
193
+
194
+ ### Monitor Module
195
+
196
+ ```typescript
197
+ import { VoiceAuditor } from 'vocal-stack';
198
+
199
+ const auditor = new VoiceAuditor({
200
+ enableRealtime: true,
201
+ onMetric: (metric) => console.log(metric),
202
+ });
203
+
204
+ // Automatic tracking with stream wrapper
205
+ for await (const chunk of auditor.track('req-123', llmStream)) {
206
+ sendToTTS(chunk);
207
+ }
208
+
209
+ // Get statistics
210
+ const summary = auditor.getSummary();
211
+ console.log(summary);
212
+ // {
213
+ // count: 10,
214
+ // avgTimeToFirstToken: 150,
215
+ // p50TimeToFirstToken: 120,
216
+ // p95TimeToFirstToken: 300,
217
+ // ...
218
+ // }
219
+
220
+ // Export data
221
+ const json = auditor.export('json');
222
+ const csv = auditor.export('csv');
223
+ ```
224
+
225
+ ---
226
+
227
+ ## Tree-Shakeable Imports
228
+
229
+ ```typescript
230
+ // Import only what you need
231
+ import { SpeechSanitizer } from 'vocal-stack/sanitizer';
232
+ import { FlowController } from 'vocal-stack/flow';
233
+ import { VoiceAuditor } from 'vocal-stack/monitor';
234
+ ```
235
+
236
+ ---
237
+
238
+ ## Architecture
239
+
240
+ vocal-stack is built with three independent, composable modules:
241
+
242
+ ```
243
+ LLM Stream → Sanitizer → Flow Controller → Monitor → TTS
244
+ ```
245
+
246
+ - **Sanitizer**: Cleans text for TTS
247
+ - **Flow Controller**: Manages latency and injects fillers
248
+ - **Monitor**: Tracks performance metrics
249
+
250
+ Each module works on its own or combined with the others. Use only what you need.
251
+
252
+ ---
253
+
254
+ ## Documentation
255
+
256
+ - API Reference (coming soon)
257
+ - Examples in `./examples/`
258
+
259
+ ---
260
+
261
+ ## License
262
+
263
+ MIT
264
+
265
+ ---
266
+
267
+ ## Contributing
268
+
269
+ Contributions welcome! Please open an issue or PR.