@makefinks/daemon 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +126 -0
- package/dist/cli.js +22 -0
- package/package.json +79 -0
- package/src/ai/agent-turn-runner.ts +130 -0
- package/src/ai/daemon-ai.ts +403 -0
- package/src/ai/exa-client.ts +21 -0
- package/src/ai/exa-fetch-cache.ts +104 -0
- package/src/ai/model-config.ts +99 -0
- package/src/ai/sanitize-messages.ts +83 -0
- package/src/ai/system-prompt.ts +363 -0
- package/src/ai/tools/fetch-urls.ts +187 -0
- package/src/ai/tools/grounding-manager.ts +94 -0
- package/src/ai/tools/index.ts +52 -0
- package/src/ai/tools/read-file.ts +100 -0
- package/src/ai/tools/render-url.ts +275 -0
- package/src/ai/tools/run-bash.ts +224 -0
- package/src/ai/tools/subagents.ts +195 -0
- package/src/ai/tools/todo-manager.ts +150 -0
- package/src/ai/tools/web-search.ts +91 -0
- package/src/app/App.tsx +711 -0
- package/src/app/components/AppOverlays.tsx +131 -0
- package/src/app/components/AvatarLayer.tsx +51 -0
- package/src/app/components/ConversationPane.tsx +476 -0
- package/src/avatar/DaemonAvatarRenderable.ts +343 -0
- package/src/avatar/daemon-avatar-rig.ts +1165 -0
- package/src/avatar-preview.ts +186 -0
- package/src/cli.ts +26 -0
- package/src/components/ApiKeyInput.tsx +99 -0
- package/src/components/ApiKeyStep.tsx +95 -0
- package/src/components/ApprovalPicker.tsx +109 -0
- package/src/components/ContentBlockView.tsx +141 -0
- package/src/components/DaemonText.tsx +34 -0
- package/src/components/DeviceMenu.tsx +166 -0
- package/src/components/GroundingBadge.tsx +21 -0
- package/src/components/GroundingMenu.tsx +310 -0
- package/src/components/HotkeysPane.tsx +115 -0
- package/src/components/InlineStatusIndicator.tsx +106 -0
- package/src/components/ModelMenu.tsx +411 -0
- package/src/components/OnboardingOverlay.tsx +446 -0
- package/src/components/ProviderMenu.tsx +177 -0
- package/src/components/SessionMenu.tsx +297 -0
- package/src/components/SettingsMenu.tsx +291 -0
- package/src/components/StatusBar.tsx +126 -0
- package/src/components/TokenUsageDisplay.tsx +92 -0
- package/src/components/ToolCallView.tsx +113 -0
- package/src/components/TypingInputBar.tsx +131 -0
- package/src/components/tool-layouts/components.tsx +120 -0
- package/src/components/tool-layouts/defaults.ts +9 -0
- package/src/components/tool-layouts/index.ts +22 -0
- package/src/components/tool-layouts/layouts/bash.ts +110 -0
- package/src/components/tool-layouts/layouts/grounding.tsx +98 -0
- package/src/components/tool-layouts/layouts/index.ts +8 -0
- package/src/components/tool-layouts/layouts/read-file.ts +59 -0
- package/src/components/tool-layouts/layouts/subagent.tsx +118 -0
- package/src/components/tool-layouts/layouts/system-info.ts +8 -0
- package/src/components/tool-layouts/layouts/todo.tsx +139 -0
- package/src/components/tool-layouts/layouts/url-tools.ts +220 -0
- package/src/components/tool-layouts/layouts/web-search.ts +110 -0
- package/src/components/tool-layouts/registry.ts +17 -0
- package/src/components/tool-layouts/types.ts +94 -0
- package/src/hooks/daemon-event-handlers.ts +944 -0
- package/src/hooks/keyboard-handlers.ts +399 -0
- package/src/hooks/menu-navigation.ts +147 -0
- package/src/hooks/use-app-audio-devices-loader.ts +71 -0
- package/src/hooks/use-app-callbacks.ts +202 -0
- package/src/hooks/use-app-context-builder.ts +159 -0
- package/src/hooks/use-app-display-state.ts +162 -0
- package/src/hooks/use-app-menus.ts +51 -0
- package/src/hooks/use-app-model-pricing-loader.ts +45 -0
- package/src/hooks/use-app-model.ts +123 -0
- package/src/hooks/use-app-openrouter-models-loader.ts +44 -0
- package/src/hooks/use-app-openrouter-provider-loader.ts +35 -0
- package/src/hooks/use-app-preferences-bootstrap.ts +212 -0
- package/src/hooks/use-app-sessions.ts +105 -0
- package/src/hooks/use-app-settings.ts +62 -0
- package/src/hooks/use-conversation-manager.ts +163 -0
- package/src/hooks/use-copy-on-select.ts +50 -0
- package/src/hooks/use-daemon-events.ts +396 -0
- package/src/hooks/use-daemon-keyboard.ts +397 -0
- package/src/hooks/use-grounding.ts +46 -0
- package/src/hooks/use-input-history.ts +92 -0
- package/src/hooks/use-menu-keyboard.ts +93 -0
- package/src/hooks/use-playwright-notification.ts +23 -0
- package/src/hooks/use-reasoning-animation.ts +97 -0
- package/src/hooks/use-response-timer.ts +55 -0
- package/src/hooks/use-tool-approval.tsx +202 -0
- package/src/hooks/use-typing-mode.ts +137 -0
- package/src/hooks/use-voice-dependencies-notification.ts +37 -0
- package/src/index.tsx +48 -0
- package/src/scripts/setup-browsers.ts +42 -0
- package/src/state/app-context.tsx +160 -0
- package/src/state/daemon-events.ts +67 -0
- package/src/state/daemon-state.ts +493 -0
- package/src/state/migrations/001-init.ts +33 -0
- package/src/state/migrations/index.ts +8 -0
- package/src/state/model-history-store.ts +45 -0
- package/src/state/runtime-context.ts +21 -0
- package/src/state/session-store.ts +359 -0
- package/src/types/index.ts +405 -0
- package/src/types/theme.ts +52 -0
- package/src/ui/constants.ts +157 -0
- package/src/utils/clipboard.ts +89 -0
- package/src/utils/debug-logger.ts +69 -0
- package/src/utils/formatters.ts +242 -0
- package/src/utils/js-rendering.ts +77 -0
- package/src/utils/markdown-tables.ts +234 -0
- package/src/utils/model-metadata.ts +191 -0
- package/src/utils/openrouter-endpoints.ts +212 -0
- package/src/utils/openrouter-models.ts +205 -0
- package/src/utils/openrouter-pricing.ts +59 -0
- package/src/utils/openrouter-reported-cost.ts +16 -0
- package/src/utils/paste.ts +33 -0
- package/src/utils/preferences.ts +289 -0
- package/src/utils/text-fragment.ts +39 -0
- package/src/utils/tool-output-preview.ts +250 -0
- package/src/utils/voice-dependencies.ts +107 -0
- package/src/utils/workspace-manager.ts +85 -0
- package/src/voice/audio-recorder.ts +579 -0
- package/src/voice/mic-level.ts +35 -0
- package/src/voice/tts/openai-tts-stream.ts +222 -0
- package/src/voice/tts/speech-controller.ts +64 -0
- package/src/voice/tts/tts-player.ts +257 -0
- package/src/voice/voice-input-controller.ts +96 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI TTS client using the streaming API.
|
|
3
|
+
* Generates speech audio and emits PCM chunks as they arrive for real-time playback.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { EventEmitter } from "node:events";
|
|
7
|
+
import OpenAI from "openai";
|
|
8
|
+
import type { SpeechCreateParams } from "openai/resources/audio/speech";
|
|
9
|
+
|
|
10
|
+
/** Construction options for the OpenAI TTS stream client; every field is optional. */
export interface OpenAITTSOptions {
	/** OpenAI API key (defaults to OPENAI_API_KEY env var) */
	apiKey?: string;
	/** OpenAI speech model (default: gpt-4o-mini-tts) */
	model?: SpeechCreateParams["model"];
	/** Voice name (default: onyx) */
	voice?: SpeechCreateParams["voice"];
	/** Output format (default: pcm) */
	outputFormat?: SpeechCreateParams["response_format"];
	/** Speech speed multiplier (0.25 to 4.0) */
	speed?: SpeechCreateParams["speed"];
	/** Optional style instructions (works with gpt-4o-mini-tts, not tts-1/tts-1-hd) */
	instructions?: SpeechCreateParams["instructions"];
}

/** Listener signatures for the events the TTS stream emits. */
export interface OpenAITTSStreamEvents {
	/** One chunk of generated audio. */
	audio: (chunk: Buffer) => void;
	/** The utterance finished streaming. */
	done: () => void;
	/** The request or the stream failed. */
	error: (error: Error) => void;
}

// Fallbacks applied when the corresponding option is not supplied.
const DEFAULT_MODEL: SpeechCreateParams["model"] = "gpt-4o-mini-tts";
const DEFAULT_VOICE: SpeechCreateParams["voice"] = "onyx";
const DEFAULT_FORMAT: SpeechCreateParams["response_format"] = "pcm";
const DEFAULT_SPEED: SpeechCreateParams["speed"] = 1.1;

const DEFAULT_INSTRUCTIONS =
	"Speak with a confident and precise tone inspired by JARVIS from Iron Man with a purposeful and helpful cadence.";

/** SSE payload carrying one base64-encoded audio fragment. */
interface SpeechAudioDeltaEvent {
	type: "speech.audio.delta";
	audio: string;
}

/** SSE payload marking the end of the audio, optionally with token usage. */
interface SpeechAudioDoneEvent {
	type: "speech.audio.done";
	usage?: {
		input_tokens: number;
		output_tokens: number;
		total_tokens: number;
	};
}

/** Any event payload expected on the speech SSE stream. */
type SpeechStreamEvent = SpeechAudioDeltaEvent | SpeechAudioDoneEvent;
|
|
54
|
+
|
|
55
|
+
/**
 * OpenAI TTS client using the streaming API.
 *
 * Emits `audio` events with Buffer chunks as they arrive, then `done` when the
 * utterance is complete. Failures are reported through an `error` event.
 * A call that is cancelled with stop() (or superseded by a newer speak())
 * emits neither `done` nor `error`.
 */
export class OpenAITTSStream extends EventEmitter {
	private client: OpenAI;
	private model: SpeechCreateParams["model"];
	private voice: SpeechCreateParams["voice"];
	private outputFormat: SpeechCreateParams["response_format"];
	private speed: SpeechCreateParams["speed"];
	private instructions: SpeechCreateParams["instructions"];
	private _isSpeaking = false;
	/** Controller of the request currently in flight; null while idle. */
	private abortController: AbortController | null = null;

	/**
	 * @param options Client configuration; missing fields fall back to the module defaults.
	 */
	constructor(options: OpenAITTSOptions = {}) {
		super();

		this.client = new OpenAI({
			apiKey: options.apiKey,
		});
		this.model = options.model ?? DEFAULT_MODEL;
		this.voice = options.voice ?? DEFAULT_VOICE;
		this.outputFormat = options.outputFormat ?? DEFAULT_FORMAT;
		this.speed = options.speed ?? DEFAULT_SPEED;
		this.instructions = options.instructions ?? DEFAULT_INSTRUCTIONS;
	}

	/** True while a request started by speak() is still streaming. */
	get isSpeaking(): boolean {
		return this._isSpeaking;
	}

	/**
	 * Requests speech for `text` and emits the audio as it streams in.
	 *
	 * Blank text emits `done` immediately. Any request already in flight is
	 * cancelled first. The returned promise resolves once streaming ends, fails
	 * or is cancelled; failures are delivered through the `error` event.
	 *
	 * @param text Text to synthesize.
	 */
	async speak(text: string): Promise<void> {
		if (!text.trim()) {
			this.emit("done");
			return;
		}

		this.stop();

		// Cancellation is tracked per call. A shared flag would be reset by the next
		// speak() while this call is still unwinding, turning its abort into an "error".
		const controller = new AbortController();
		const { signal } = controller;
		this.abortController = controller;
		this._isSpeaking = true;

		try {
			// The tts-1 family is sent without stream_format/instructions and read as raw audio.
			const supportsSSE = !String(this.model).startsWith("tts-1");

			const params: SpeechCreateParams = {
				model: this.model,
				input: text,
				voice: this.voice,
				response_format: this.outputFormat,
				speed: this.speed,
			};

			if (supportsSSE) {
				params.stream_format = "sse";
				params.instructions = this.instructions;
			}

			const response = await this.client.audio.speech.create(params, { signal });

			const body = response.body;
			if (!body) {
				throw new Error("No response body from OpenAI TTS API");
			}

			if (supportsSSE) {
				await this.processSSEStream(body, signal);
			} else {
				await this.processRawAudioStream(body, signal);
			}

			// stop() already reset the state for a cancelled call; stay silent.
			if (signal.aborted) return;
			this._isSpeaking = false;
			this.emit("done");
		} catch (error) {
			if (signal.aborted) return;
			this._isSpeaking = false;
			this.emit("error", error instanceof Error ? error : new Error(String(error)));
		} finally {
			// Never clear a controller that belongs to a newer speak() call.
			if (this.abortController === controller) {
				this.abortController = null;
			}
		}
	}

	/**
	 * Reads a server-sent-event body and emits the audio carried by each event.
	 *
	 * @param body Response body to consume.
	 * @param signal When aborted, reading stops as soon as the current read settles.
	 */
	private async processSSEStream(body: ReadableStream<Uint8Array>, signal?: AbortSignal): Promise<void> {
		const reader = body.getReader();
		const decoder = new TextDecoder();
		let pending = "";

		try {
			while (!signal?.aborted) {
				const { done, value } = await reader.read();
				// At end of stream flush the decoder so a buffered partial character is not lost.
				pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

				const lines = pending.split("\n");
				// Mid-stream the last element may be an incomplete line; keep it for the next read.
				// At end of stream everything left is final and must be processed.
				pending = done ? "" : (lines.pop() ?? "");

				for (const line of lines) {
					if (signal?.aborted) break;
					this.handleSSELine(line);
				}

				if (done) break;
			}
		} finally {
			reader.releaseLock();
		}
	}

	/**
	 * Handles one SSE line: decodes `speech.audio.delta` payloads and emits them as `audio`.
	 * Blank lines, comments, non-data fields, `[DONE]` and unparsable payloads are ignored.
	 */
	private handleSSELine(line: string): void {
		const trimmed = line.trim();
		// Accept both "data: x" and "data:x"; the space after the colon is optional.
		if (!trimmed.startsWith("data:")) return;

		const data = trimmed.slice(5).trimStart();
		if (!data || data === "[DONE]") return;

		let event: SpeechStreamEvent;
		try {
			event = JSON.parse(data) as SpeechStreamEvent;
		} catch {
			// Best effort: a malformed payload is skipped rather than failing the utterance.
			return;
		}

		if (event.type === "speech.audio.delta" && typeof event.audio === "string") {
			this.emit("audio", Buffer.from(event.audio, "base64"));
		}
	}

	/**
	 * Forwards a raw (non-SSE) audio body chunk by chunk as `audio` events.
	 *
	 * @param body Response body to consume.
	 * @param signal When aborted, reading stops as soon as the current read settles.
	 */
	private async processRawAudioStream(body: ReadableStream<Uint8Array>, signal?: AbortSignal): Promise<void> {
		const reader = body.getReader();

		try {
			while (!signal?.aborted) {
				const { done, value } = await reader.read();
				if (done) break;

				this.emit("audio", Buffer.from(value));
			}
		} finally {
			reader.releaseLock();
		}
	}

	/** Cancels the request in flight, if any. Safe to call while idle. */
	stop(): void {
		const controller = this.abortController;
		this.abortController = null;
		this._isSpeaking = false;

		if (controller) {
			try {
				controller.abort();
			} catch {
				// Ignore: cancellation is best effort.
			}
		}
	}

	/** Cancels any request in flight and detaches every listener. */
	destroy(): void {
		this.stop();
		this.removeAllListeners();
	}
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
|
|
3
|
+
import { destroyTTSPlayer, getTTSPlayer, stopSpeaking } from "./tts-player";
|
|
4
|
+
|
|
5
|
+
/** Per-utterance settings accepted by SpeechController.speak(). */
export interface SpeechControllerOptions {
	/** Speed value forwarded to the TTS player's OpenAI options. */
	speed: number;
	/** Output device name forwarded to the TTS player; omitted when not set. */
	outputDeviceName?: string;
}

/** Listener signatures for the events SpeechController emits. */
export interface SpeechControllerEvents {
	/** Audio level relayed from the TTS player while it is speaking. */
	audioLevel: (level: number) => void;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Facade over the shared TTS player: speaks one utterance at a time and
 * re-emits the player's `audioLevel` events while it does so.
 */
export class SpeechController extends EventEmitter {
	/** Controller of the utterance currently being spoken; null while idle. */
	private abortController: AbortController | null = null;

	/**
	 * Speaks `text`, cancelling whatever was being spoken before.
	 *
	 * @param text Text to speak; blank text is ignored.
	 * @param options Speed and output device for this utterance.
	 * @param signal Optional external signal; aborting it stops this utterance.
	 * @returns A promise that settles when the player's speak() promise settles.
	 */
	async speak(text: string, options: SpeechControllerOptions, signal?: AbortSignal): Promise<void> {
		if (!text.trim()) return;

		this.stop();

		// Already cancelled by the caller: the previous utterance is stopped, nothing new starts.
		if (signal?.aborted) return;

		const controller = new AbortController();
		this.abortController = controller;

		const player = getTTSPlayer({
			openai: {
				speed: options.speed,
			},
			outputDeviceName: options.outputDeviceName,
		});

		const handleAudioLevel = (level: number) => {
			this.emit("audioLevel", level);
		};
		// Only the utterance this listener was registered for may be stopped by it;
		// a late abort of an old signal must not cut off newer speech.
		const handleExternalAbort = () => {
			if (this.abortController === controller) {
				this.stop();
			}
		};

		player.on("audioLevel", handleAudioLevel);
		signal?.addEventListener("abort", handleExternalAbort, { once: true });

		try {
			await player.speak(text, controller.signal);
		} finally {
			player.off("audioLevel", handleAudioLevel);
			// Drop the listener so a long-lived signal does not accumulate callbacks.
			signal?.removeEventListener("abort", handleExternalAbort);
			// A newer speak() may already own the field; leave its controller alone.
			if (this.abortController === controller) {
				this.abortController = null;
			}
		}
	}

	/** Aborts the current utterance, if any, and stops the shared player. */
	stop(): void {
		if (this.abortController) {
			this.abortController.abort();
			this.abortController = null;
		}
		stopSpeaking();
	}

	/** Stops speech, destroys the shared player and detaches every listener. */
	destroy(): void {
		this.stop();
		destroyTTSPlayer();
		this.removeAllListeners();
	}
}
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import { spawn, type ChildProcess } from "node:child_process";
|
|
2
|
+
import { EventEmitter } from "node:events";
|
|
3
|
+
import { OpenAITTSStream, type OpenAITTSOptions } from "./openai-tts-stream";
|
|
4
|
+
import { debug } from "../../utils/debug-logger";
|
|
5
|
+
|
|
6
|
+
/** Settings for TTSPlayer; every field is optional. */
export interface TTSPlayerOptions {
	/** Options passed to the OpenAI TTS stream created for each utterance. */
	openai?: OpenAITTSOptions;
	/** Name of the output device handed to sox; the default device is used when omitted. */
	outputDeviceName?: string;
	/** When true, playback milestones are written to the debug logger. */
	debug?: boolean;
}

/** Listener signatures for the events TTSPlayer emits. */
export interface TTSPlayerEvents {
	/** An utterance has started. */
	speaking: () => void;
	/** The utterance ended or was stopped. */
	done: () => void;
	/** Synthesis or playback failed. */
	error: (error: Error) => void;
	/** Smoothed output level in the range 0..1. */
	audioLevel: (level: number) => void;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Plays synthesized speech by piping PCM audio from an OpenAITTSStream into a
 * `sox` child process.
 *
 * Events: `speaking` when an utterance starts, `done` exactly once when it ends
 * or is stopped, `error` on failure (only if a listener is attached; the
 * failure is always delivered through the rejected speak() promise), and
 * `audioLevel` with a smoothed 0..1 level while audio arrives.
 */
export class TTSPlayer extends EventEmitter {
	private options: TTSPlayerOptions;
	/** Stream of the utterance in progress; null while idle. Also identifies the current session. */
	private tts: OpenAITTSStream | null = null;
	/** sox process of the utterance in progress; null while idle. */
	private player: ChildProcess | null = null;
	private _isSpeaking = false;
	private audioChunksReceived = 0;
	private audioLevelSmoothed = 0;
	private audioLevelLastEmitMs = 0;

	/**
	 * @param options Initial player settings.
	 */
	constructor(options: TTSPlayerOptions = {}) {
		super();
		this.options = options;
	}

	/**
	 * Merges `options` into the current settings; `openai` is merged field by field.
	 *
	 * @param options Settings to overlay on the existing ones.
	 */
	updateOptions(options: TTSPlayerOptions = {}): void {
		this.options = {
			...this.options,
			...options,
			openai: {
				...this.options.openai,
				...options.openai,
			},
		};
	}

	/** True between the `speaking` and `done` events of an utterance. */
	get isSpeaking(): boolean {
		return this._isSpeaking;
	}

	/**
	 * Computes the RMS level of a 16-bit PCM chunk, smooths it (fast attack,
	 * slow release) and emits `audioLevel` at most once every 16 ms.
	 *
	 * @param chunk Audio bytes, read as signed 16-bit little-endian samples.
	 */
	private emitAudioLevelFromChunk(chunk: Buffer): void {
		const sampleCount = Math.floor(chunk.length / 2);
		if (sampleCount === 0) return;

		// readInt16LE works for any byteOffset and on any host byte order; a typed-array
		// view over chunk.buffer throws a RangeError when the offset is odd.
		let sumSquares = 0;
		for (let i = 0; i < sampleCount; i++) {
			const sample = chunk.readInt16LE(i * 2) / 32768;
			sumSquares += sample * sample;
		}

		const rms = Math.sqrt(sumSquares / sampleCount);
		const noiseFloor = 0.01;
		const gain = 8;
		const level = Math.min(1, Math.max(0, (rms - noiseFloor) * gain));

		// Rise quickly, fall slowly, so the level does not flicker between chunks.
		const prev = this.audioLevelSmoothed;
		const alpha = level > prev ? 0.55 : 0.15;
		this.audioLevelSmoothed = prev + (level - prev) * alpha;

		const now = Date.now();
		if (now - this.audioLevelLastEmitMs < 16) return;
		this.audioLevelLastEmitMs = now;
		this.emit("audioLevel", this.audioLevelSmoothed);
	}

	/**
	 * Builds the sox command line: raw signed 16-bit, 24 kHz, mono input on stdin,
	 * played on the configured coreaudio device or on the default device.
	 */
	private buildSoxArgs(): string[] {
		const args = ["-q", "-t", "raw", "-e", "signed-integer", "-b", "16", "-r", "24000", "-c", "1", "-"];

		if (this.options.outputDeviceName) {
			args.push("-t", "coreaudio", this.options.outputDeviceName);
		} else {
			args.push("-d");
		}
		return args;
	}

	/**
	 * Speaks `text`, stopping any utterance already in progress.
	 *
	 * @param text Text to speak; blank text is ignored.
	 * @param signal Optional signal; aborting it stops this utterance.
	 * @returns Resolves when sox has exited (playback finished or was stopped);
	 *          rejects when synthesis or the sox process fails.
	 */
	async speak(text: string, signal?: AbortSignal): Promise<void> {
		if (!text.trim()) return;

		this.stop();

		// Nothing to play for a request that is already cancelled.
		if (signal?.aborted) return;

		this._isSpeaking = true;
		this.audioChunksReceived = 0;
		this.audioLevelSmoothed = 0;
		this.audioLevelLastEmitMs = 0;
		this.emit("speaking");

		return new Promise<void>((resolve, reject) => {
			// Session-local handles. Handlers compare them with this.tts so that events
			// from a superseded utterance never touch the state of the current one.
			let tts: OpenAITTSStream | null = null;
			let settled = false;

			const isCurrent = () => this.tts === tts;
			const onAbort = () => {
				if (isCurrent()) this.stop();
			};
			const releaseSignal = () => signal?.removeEventListener("abort", onAbort);

			// sox has exited: the utterance is over, whether it finished or was stopped.
			const finish = () => {
				if (settled) return;
				settled = true;
				releaseSignal();
				if (isCurrent()) {
					// Natural end. After stop() this branch is skipped because stop()
					// already emitted "done".
					this.tts = null;
					this.player = null;
					tts?.destroy();
					this._isSpeaking = false;
					this.emit("done");
				}
				resolve();
			};

			const fail = (error: Error) => {
				if (settled) return;
				settled = true;
				releaseSignal();
				if (isCurrent()) {
					this.stop();
					// EventEmitter throws on an unhandled "error" event, which would prevent
					// the rejection below; the promise is the guaranteed error channel.
					if (this.listenerCount("error") > 0) {
						this.emit("error", error);
					}
				}
				reject(error);
			};

			try {
				const stream = new OpenAITTSStream({
					outputFormat: "pcm",
					...this.options.openai,
				});
				tts = stream;
				this.tts = stream;

				const sox = spawn("sox", this.buildSoxArgs(), {
					stdio: ["pipe", "ignore", "pipe"],
				});
				this.player = sox;

				// When sox dies its stdin breaks; without a listener the resulting stream
				// error would be unhandled. The "close"/"error" handlers settle the promise.
				sox.stdin?.on("error", (err: Error) => {
					if (this.options.debug) {
						debug.info("tts-player-sox-stdin-error", { msg: err.message });
					}
				});

				stream.on("audio", (chunk: Buffer) => {
					this.audioChunksReceived++;
					if (this.options.debug && this.audioChunksReceived === 1) {
						debug.info("tts-player-first-audio-chunk");
					}
					this.emitAudioLevelFromChunk(chunk);
					if (sox.stdin?.writable) {
						sox.stdin.write(chunk);
					}
				});

				stream.on("done", () => {
					if (this.options.debug) {
						debug.info("tts-player-tts-done");
					}
					// Closing stdin lets sox drain its buffer and exit, which triggers "close".
					try {
						sox.stdin?.end();
					} catch {
						// Ignore
					}
				});

				stream.on("error", fail);

				sox.on("error", (err) => {
					fail(new Error(`sox player error: ${err.message}. Ensure sox is installed.`));
				});

				sox.on("close", (code) => {
					if (this.options.debug) {
						debug.info("tts-player-sox-exited", { code });
					}
					finish();
				});

				sox.stderr?.on("data", (data: Buffer) => {
					const msg = data.toString();
					if (this.options.debug) {
						debug.info("tts-player-sox-stderr", { msg });
					}
				});

				signal?.addEventListener("abort", onAbort, { once: true });

				if (this.options.debug) {
					debug.info("tts-player-starting-openai-tts");
				}
				stream.speak(text).catch(fail);
			} catch (error) {
				fail(error instanceof Error ? error : new Error(String(error)));
			}
		});
	}

	/**
	 * Stops the utterance in progress: cancels synthesis, terminates sox and
	 * emits `done` if an utterance was active. Safe to call while idle.
	 */
	stop(): void {
		const tts = this.tts;
		const sox = this.player;
		this.tts = null;
		this.player = null;

		tts?.destroy();

		if (sox) {
			try {
				sox.kill("SIGTERM");
			} catch {
				// Ignore
			}
		}

		if (this._isSpeaking) {
			this._isSpeaking = false;
			this.emit("done");
		}
	}

	/** Stops playback and detaches every listener. */
	destroy(): void {
		this.stop();
		this.removeAllListeners();
	}
}
|
|
229
|
+
|
|
230
|
+
/** Shared player instance, created on first use by getTTSPlayer(). */
let player: TTSPlayer | null = null;

/**
 * Returns the shared TTSPlayer, creating it with `options` on first use.
 * On later calls, `options` (when given) are merged into the existing player.
 */
export function getTTSPlayer(options?: TTSPlayerOptions): TTSPlayer {
	if (player === null) {
		player = new TTSPlayer(options);
		return player;
	}

	if (options) {
		player.updateOptions(options);
	}
	return player;
}
|
|
240
|
+
|
|
241
|
+
/** Destroys the shared player, if one exists, so the next getTTSPlayer() creates a new one. */
export function destroyTTSPlayer(): void {
	if (!player) return;

	player.destroy();
	player = null;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Speaks `text` on the shared player.
 *
 * @param text Text to speak.
 * @param signal Optional signal forwarded to the player's speak().
 */
export async function speak(text: string, signal?: AbortSignal): Promise<void> {
	return getTTSPlayer().speak(text, signal);
}
|
|
252
|
+
|
|
253
|
+
/** Stops the shared player if it has been created; otherwise does nothing. */
export function stopSpeaking(): void {
	player?.stop();
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
|
|
3
|
+
import { getRecorder, destroyRecorder } from "./audio-recorder";
|
|
4
|
+
import { computeMicLevelFromPcm16Chunk, smoothMicLevel } from "./mic-level";
|
|
5
|
+
|
|
6
|
+
/** Listener signatures for the events VoiceInputController emits. */
export interface VoiceInputControllerEvents {
	/** Smoothed microphone level computed from incoming recorder data. */
	micLevel: (level: number) => void;
	/** The recorder reported an error; recording has been stopped. */
	error: (error: Error) => void;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Drives the shared audio recorder for voice input: starts and stops
 * recording, relays recorder errors and emits a throttled `micLevel` event
 * derived from the recorded chunks.
 */
export class VoiceInputController extends EventEmitter {
	private recorderErrorHandler: ((error: Error) => void) | null = null;
	private recorderDataHandler: ((chunk: Buffer) => void) | null = null;
	private activeRecorder: ReturnType<typeof getRecorder> | null = null;
	private micLevelSmoothed = 0;
	private micLevelLastEmitMs = 0;

	/** Removes this controller's handlers from the active recorder, if there is one. */
	private detachRecorderListeners(): void {
		const recorder = this.activeRecorder;
		if (!recorder) return;

		const onError = this.recorderErrorHandler;
		if (onError) {
			recorder.off("error", onError);
			this.recorderErrorHandler = null;
		}

		const onData = this.recorderDataHandler;
		if (onData) {
			recorder.off("data", onData);
			this.recorderDataHandler = null;
		}
	}

	/**
	 * Folds one recorded chunk into the smoothed level and emits `micLevel`,
	 * at most once every 16 ms.
	 */
	private emitMicLevelFromChunk(chunk: Buffer): void {
		const measured = computeMicLevelFromPcm16Chunk(chunk);
		if (measured === null) return;

		this.micLevelSmoothed = smoothMicLevel(this.micLevelSmoothed, measured);

		const timestamp = Date.now();
		const sinceLastEmit = timestamp - this.micLevelLastEmitMs;
		if (sinceLastEmit >= 16) {
			this.micLevelLastEmitMs = timestamp;
			this.emit("micLevel", this.micLevelSmoothed);
		}
	}

	/**
	 * Detaches the handlers, forgets the active recorder and returns it
	 * (or the shared recorder when none was active).
	 */
	private releaseRecorder(): ReturnType<typeof getRecorder> {
		const recorder = this.activeRecorder ?? getRecorder();
		this.detachRecorderListeners();
		this.activeRecorder = null;
		return recorder;
	}

	/** Attaches fresh handlers to the shared recorder, resets the level state and starts recording. */
	start(): void {
		this.detachRecorderListeners();

		const recorder = getRecorder();
		this.activeRecorder = recorder;
		this.micLevelSmoothed = 0;
		this.micLevelLastEmitMs = 0;

		const onData = (chunk: Buffer): void => {
			this.emitMicLevelFromChunk(chunk);
		};
		const onError = (err: Error): void => {
			try {
				recorder.stop();
			} catch {
				// Ignore; we're already failing.
			}
			this.detachRecorderListeners();
			this.emit("error", err);
		};

		this.recorderDataHandler = onData;
		this.recorderErrorHandler = onError;

		recorder.on("data", onData);
		recorder.on("error", onError);
		recorder.start();
	}

	/**
	 * Stops recording and returns what was captured.
	 *
	 * @returns The recorded audio and the duration read from the recorder just before stopping.
	 */
	async stop(): Promise<{ audioBuffer: Buffer; duration: number }> {
		const recorder = this.releaseRecorder();

		// The duration is read before stopAsync() is called.
		const duration = recorder.getDuration();
		const audioBuffer = await recorder.stopAsync();

		return { audioBuffer, duration };
	}

	/** Stops recording without returning audio; errors thrown by the recorder's stop() are ignored. */
	cancel(): void {
		const recorder = this.releaseRecorder();
		try {
			recorder.stop();
		} catch {
			// Ignore
		}
	}

	/** Detaches from the recorder, destroys the shared recorder and removes every listener. */
	destroy(): void {
		this.detachRecorderListeners();
		this.activeRecorder = null;
		destroyRecorder();
		this.removeAllListeners();
	}
}
|