agent-voice 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ask-5J4JCHM4.js +307 -0
- package/dist/{ask-KM3JPI36.js → ask-F6CPRZ22.js} +31 -23
- package/dist/{auth-KET5DNSE.js → auth-4VUEFCFK.js} +1 -1
- package/dist/chunk-3YEHGYHI.js +115 -0
- package/dist/chunk-NHLAAFR3.js +276 -0
- package/dist/chunk-YU5FF2L7.js +12 -0
- package/dist/chunk-ZNUQXGGO.js +145 -0
- package/dist/cli.js +375 -32
- package/dist/daemon-client-6GF277XU.js +94 -0
- package/dist/daemon-lifecycle-BNXENMXI.js +25 -0
- package/dist/daemon.js +473 -0
- package/dist/index.js +36 -23
- package/dist/say-6EJTKNJJ.js +195 -0
- package/package.json +4 -3
- package/dist/chunk-RGYWLATZ.js +0 -61
package/dist/daemon.js
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BIT_DEPTH,
|
|
3
|
+
CHANNELS,
|
|
4
|
+
SAMPLE_RATE
|
|
5
|
+
} from "./chunk-YU5FF2L7.js";
|
|
6
|
+
|
|
7
|
+
// src/daemon.ts
|
|
8
|
+
import { rmSync as rmSync3 } from "fs";
|
|
9
|
+
import { createRequire } from "module";
|
|
10
|
+
import { createServer } from "net";
|
|
11
|
+
|
|
12
|
+
// src/config.ts
|
|
13
|
+
import { chmodSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
14
|
+
import { homedir } from "os";
|
|
15
|
+
import { join } from "path";
|
|
16
|
+
// On-disk layout: every daemon artifact lives under ~/.agent-voice.
const CONFIG_DIR = join(homedir(), ".agent-voice");
const CONFIG_PATH = join(CONFIG_DIR, "config.json");
const DAEMON_SOCKET_PATH = join(CONFIG_DIR, "daemon.sock");
const DAEMON_PID_PATH = join(CONFIG_DIR, "daemon.pid");
const LOG_DIR = join(CONFIG_DIR, "logs");
const AUDIO_LOG_DIR = join(LOG_DIR, "audio");
const EVENTS_LOG_PATH = join(LOG_DIR, "events.ndjson");

// Values used when config.json carries no `daemon` overrides.
const DAEMON_DEFAULTS = {
  idleTimeoutMinutes: 30,
  audioRingBufferSize: 50
};
|
|
27
|
+
/**
 * Load the user's config.json.
 *
 * @returns {object} The parsed config object, or `{}` when the file is
 *   missing, unreadable, not valid JSON, or valid JSON that is not an object.
 */
function readConfig() {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
  } catch {
    // A missing or corrupt config file is not fatal: behave as if it were empty.
    return {};
  }
  // `null`, numbers, strings and arrays are valid JSON but not a config object.
  // Callers read `config.auth` / `config.daemon`, which throws on `null`.
  const isConfigObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isConfigObject ? parsed : {};
}
|
|
34
|
+
/**
 * Pick the credentials to use for API calls.
 *
 * The `auth` object stored in config.json wins when it has an `apiKey`;
 * otherwise the OPENAI_API_KEY environment variable is used.
 *
 * @returns {object} The stored auth object, or `{ apiKey }` built from the environment.
 * @throws {Error} When neither source provides a key.
 */
function resolveAuth() {
  const config = readConfig();
  const storedAuth = config.auth;
  if (storedAuth?.apiKey) return storedAuth;

  const envKey = process.env.OPENAI_API_KEY;
  if (envKey) return { apiKey: envKey };

  throw new Error(
    "No API key found. Run `agent-voice auth` or set OPENAI_API_KEY."
  );
}
|
|
46
|
+
/**
 * Whether event logging is switched on.
 *
 * AGENT_VOICE_DEBUG=1 enables it without touching the config file;
 * otherwise config.json must contain `"debug": true`.
 *
 * @returns {boolean}
 */
function isDebugEnabled() {
  return process.env.AGENT_VOICE_DEBUG === "1" || readConfig().debug === true;
}
|
|
50
|
+
/**
 * Whether audio capture to disk is switched on.
 *
 * AGENT_VOICE_DEBUG_AUDIO=1 enables it; otherwise config.json must contain
 * the literal top-level key `"debug.audio"` set to `true`.
 *
 * @returns {boolean}
 */
function isDebugAudioEnabled() {
  return process.env.AGENT_VOICE_DEBUG_AUDIO === "1" || readConfig()["debug.audio"] === true;
}
|
|
54
|
+
/**
 * Merge the `daemon` section of config.json over DAEMON_DEFAULTS.
 *
 * Overrides are validated because they feed a timer delay and an array
 * slice: a value of the wrong type or range falls back to the default
 * instead of producing a NaN timeout or a negative buffer size.
 *
 * @returns {{ idleTimeoutMinutes: number, audioRingBufferSize: number }}
 */
function resolveDaemonConfig() {
  const overrides = readConfig()?.daemon;
  const pick = (key, isValid) => {
    const value = overrides?.[key];
    return isValid(value) ? value : DAEMON_DEFAULTS[key];
  };
  return {
    // Must be a positive finite number of minutes.
    idleTimeoutMinutes: pick(
      "idleTimeoutMinutes",
      (value) => typeof value === "number" && Number.isFinite(value) && value > 0
    ),
    // Must be a whole, non-negative count of retained commands.
    audioRingBufferSize: pick(
      "audioRingBufferSize",
      (value) => Number.isInteger(value) && value >= 0
    )
  };
}
|
|
61
|
+
|
|
62
|
+
// src/daemon-lifecycle.ts
|
|
63
|
+
import { spawn } from "child_process";
|
|
64
|
+
import {
|
|
65
|
+
existsSync,
|
|
66
|
+
mkdirSync as mkdirSync2,
|
|
67
|
+
readFileSync as readFileSync2,
|
|
68
|
+
rmSync,
|
|
69
|
+
writeFileSync as writeFileSync2
|
|
70
|
+
} from "fs";
|
|
71
|
+
import { connect } from "net";
|
|
72
|
+
import { dirname, join as join2 } from "path";
|
|
73
|
+
|
|
74
|
+
// src/daemon-protocol.ts
|
|
75
|
+
import { z } from "zod";
|
|
76
|
+
// Messages a client may send to the daemon; `type` selects the variant.
const DaemonRequest = z.discriminatedUnion("type", [
  z.object({
    type: z.literal("say"),
    id: z.string(),
    message: z.string(),
    voice: z.string()
  }),
  z.object({
    type: z.literal("ask"),
    id: z.string(),
    message: z.string(),
    voice: z.string(),
    timeout: z.number(),
    ack: z.boolean()
  }),
  z.object({ type: z.literal("ping") }),
  z.object({ type: z.literal("shutdown") })
]);

// One trace event forwarded to the client inside a "log" response.
const TraceEntry = z.object({
  atMs: z.number(),
  event: z.string(),
  detail: z.record(z.unknown()).optional()
});

// Messages the daemon may send back; `type` selects the variant.
const DaemonResponse = z.discriminatedUnion("type", [
  z.object({ type: z.literal("say:done"), id: z.string() }),
  z.object({ type: z.literal("ask:done"), id: z.string(), transcript: z.string() }),
  z.object({ type: z.literal("error"), id: z.string(), message: z.string() }),
  z.object({ type: z.literal("pong"), uptime: z.number(), commandCount: z.number() }),
  z.object({ type: z.literal("log"), id: z.string(), entry: TraceEntry })
]);
|
|
114
|
+
/**
 * Serialise a message into one wire frame.
 *
 * Frame layout: a 4-byte big-endian unsigned length, followed by that many
 * bytes of UTF-8 JSON terminated by a newline (the newline is counted in
 * the length).
 *
 * @param {unknown} msg - Value to serialise with JSON.stringify.
 * @returns {Buffer} The complete frame.
 */
function encodeMessage(msg) {
  const body = Buffer.from(`${JSON.stringify(msg)}\n`, "utf-8");
  const frame = Buffer.alloc(4 + body.length);
  frame.writeUInt32BE(body.length, 0);
  body.copy(frame, 4);
  return frame;
}
|
|
122
|
+
/**
 * Build a streaming decoder for length-prefixed JSON frames.
 *
 * The returned function accepts arbitrary byte chunks, buffers partial
 * frames across calls, and invokes `onMessage` once per complete frame.
 * A frame is a 4-byte big-endian length followed by that many bytes of
 * UTF-8 JSON.
 *
 * A frame whose payload is not valid JSON is skipped rather than thrown:
 * this function runs inside a socket "data" handler, where an exception
 * would be uncaught and take the whole process down. The length prefix
 * keeps the stream in sync, so later frames still decode.
 *
 * @param {(message: unknown) => void} onMessage - Called with each decoded message.
 * @param {(error: Error, payload: string) => void} [onError] - Optional; called
 *   with the parse error and raw payload text of each malformed frame.
 * @returns {(chunk: Buffer) => void} Feed function for incoming bytes.
 */
function createMessageParser(onMessage, onError) {
  let pending = Buffer.alloc(0);
  return (chunk) => {
    pending = Buffer.concat([pending, chunk]);
    while (pending.length >= 4) {
      const length = pending.readUInt32BE(0);
      // Wait for more data until the whole frame has arrived.
      if (pending.length < 4 + length) break;
      const payload = pending.subarray(4, 4 + length).toString("utf-8");
      // Consume the frame before decoding so a bad payload cannot wedge the buffer.
      pending = pending.subarray(4 + length);
      let message;
      try {
        message = JSON.parse(payload);
      } catch (err) {
        onError?.(err, payload);
        continue;
      }
      onMessage(message);
    }
  };
}
|
|
135
|
+
|
|
136
|
+
// src/daemon-lifecycle.ts
|
|
137
|
+
/**
 * Record the daemon's process id in the PID file, creating the parent
 * directory first if needed. The file holds the pid followed by a newline.
 *
 * @param {number} pid - Process id to record.
 */
function writeDaemonPid(pid) {
  const pidDir = dirname(DAEMON_PID_PATH);
  mkdirSync2(pidDir, { recursive: true });
  const contents = `${pid}\n`;
  writeFileSync2(DAEMON_PID_PATH, contents);
}
|
|
142
|
+
/**
 * Delete the PID file. Failures (for example the file already being gone)
 * are ignored on purpose: this runs during shutdown, where there is
 * nothing useful left to do about them.
 */
function removeDaemonPid() {
  try {
    rmSync(DAEMON_PID_PATH);
  } catch (_ignored) {
    // Best effort only.
  }
}
|
|
148
|
+
|
|
149
|
+
// src/daemon-log.ts
|
|
150
|
+
import {
|
|
151
|
+
appendFileSync,
|
|
152
|
+
mkdirSync as mkdirSync3,
|
|
153
|
+
readdirSync,
|
|
154
|
+
rmSync as rmSync2,
|
|
155
|
+
writeFileSync as writeFileSync3
|
|
156
|
+
} from "fs";
|
|
157
|
+
import { join as join3 } from "path";
|
|
158
|
+
/** Create LOG_DIR, including missing parents; a no-op when it already exists. */
function ensureLogDir() {
  const options = { recursive: true };
  mkdirSync3(LOG_DIR, options);
}
|
|
161
|
+
/** Create AUDIO_LOG_DIR, including missing parents; a no-op when it already exists. */
function ensureAudioDir() {
  const options = { recursive: true };
  mkdirSync3(AUDIO_LOG_DIR, options);
}
|
|
164
|
+
/**
 * Append one entry to the NDJSON events log (one JSON document per line).
 * Does nothing unless debug logging is enabled.
 *
 * @param {object} entry - Value to serialise onto its own line.
 */
function appendLogEntry(entry) {
  if (isDebugEnabled()) {
    ensureLogDir();
    const line = `${JSON.stringify(entry)}\n`;
    appendFileSync(EVENTS_LOG_PATH, line);
  }
}
|
|
170
|
+
/**
 * Create a logger bound to one command invocation.
 *
 * Every entry it writes carries an ISO timestamp plus the given `cmd` and
 * `id`, and goes through appendLogEntry.
 *
 * @param {string} cmd - Command name stamped on each entry.
 * @param {string} id - Request id stamped on each entry.
 * @returns {{ log: Function, trace: Function, startMs: number }}
 *   `log(event, detail)` writes an entry as given; `trace(event)` writes a
 *   trace object's `event`, folding its `atMs` into `detail`; `startMs` is
 *   the Date.now() value captured when the logger was created.
 */
function createCommandLogger(cmd, id) {
  const createdAtMs = Date.now();
  const write = (event, detail) => {
    appendLogEntry({
      ts: new Date().toISOString(),
      cmd,
      id,
      event,
      detail
    });
  };
  return {
    log(event, detail) {
      write(event, detail);
    },
    trace(event) {
      write(event.event, { ...event.detail, atMs: event.atMs });
    },
    get startMs() {
      return createdAtMs;
    }
  };
}
|
|
196
|
+
/**
 * Wrap raw PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * The format fields come from the module constants SAMPLE_RATE, CHANNELS
 * and BIT_DEPTH.
 *
 * @param {Buffer} pcm16 - Raw sample bytes; its byte length becomes the data size.
 * @returns {Buffer} Header followed by the sample bytes.
 */
function createWavBuffer(pcm16) {
  const bytesPerSample = BIT_DEPTH / 8;
  const dataSize = pcm16.length;
  const header = Buffer.alloc(44);

  // RIFF container: the size field counts everything after itself.
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataSize, 4);
  header.write("WAVE", 8);

  // "fmt " chunk: 16-byte body, format tag 1.
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16);
  header.writeUInt16LE(1, 20);
  header.writeUInt16LE(CHANNELS, 22);
  header.writeUInt32LE(SAMPLE_RATE, 24);
  header.writeUInt32LE(SAMPLE_RATE * CHANNELS * bytesPerSample, 28); // byte rate
  header.writeUInt16LE(CHANNELS * bytesPerSample, 32); // block align
  header.writeUInt16LE(BIT_DEPTH, 34);

  // "data" chunk header; the samples follow immediately.
  header.write("data", 36);
  header.writeUInt32LE(dataSize, 40);

  return Buffer.concat([header, pcm16]);
}
|
|
217
|
+
/**
 * Write each non-empty captured audio stream to `<id>-<name>.wav` in the
 * audio log directory, then trim old captures via enforceRingBuffer.
 * Does nothing unless audio debugging is enabled.
 *
 * @param {string} id - Request id used as the file-name prefix. It arrives
 *   from the socket client, so path separators are replaced to keep the
 *   file inside AUDIO_LOG_DIR.
 * @param {Record<string, Buffer[]>} streams - Stream name mapped to its chunks.
 * @returns {string[]} Paths of the files written (empty when disabled).
 */
function writeAudioCapture(id, streams) {
  if (!isDebugAudioEnabled()) return [];
  ensureAudioDir();
  // An id such as "../../x" would otherwise escape the audio directory.
  const safeId = String(id).replace(/[\\/]/g, "_");
  const written = [];
  for (const [name, chunks] of Object.entries(streams)) {
    if (chunks.length === 0) continue;
    const path = join3(AUDIO_LOG_DIR, `${safeId}-${name}.wav`);
    writeFileSync3(path, createWavBuffer(Buffer.concat(chunks)));
    written.push(path);
  }
  enforceRingBuffer();
  return written;
}
|
|
230
|
+
/**
 * Cap the number of commands that have audio captures on disk.
 *
 * Capture files are grouped by the id prefix of their name. When more than
 * `audioRingBufferSize` distinct ids exist, every file belonging to the
 * ids that sort first is deleted.
 * NOTE(review): ordering is lexicographic on the id; that only equals
 * "oldest first" if ids sort chronologically — confirm against the client.
 *
 * A missing or unreadable audio directory and failed deletions are ignored.
 */
function enforceRingBuffer() {
  const { audioRingBufferSize } = resolveDaemonConfig();
  const capturePattern = /^(.+)-(?:assistant|mic|model-input)\.wav$/;

  let wavFiles;
  try {
    wavFiles = readdirSync(AUDIO_LOG_DIR)
      .filter((entry) => entry.endsWith(".wav"))
      .sort();
  } catch {
    return;
  }

  // Pair each recognised capture file with the command id in its name.
  const captures = [];
  for (const file of wavFiles) {
    const found = capturePattern.exec(file);
    if (found) captures.push({ file, id: found[1] });
  }

  const sortedIds = [...new Set(captures.map((capture) => capture.id))].sort();
  const excess = sortedIds.length - audioRingBufferSize;
  if (excess <= 0) return;

  const expired = new Set(sortedIds.slice(0, excess));
  for (const { file, id } of captures) {
    if (!expired.has(id)) continue;
    try {
      rmSync2(join3(AUDIO_LOG_DIR, file));
    } catch {
      // Best effort: a file that cannot be removed is left in place.
    }
  }
}
|
|
257
|
+
|
|
258
|
+
// src/daemon.ts
|
|
259
|
+
// CommonJS-style loader, used to load the "agent-voice-audio" package on demand.
const require2 = createRequire(import.meta.url);

// Process-wide daemon state.
let engineState = null; // { engine, mode } for the live audio engine, if any
let commandCount = 0; // number of commands accepted after ping/shutdown filtering
const startedAt = Date.now(); // reference point for the uptime reported by "pong"
let idleTimer = null; // handle of the pending idle-shutdown timeout
|
|
264
|
+
/**
 * (Re)arm the idle timer: when it fires, the daemon shuts itself down.
 * Any previously armed timer is cancelled first.
 *
 * The configured timeout is sanitised before use, because setTimeout
 * treats a NaN, non-positive, or larger-than-2^31-1 ms delay as roughly
 * 1 ms, which would shut the daemon down almost immediately:
 *  - a value that is not a positive finite number falls back to
 *    DAEMON_DEFAULTS.idleTimeoutMinutes;
 *  - the resulting delay is capped at the largest delay setTimeout honours.
 */
function resetIdleTimer() {
  if (idleTimer) clearTimeout(idleTimer);
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
  const { idleTimeoutMinutes } = resolveDaemonConfig();
  const isUsable =
    typeof idleTimeoutMinutes === "number" &&
    Number.isFinite(idleTimeoutMinutes) &&
    idleTimeoutMinutes > 0;
  const minutes = isUsable ? idleTimeoutMinutes : DAEMON_DEFAULTS.idleTimeoutMinutes;
  const delayMs = Math.min(minutes * 60 * 1e3, MAX_TIMER_DELAY_MS);
  idleTimer = setTimeout(() => {
    shutdown();
  }, delayMs);
}
|
|
274
|
+
/**
 * Return the shared audio engine for `mode`, creating it if needed.
 *
 * One engine is kept alive at a time. When the requested mode differs
 * from the live engine's mode, the old engine is torn down and a new one
 * is built; "ask" engines are created with `enableAec` on and a 30 ms
 * stream delay, "say" engines with neither.
 *
 * Teardown is best-effort, but `close()` is always attempted even if
 * `stop()` throws, and an engine whose `start()` fails is closed before
 * the error propagates, so no engine is left open without an owner.
 *
 * @param {"say" | "ask"} mode - Which command the engine will serve.
 * @returns {object} A started AudioEngine instance.
 * @throws Whatever loading, constructing or starting the engine throws.
 */
function getOrCreateEngine(mode) {
  if (engineState && engineState.mode === mode) {
    return engineState.engine;
  }
  if (engineState) {
    const previous = engineState.engine;
    engineState = null;
    try {
      previous.stop();
    } catch {
      // Ignored: the engine is being discarded anyway.
    }
    try {
      previous.close();
    } catch {
      // Ignored: nothing more can be done for a discarded engine.
    }
  }
  const { AudioEngine } = require2("agent-voice-audio");
  const isAsk = mode === "ask";
  const engine = new AudioEngine({
    sampleRate: SAMPLE_RATE,
    channels: 1,
    enableAec: isAsk,
    streamDelayMs: isAsk ? 30 : void 0
  });
  try {
    engine.start();
  } catch (err) {
    try {
      engine.close();
    } catch {
      // Ignored: the start failure below is the error worth reporting.
    }
    throw err;
  }
  engineState = { engine, mode };
  return engine;
}
|
|
297
|
+
/**
 * Wrap the shared engine for use by a single command.
 *
 * The lifecycle methods (`start`, `stop`, `close`) are no-ops on the
 * wrapper, so code that believes it owns the engine cannot tear down the
 * daemon's long-lived instance. The remaining methods are forwarded,
 * bound to the real engine.
 *
 * @param {object} engine - The real engine instance.
 * @returns {object} The wrapper object.
 */
function createEngineProxy(engine) {
  const forwarded = [
    "play",
    "readProcessedCapture",
    "readRawCapture",
    "setStreamDelayMs",
    "getStats"
  ];
  const proxy = {
    start() {},
    stop() {},
    close() {}
  };
  for (const method of forwarded) {
    proxy[method] = engine[method].bind(engine);
  }
  return proxy;
}
|
|
312
|
+
// Pending { request, socket } pairs, handled strictly one at a time.
const commandQueue = [];
// True while a processQueue() call is draining commandQueue.
let processing = false;

/**
 * Drain the command queue sequentially.
 *
 * Safe to call after every enqueue: if a drain is already running the
 * call returns at once and the running drain picks up the new item.
 *
 * A command that rejects must not stop the queue. Its error is reported
 * to the originating socket as an "error" response and draining
 * continues; the `processing` flag is always cleared, so later calls can
 * start a new drain. The returned promise therefore never rejects.
 *
 * @returns {Promise<void>}
 */
async function processQueue() {
  if (processing) return;
  processing = true;
  try {
    while (commandQueue.length > 0) {
      const item = commandQueue.shift();
      if (!item) break;
      try {
        await executeCommand(item.request, item.socket);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        try {
          send(item.socket, {
            type: "error",
            id: item.request?.id ?? "unknown",
            message
          });
        } catch {
          // The socket is unusable; move on to the next command.
        }
      }
    }
  } finally {
    processing = false;
  }
}
|
|
324
|
+
/**
 * Write one framed message to a client socket.
 * A socket that is already destroyed is skipped silently.
 *
 * @param {object} socket - Client connection (must expose `destroyed` and `write`).
 * @param {object} msg - Response to encode and send.
 */
function send(socket, msg) {
  if (socket.destroyed) return;
  const frame = encodeMessage(msg);
  socket.write(frame);
}
|
|
329
|
+
/**
 * Run one validated request.
 *
 * "ping" is answered immediately with uptime and command count, and
 * "shutdown" stops the daemon; neither counts as a command nor touches
 * the idle timer. Anything else bumps the command counter and re-arms
 * the idle timer before being dispatched to its handler.
 *
 * @param {object} request - A parsed request object with a `type` field.
 * @param {object} socket - Connection to reply on.
 * @returns {Promise<void>}
 */
async function executeCommand(request, socket) {
  switch (request.type) {
    case "ping":
      send(socket, {
        type: "pong",
        uptime: Date.now() - startedAt,
        commandCount
      });
      return;
    case "shutdown":
      shutdown();
      return;
    default:
      break;
  }

  commandCount += 1;
  resetIdleTimer();

  switch (request.type) {
    case "say":
      await executeSay(request, socket);
      break;
    case "ask":
      await executeAsk(request, socket);
      break;
    default:
      break;
  }
}
|
|
350
|
+
/**
 * Handle a "say" request: speak `request.message` through the shared
 * engine and reply with "say:done", or with "error" if anything fails.
 *
 * Assistant audio is collected while speaking and handed to
 * writeAudioCapture afterwards, on success and on failure alike. Trace
 * events are both logged and streamed to the client as "log" responses.
 *
 * Debug logging and audio capture are best-effort: a failure in either
 * (for example an unwritable log directory) is ignored so that the client
 * always receives exactly one final reply and this function never rejects
 * because of them.
 *
 * @param {object} request - Validated "say" request (`id`, `message`, `voice`).
 * @param {object} socket - Connection to reply on.
 * @returns {Promise<void>}
 */
async function executeSay(request, socket) {
  const logger = createCommandLogger("say", request.id);
  const assistantChunks = [];
  const bestEffort = (step) => {
    try {
      step();
    } catch {
      // Diagnostics must never decide the outcome of a command.
    }
  };

  let response;
  try {
    const engine = getOrCreateEngine("say");
    const proxy = createEngineProxy(engine);
    const auth = resolveAuth();
    const { say } = await import("./say-6EJTKNJJ.js");
    await say(request.message, {
      voice: request.voice,
      auth,
      // Hand out the proxy so `say` cannot stop or close the shared engine.
      createAudioEngine: () => proxy,
      onAssistantAudio(pcm16) {
        // Copy: the source buffer is not owned by this function.
        assistantChunks.push(Buffer.from(pcm16));
      },
      onTrace(event) {
        logger.trace(event);
        send(socket, { type: "log", id: request.id, entry: event });
      }
    });
    response = { type: "say:done", id: request.id };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    bestEffort(() => logger.log("error", { message }));
    response = { type: "error", id: request.id, message };
  }

  bestEffort(() => writeAudioCapture(request.id, { assistant: assistantChunks }));
  if (response.type === "say:done") {
    bestEffort(() => logger.log("done"));
  }
  send(socket, response);
}
|
|
380
|
+
/**
 * Handle an "ask" request: speak `request.message`, obtain the transcript
 * returned by `ask`, and reply with "ask:done", or with "error" if
 * anything fails.
 *
 * Three audio streams are collected along the way (assistant output,
 * microphone input, and the frames reported via onAudioFrameSent) and
 * handed to writeAudioCapture afterwards, on success and on failure
 * alike. Trace events are both logged and streamed to the client as
 * "log" responses.
 *
 * Debug logging and audio capture are best-effort: a failure in either
 * is ignored so that the client always receives exactly one final reply
 * and this function never rejects because of them.
 *
 * @param {object} request - Validated "ask" request
 *   (`id`, `message`, `voice`, `timeout`, `ack`).
 * @param {object} socket - Connection to reply on.
 * @returns {Promise<void>}
 */
async function executeAsk(request, socket) {
  const logger = createCommandLogger("ask", request.id);
  const assistantChunks = [];
  const micChunks = [];
  const modelInputChunks = [];
  const bestEffort = (step) => {
    try {
      step();
    } catch {
      // Diagnostics must never decide the outcome of a command.
    }
  };

  let response;
  try {
    const engine = getOrCreateEngine("ask");
    const proxy = createEngineProxy(engine);
    const auth = resolveAuth();
    const { ask } = await import("./ask-5J4JCHM4.js");
    const transcript = await ask(request.message, {
      voice: request.voice,
      timeout: request.timeout,
      ack: request.ack,
      auth,
      // Hand out the proxy so `ask` cannot stop or close the shared engine.
      createAudioEngine: () => proxy,
      onAssistantAudio(pcm16) {
        assistantChunks.push(Buffer.from(pcm16));
      },
      onMicAudio(pcm16) {
        micChunks.push(Buffer.from(pcm16));
      },
      onAudioFrameSent(pcm16) {
        modelInputChunks.push(Buffer.from(pcm16));
      },
      onTrace(event) {
        logger.trace(event);
        send(socket, { type: "log", id: request.id, entry: event });
      }
    });
    response = { type: "ask:done", id: request.id, transcript };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    bestEffort(() => logger.log("error", { message }));
    response = { type: "error", id: request.id, message };
  }

  bestEffort(() =>
    writeAudioCapture(request.id, {
      assistant: assistantChunks,
      mic: micChunks,
      "model-input": modelInputChunks
    })
  );
  if (response.type === "ask:done") {
    bestEffort(() => logger.log("done", { transcript: response.transcript }));
  }
  send(socket, response);
}
|
|
428
|
+
/**
 * Socket server: one framed-JSON parser per client connection.
 *
 * Each decoded message is validated against DaemonRequest. Invalid
 * messages get an "error" reply with id "unknown"; valid ones are queued
 * and the queue is kicked.
 *
 * Nothing thrown while handling incoming bytes may escape the "data"
 * listener, and the queue promise must not be left to reject unobserved:
 * either would be an uncaught error that terminates the daemon for every
 * client.
 */
const server = createServer((socket) => {
  const parse = createMessageParser((msg) => {
    const result = DaemonRequest.safeParse(msg);
    if (!result.success) {
      send(socket, {
        type: "error",
        id: "unknown",
        message: `Invalid request: ${result.error.message}`
      });
      return;
    }
    commandQueue.push({ request: result.data, socket });
    // Fire and forget; command failures are reported to the client, not thrown here.
    processQueue().catch(() => {});
  });
  socket.on("data", (chunk) => {
    try {
      parse(chunk);
    } catch (err) {
      // For example a frame whose payload is not valid JSON.
      const reason = err instanceof Error ? err.message : String(err);
      send(socket, {
        type: "error",
        id: "unknown",
        message: `Invalid request: ${reason}`
      });
    }
  });
  // Without a listener, a socket "error" event would be thrown as an uncaught exception.
  socket.on("error", () => {});
});
|
|
446
|
+
/**
 * Stop the daemon and exit the process with status 0.
 *
 * In order: cancel the idle timer, stop accepting connections, tear down
 * the audio engine if one is live, remove the PID file and the socket
 * file, then exit. Engine teardown and socket-file removal are
 * best-effort; their errors are ignored. Does not return.
 */
function shutdown() {
  const quietly = (step) => {
    try {
      step();
    } catch {
      // Best effort: the process is exiting regardless.
    }
  };

  if (idleTimer) clearTimeout(idleTimer);
  server.close();

  if (engineState) {
    const { engine } = engineState;
    quietly(() => {
      engine.stop();
      engine.close();
    });
    engineState = null;
  }

  removeDaemonPid();
  quietly(() => rmSync3(DAEMON_SOCKET_PATH));
  process.exit(0);
}
|
|
464
|
+
// Daemon startup.

// Remove any socket file already at this path before listening on it;
// a failure (for example no such file) is ignored.
try {
  rmSync3(DAEMON_SOCKET_PATH);
} catch {
  // Nothing to remove.
}

// Once listening: record our pid and start the idle countdown.
server.listen(DAEMON_SOCKET_PATH, () => {
  writeDaemonPid(process.pid);
  resetIdleTimer();
});

// Route termination signals through shutdown() so it can clean up before exiting.
for (const signal of ["SIGTERM", "SIGINT"]) {
  process.on(signal, shutdown);
}
|
package/dist/index.js
CHANGED
|
@@ -197,6 +197,7 @@ async function ask(message, options = {}) {
|
|
|
197
197
|
let lastAssistantAudioAt = 0;
|
|
198
198
|
let nearEndEvidenceSeen = false;
|
|
199
199
|
let nearEndEvidenceAtMs = 0;
|
|
200
|
+
let nearEndEvidenceConfirmed = false;
|
|
200
201
|
let cleaned = false;
|
|
201
202
|
let settled = false;
|
|
202
203
|
async function cleanup() {
|
|
@@ -265,6 +266,19 @@ async function ask(message, options = {}) {
|
|
|
265
266
|
if (rms >= minSpeechRms) {
|
|
266
267
|
nearEndEvidenceSeen = true;
|
|
267
268
|
nearEndEvidenceAtMs = Date.now();
|
|
269
|
+
if (!nearEndEvidenceConfirmed && speechStartedAtMs > 0) {
|
|
270
|
+
const evidencePreRollMs = readEnvInt(
|
|
271
|
+
"AGENT_VOICE_SPEECH_EVIDENCE_PREROLL_MS",
|
|
272
|
+
200
|
|
273
|
+
);
|
|
274
|
+
const evidencePostRollMs = readEnvInt(
|
|
275
|
+
"AGENT_VOICE_SPEECH_EVIDENCE_POSTROLL_MS",
|
|
276
|
+
1500
|
|
277
|
+
);
|
|
278
|
+
if (nearEndEvidenceAtMs >= speechStartedAtMs - evidencePreRollMs && nearEndEvidenceAtMs <= speechStartedAtMs + evidencePostRollMs) {
|
|
279
|
+
nearEndEvidenceConfirmed = true;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
268
282
|
trace("audio:near_end_evidence", { rms, minSpeechRms });
|
|
269
283
|
}
|
|
270
284
|
onAudioFrameSent?.(frame);
|
|
@@ -303,29 +317,14 @@ async function ask(message, options = {}) {
|
|
|
303
317
|
}
|
|
304
318
|
logEvent("realtime:transcript", `text="${text}"`);
|
|
305
319
|
trace("realtime:transcript", { text });
|
|
306
|
-
if (speechDetected) {
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
);
|
|
315
|
-
const evidenceEarliestMs = speechStartedAtMs - evidencePreRollMs;
|
|
316
|
-
const evidenceLatestMs = speechStartedAtMs + evidencePostRollMs;
|
|
317
|
-
const hasTimelyNearEndEvidence = nearEndEvidenceSeen && nearEndEvidenceAtMs >= evidenceEarliestMs && nearEndEvidenceAtMs <= evidenceLatestMs;
|
|
318
|
-
if (!hasTimelyNearEndEvidence) {
|
|
319
|
-
trace("realtime:transcript_ignored_no_near_end_evidence", {
|
|
320
|
-
text,
|
|
321
|
-
speechStartedAtMs,
|
|
322
|
-
nearEndEvidenceSeen,
|
|
323
|
-
nearEndEvidenceAtMs,
|
|
324
|
-
evidenceEarliestMs,
|
|
325
|
-
evidenceLatestMs
|
|
326
|
-
});
|
|
327
|
-
return;
|
|
328
|
-
}
|
|
320
|
+
if (speechDetected && !nearEndEvidenceConfirmed) {
|
|
321
|
+
trace("realtime:transcript_ignored_no_near_end_evidence", {
|
|
322
|
+
text,
|
|
323
|
+
speechStartedAtMs,
|
|
324
|
+
nearEndEvidenceSeen,
|
|
325
|
+
nearEndEvidenceAtMs
|
|
326
|
+
});
|
|
327
|
+
return;
|
|
329
328
|
}
|
|
330
329
|
if (transcriptTimer) {
|
|
331
330
|
clearTimeout(transcriptTimer);
|
|
@@ -339,6 +338,15 @@ async function ask(message, options = {}) {
|
|
|
339
338
|
trace("realtime:speech_started");
|
|
340
339
|
speechDetected = true;
|
|
341
340
|
speechStartedAtMs = Date.now();
|
|
341
|
+
if (nearEndEvidenceSeen && !nearEndEvidenceConfirmed) {
|
|
342
|
+
const evidencePreRollMs = readEnvInt(
|
|
343
|
+
"AGENT_VOICE_SPEECH_EVIDENCE_PREROLL_MS",
|
|
344
|
+
200
|
|
345
|
+
);
|
|
346
|
+
if (nearEndEvidenceAtMs >= speechStartedAtMs - evidencePreRollMs) {
|
|
347
|
+
nearEndEvidenceConfirmed = true;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
342
350
|
if (timeoutTimer) {
|
|
343
351
|
clearTimeout(timeoutTimer);
|
|
344
352
|
timeoutTimer = null;
|
|
@@ -424,6 +432,11 @@ import { homedir } from "os";
|
|
|
424
432
|
import { join } from "path";
|
|
425
433
|
var CONFIG_DIR = join(homedir(), ".agent-voice");
|
|
426
434
|
var CONFIG_PATH = join(CONFIG_DIR, "config.json");
|
|
435
|
+
var DAEMON_SOCKET_PATH = join(CONFIG_DIR, "daemon.sock");
|
|
436
|
+
var DAEMON_PID_PATH = join(CONFIG_DIR, "daemon.pid");
|
|
437
|
+
var LOG_DIR = join(CONFIG_DIR, "logs");
|
|
438
|
+
var AUDIO_LOG_DIR = join(LOG_DIR, "audio");
|
|
439
|
+
var EVENTS_LOG_PATH = join(LOG_DIR, "events.ndjson");
|
|
427
440
|
function readConfig() {
|
|
428
441
|
try {
|
|
429
442
|
return JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
|