open-agents-ai 0.187.280 → 0.187.282
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +125 -30
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -328288,6 +328288,23 @@ function computeSignalFromText(text, confidence) {
 function truncateForLog(s2, n2) {
   return s2.length <= n2 ? s2 : s2.slice(0, n2 - 1) + "…";
 }
+function extractToolJson(text) {
+  const lines = text.split(/\r?\n/);
+  for (const line of lines) {
+    const t2 = line.trim();
+    if (!t2.startsWith("{") || !t2.endsWith("}")) continue;
+    try {
+      const obj = JSON.parse(t2);
+      if (typeof obj.tool === "string") {
+        const name10 = obj.tool;
+        const args = obj.args && typeof obj.args === "object" ? obj.args : {};
+        return { name: name10, args };
+      }
+    } catch {
+    }
+  }
+  return null;
+}
 var VAD_SILENCE_MS, MAX_SEGMENT_MS, MAX_CONTEXT_TURNS, SYSTEM_PROMPT2, MIN_SIGNAL_SCORE, NOISE_ONLY_RE, VoiceChatSession;
 var init_voicechat = __esm({
   "packages/cli/src/tui/voicechat.ts"() {
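Note: the new extractToolJson helper scans a model reply line by line for a single-line JSON object with a string "tool" field; a non-object "args" falls back to {}. A minimal behavior sketch (standalone Node.js, reusing extractToolJson as defined above; the reply text is hypothetical):

// Hypothetical model reply: prose plus one bare JSON tool-request line.
const reply = 'Let me check.\n{"tool": "voice_env", "args": {}}';
// Only trimmed lines that both start with "{" and end with "}" are parsed;
// unparseable lines are skipped silently, and null is returned if none match.
console.log(extractToolJson(reply));
// => { name: 'voice_env', args: {} }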
@@ -328295,7 +328312,7 @@ var init_voicechat = __esm({
     VAD_SILENCE_MS = 2e3;
     MAX_SEGMENT_MS = 6500;
     MAX_CONTEXT_TURNS = 20;
-    SYSTEM_PROMPT2 = `You are a voice assistant having a live spoken conversation. Keep responses extremely brief — 1-2 sentences max. You're speaking aloud, not writing. Be conversational, direct, and helpful. Don't use markdown
+    SYSTEM_PROMPT2 = `You are a voice assistant having a live spoken conversation. Keep responses extremely brief — 1-2 sentences max. You're speaking aloud, not writing. Be conversational, direct, and helpful. Don't use markdown or formatting — just natural speech. Never invent environment facts (like cwd, OS, specs). If you need a precise fact from the main agent, output on a single line EXACTLY one JSON object with fields {"tool": string, "args": object} and nothing else; then wait for the tool result before answering. Prefer tools for factual queries; otherwise, answer directly.`;
     MIN_SIGNAL_SCORE = 0.4;
     NOISE_ONLY_RE = /^(?:[.·…\s,;:!?\-–—_()\[\]{}"'`]+|(?:uh|um|erm|hmm|mm+|uhh+|umm+)[\s.!?]*)+$/i;
     VoiceChatSession = class extends EventEmitter10 {
@@ -328305,6 +328322,9 @@ var init_voicechat = __esm({
       model;
       apiKey;
       runner;
+      verbose = false;
+      debugSnr = false;
+      toolRelay = null;
       // State machine
       _state = "IDLE";
       active = false;
@@ -328337,6 +328357,9 @@ var init_voicechat = __esm({
         this.model = opts.model;
         this.apiKey = opts.apiKey ?? "";
         this.runner = opts.runner ?? null;
+        this.verbose = Boolean(opts.verbose);
+        this.debugSnr = Boolean(opts.debugSnr);
+        this.toolRelay = opts.toolRelay ?? null;
         this.onStatus = opts.onStatus ?? (() => {
         });
         this.onUserSpeech = opts.onUserSpeech ?? (() => {
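Note: the constructor now accepts three extra options, all defaulting to off/null. A minimal construction sketch using only the fields visible in this diff (the model id and relay body are hypothetical; other options such as onStatus are unchanged):

const session = new VoiceChatSession({
  model: "some-local-model",  // hypothetical id
  verbose: false,             // keeps LISTENING/Thinking status lines silent
  debugSnr: false,            // keeps low-signal drop notices silent
  toolRelay: {                // optional; this is the shape think() consumes
    async call(name, args) { return "stub result"; },
    contextSnapshot() { return "cwd: /tmp/demo"; }  // optional, read-only
  }
});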
@@ -328376,7 +328399,7 @@ var init_voicechat = __esm({
         this.active = true;
         this.context = [{ role: "system", content: SYSTEM_PROMPT2 }];
         this.turnCount = 0;
-        this.onStatus("VoiceChat
+        if (this.verbose) this.onStatus("VoiceChat active — LISTENING");
         this._onTranscript = (...args) => {
           let text;
           let isFinal;
@@ -328406,7 +328429,7 @@ var init_voicechat = __esm({
             await this.listen.stop().catch(() => {
             });
             await this.listen.start();
-            this.onStatus("Mic auto-recovered — LISTENING");
+            if (this.verbose) this.onStatus("Mic auto-recovered — LISTENING");
           } catch {
           }
         }, 1e3);
@@ -328417,11 +328440,9 @@ var init_voicechat = __esm({
         try {
           await this.listen.start();
           this.setState("LISTENING");
-          this.onStatus("Mic active — LISTENING for speech...");
+          if (this.verbose) this.onStatus("Mic active — LISTENING for speech...");
         } catch (err) {
-          this.onStatus(
-            `Mic failed: ${err instanceof Error ? err.message : String(err)}. VoiceChat active without mic.`
-          );
+          this.onStatus(`Mic failed: ${err instanceof Error ? err.message : String(err)}. VoiceChat active without mic.`);
           this.setState("LISTENING");
         }
       }
@@ -328456,7 +328477,7 @@ var init_voicechat = __esm({
         } catch {
         }
         this.setState("IDLE");
-        this.onStatus("VoiceChat ended");
+        if (this.verbose) this.onStatus("VoiceChat ended");
         this.emit("stopped");
       }
       // ---------------------------------------------------------------------------
@@ -328512,7 +328533,7 @@ var init_voicechat = __esm({
         }
         const score = this.lastSignalScore ?? computeSignalFromText(text);
         if (score < MIN_SIGNAL_SCORE || NOISE_ONLY_RE.test(text)) {
-          this.onStatus(`Ignoring low-signal utterance (SNR:${score.toFixed(2)}): ${truncateForLog(text, 48)}`);
+          if (this.debugSnr) this.onStatus(`Ignoring low-signal utterance (SNR:${score.toFixed(2)}): ${truncateForLog(text, 48)}`);
           this.emit("snrFiltered", { score, text });
           this.setState("LISTENING");
           this.captureBuffer = "";
@@ -328540,10 +328561,33 @@ var init_voicechat = __esm({
       async think() {
         if (!this.active) return;
         this.setState("THINKING");
-        this.onStatus("Thinking...");
+        if (this.verbose) this.onStatus("Thinking...");
         this.abortController = new AbortController();
         try {
-
+          if (this.toolRelay?.contextSnapshot) {
+            try {
+              const snap = await Promise.resolve(this.toolRelay.contextSnapshot());
+              if (snap && snap.trim()) {
+                this.context.push({ role: "system", content: `Context snapshot (read-only):
+${snap.trim()}` });
+              }
+            } catch {
+            }
+          }
+          let response = await this.streamOllamaInference(this.abortController.signal);
+          const toolReq = extractToolJson(response);
+          if (toolReq && this.toolRelay) {
+            const { name: name10, args } = toolReq;
+            let toolOutput = "";
+            try {
+              toolOutput = await this.toolRelay.call(name10, args);
+            } catch (e2) {
+              toolOutput = `Tool ${name10} failed: ${e2 instanceof Error ? e2.message : String(e2)}`;
+            }
+            this.context.push({ role: "system", content: `Tool ${name10} result (authoritative):
+${toolOutput}` });
+            response = await this.streamOllamaInference(this.abortController.signal);
+          }
           if (!this.active) return;
           if (response.trim()) {
             this.context.push({ role: "assistant", content: response.trim() });
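Note: think() now runs up to two inference passes: if the first response contains a tool-request line and a toolRelay is configured, the relay output is pushed as an authoritative system message and the model is queried again. A minimal sketch of that round trip (stub relay and stub inference; the real code streams from Ollama via streamOllamaInference, and extractToolJson is the helper from the first hunk):

// Hypothetical stand-ins for the relay and the two streamed responses.
const relay = { async call(name, args) { return '{"cwd": "/tmp/demo"}'; } };
const replies = ['{"tool": "voice_env", "args": {}}', "You are in /tmp/demo."];
const infer = async () => replies.shift();
const context = [];

let response = await infer();                 // pass 1: model requests a tool
const toolReq = extractToolJson(response);
if (toolReq && relay) {
  const toolOutput = await relay.call(toolReq.name, toolReq.args);
  context.push({ role: "system", content: `Tool ${toolReq.name} result (authoritative):\n${toolOutput}` });
  response = await infer();                   // pass 2: grounded spoken answer
}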
@@ -328574,7 +328618,7 @@ var init_voicechat = __esm({
         }
         if (this.active) {
           this.setState("LISTENING");
-          this.onStatus("LISTENING...");
+          if (this.verbose) this.onStatus("LISTENING...");
         }
       }
       /**
@@ -333127,36 +333171,87 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
       model: currentConfig.model,
       apiKey: currentConfig.apiKey,
       runner: summaryRunner,
+      toolRelay: {
+        async call(name10, args) {
+          try {
+            if (name10 === "voice_env") {
+              const os8 = __require("node:os");
+              const p2 = __require("node:process");
+              const info = {
+                cwd: repoRoot,
+                platform: os8.platform(),
+                arch: os8.arch(),
+                cpu: (os8.cpus() || [])[0]?.model || "unknown",
+                memGB: Math.round(os8.totalmem() / (1024 * 1024 * 1024)),
+                node: p2.version,
+                model: currentConfig.model
+              };
+              return JSON.stringify(info, null, 2);
+            }
+            if (name10 === "voice_status") {
+              const status = activeTask ? {
+                active: true,
+                toolCalls: activeTask.toolCallCount,
+                filesTouched: Array.from(activeTask.filesTouched).slice(-20)
+              } : { active: false };
+              return JSON.stringify(status, null, 2);
+            }
+            if (name10 === "voice_list_files") {
+              const baseDir = String(args?.dir ?? ".");
+              const { readdirSync: readdirSync31, statSync: statSync25 } = __require("node:fs");
+              const { join: join106, resolve: resolve40 } = __require("node:path");
+              const base3 = baseDir.startsWith("/") ? baseDir : resolve40(join106(repoRoot, baseDir));
+              const items = readdirSync31(base3).slice(0, 200).map((f2) => {
+                const s2 = statSync25(join106(base3, f2));
+                return { name: f2, dir: s2.isDirectory(), size: s2.size };
+              });
+              return JSON.stringify({ dir: base3, items }, null, 2);
+            }
+            if (name10 === "voice_read_file") {
+              const { readFileSync: readFileSync68 } = __require("node:fs");
+              const { join: join106, resolve: resolve40 } = __require("node:path");
+              const rel = String(args?.path || "");
+              const max = Math.max(0, Math.min(8192, Number(args?.max) || 2048));
+              const full = rel.startsWith("/") ? rel : resolve40(join106(repoRoot, rel));
+              const buf = readFileSync68(full);
+              const txt = buf.toString("utf8");
+              return txt.length > max ? txt.slice(0, max) + `
+... [truncated ${txt.length - max} chars]` : txt;
+            }
+            return `Unknown tool: ${name10}`;
+          } catch (e2) {
+            return `Error: ${e2?.message || String(e2)}`;
+          }
+        },
+        contextSnapshot() {
+          const parts = [];
+          parts.push(`cwd: ${repoRoot}`);
+          if (activeTask) {
+            parts.push(`active: yes, toolCalls: ${activeTask.toolCallCount}, filesTouched: ${activeTask.filesTouched.size}`);
+          } else {
+            parts.push("active: no");
+          }
+          return parts.join("\n");
+        }
+      },
+      verbose: false,
+      debugSnr: false,
       onStatus(msg) {
         writeContent(() => renderInfo2(`[voicechat] ${msg}`));
       },
       onUserSpeech(text) {
         writeContent(() => renderInfo2(`\x1B[38;5;45m[you]\x1B[0m ${text}`));
       },
-
-
-          process.stdout.write(`\r\x1B[2K\x1B[38;5;243m  [hearing] ${text.slice(0, 70)}\x1B[0m`);
-        });
+      // Suppressed to keep main loop quiet
+      onPartialTranscript(_text) {
       },
       onAgentSpeech(text) {
         writeContent(() => renderInfo2(`\x1B[38;5;178m[agent]\x1B[0m ${text.slice(0, 120)}`));
       },
-
-
+      // Keep state changes silent
+      onStateChange(_state2) {
       }
     });
-    _voiceChatSession2.on("snr", (e2) => {
-      const s2 = typeof e2?.score === "number" ? Math.max(0, Math.min(1, e2.score)) : null;
-      if (s2 !== null) {
-        writeContent(() => {
-          process.stdout.write(`\r\x1B[2K\x1B[38;5;243m  [hearing] (snr:${s2.toFixed(2)})\x1B[0m`);
-        });
-      }
-    });
-    _voiceChatSession2.on("snrFiltered", (e2) => {
-      const s2 = typeof e2?.score === "number" ? e2.score.toFixed(2) : "?";
-      writeContent(() => renderInfo2(`\x1B[38;5;243m[voicechat]\x1B[0m dropped low-signal utterance (SNR:${s2})`));
-    });
     await _voiceChatSession2.start();
   },
   async voiceChatStop() {
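Note: the CLI wires a concrete toolRelay with four read-only built-ins: voice_env, voice_status, voice_list_files, and voice_read_file. A hedged invocation sketch, reusing the toolRelay object and extractToolJson helper from the hunks above (the request line itself is hypothetical):

// A reply line the voice model might emit under the new system prompt.
const reply = '{"tool": "voice_read_file", "args": {"path": "package.json", "max": 500}}';
const req = extractToolJson(reply);
// voice_read_file clamps max to [0, 8192] (defaulting to 2048) and appends
// "... [truncated N chars]" when the file is longer than the limit.
const out = await toolRelay.call(req.name, req.args);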
package/package.json
CHANGED