agent.libx.js 0.93.2 → 0.93.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/cli.ts +1 -1
- package/dist/cli.js +23 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.js +22 -3
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/cli/cli.ts
CHANGED
|
@@ -208,7 +208,7 @@ Flags:
|
|
|
208
208
|
impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
|
|
209
209
|
with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
|
|
210
210
|
spoken replies out (echo-cancelled; speak over it to interrupt)
|
|
211
|
-
--voice-model <id> with --duplex: the fast voice model (default
|
|
211
|
+
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
|
|
212
212
|
--add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
|
|
213
213
|
--subagents allow the Task tool (spawn child agents)
|
|
214
214
|
--reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
|
package/dist/cli.js
CHANGED
|
@@ -2900,9 +2900,17 @@ var Agent = class _Agent {
|
|
|
2900
2900
|
let res;
|
|
2901
2901
|
const sent = this.trimContext();
|
|
2902
2902
|
const frag = reasoningToChatFragment(o.model, o.reasoning);
|
|
2903
|
+
const isCursorWithTools = o.model.startsWith("cursor/") && wireTools.length > 0;
|
|
2904
|
+
const cursorPo = isCursorWithTools ? {
|
|
2905
|
+
toolExecutor: async (name, args) => {
|
|
2906
|
+
const tc = { id: `cursor-${Date.now()}`, type: "function", function: { name, arguments: JSON.stringify(args) } };
|
|
2907
|
+
const raw = await this.dispatch(tc);
|
|
2908
|
+
return typeof raw === "string" ? raw : raw.text;
|
|
2909
|
+
}
|
|
2910
|
+
} : void 0;
|
|
2903
2911
|
const reasonOpts = {
|
|
2904
2912
|
...frag,
|
|
2905
|
-
...o.providerOptions ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions } } : {}
|
|
2913
|
+
...o.providerOptions || cursorPo ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions, ...cursorPo } } : {}
|
|
2906
2914
|
};
|
|
2907
2915
|
try {
|
|
2908
2916
|
if (useStream) {
|
|
@@ -3501,7 +3509,7 @@ var DuplexAgentOptions = class {
|
|
|
3501
3509
|
ai;
|
|
3502
3510
|
/** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
|
|
3503
3511
|
fs;
|
|
3504
|
-
voiceModel = "
|
|
3512
|
+
voiceModel = "groq/openai/gpt-oss-20b";
|
|
3505
3513
|
workerModel = "anthropic/claude-sonnet-4-6";
|
|
3506
3514
|
/** Escape hatches merged over the derived per-agent options. */
|
|
3507
3515
|
voiceOptions;
|
|
@@ -4003,6 +4011,8 @@ var VoiceEngine = class {
|
|
|
4003
4011
|
lastOverlapPartial = "";
|
|
4004
4012
|
// change-detection: only NEW partial text counts as activity
|
|
4005
4013
|
resumeTimer = null;
|
|
4014
|
+
turnStartAt = 0;
|
|
4015
|
+
// timestamp when the current turn began (for TTFT logging)
|
|
4006
4016
|
constructor(options) {
|
|
4007
4017
|
this.options = { ...new VoiceEngineOptions(), ...options };
|
|
4008
4018
|
const o = this.options;
|
|
@@ -4063,6 +4073,7 @@ var VoiceEngine = class {
|
|
|
4063
4073
|
this.spokeDeltas = true;
|
|
4064
4074
|
this.ackAt = now();
|
|
4065
4075
|
}
|
|
4076
|
+
this.turnStartAt = now();
|
|
4066
4077
|
this.setState("thinking");
|
|
4067
4078
|
}
|
|
4068
4079
|
speakDelta(text) {
|
|
@@ -4071,6 +4082,7 @@ var VoiceEngine = class {
|
|
|
4071
4082
|
this.reply += text;
|
|
4072
4083
|
for (const w of this.words(this.reply)) this.echoWords.add(w);
|
|
4073
4084
|
this.tts.speak(text, true);
|
|
4085
|
+
if (!this.spokeDeltas && this.turnStartAt) log7.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
|
|
4074
4086
|
this.spokeDeltas = true;
|
|
4075
4087
|
this.setState("speaking");
|
|
4076
4088
|
}
|
|
@@ -4091,6 +4103,7 @@ var VoiceEngine = class {
|
|
|
4091
4103
|
}
|
|
4092
4104
|
this.drainTimer = null;
|
|
4093
4105
|
this.speaking = false;
|
|
4106
|
+
if (this.turnStartAt) log7.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
|
|
4094
4107
|
this.echoUntil = now() + 2500;
|
|
4095
4108
|
if (!this.usingAec) this.stt.reset();
|
|
4096
4109
|
this.setState("listening");
|
|
@@ -4127,7 +4140,7 @@ var VoiceEngine = class {
|
|
|
4127
4140
|
this.ctxOpen = false;
|
|
4128
4141
|
this.interrupted = true;
|
|
4129
4142
|
this.suspectUntil = 0;
|
|
4130
|
-
this.echoUntil = now() + 2500;
|
|
4143
|
+
this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
|
|
4131
4144
|
this.tts.cancel();
|
|
4132
4145
|
this.player.kill();
|
|
4133
4146
|
if (!this.usingAec) this.stt.reset();
|
|
@@ -4322,6 +4335,8 @@ var SonioxSTT = class {
|
|
|
4322
4335
|
lastChangeAt = 0;
|
|
4323
4336
|
lastCombined = "";
|
|
4324
4337
|
endpointTimer = null;
|
|
4338
|
+
firstTokenAt = 0;
|
|
4339
|
+
// first speech token in current utterance
|
|
4325
4340
|
constructor(options) {
|
|
4326
4341
|
this.options = { ...new SonioxSTTOptions(), ...options };
|
|
4327
4342
|
}
|
|
@@ -4361,6 +4376,7 @@ var SonioxSTT = class {
|
|
|
4361
4376
|
this.endpointTimer = setInterval(() => {
|
|
4362
4377
|
const combined = (this.finalText + this.partialText).trim();
|
|
4363
4378
|
if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
|
|
4379
|
+
if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
|
|
4364
4380
|
this.reset();
|
|
4365
4381
|
this.onUtterance(combined, now2());
|
|
4366
4382
|
}, 120);
|
|
@@ -4388,10 +4404,12 @@ var SonioxSTT = class {
|
|
|
4388
4404
|
if (combined !== this.lastCombined) {
|
|
4389
4405
|
this.lastCombined = combined;
|
|
4390
4406
|
this.lastChangeAt = now2();
|
|
4407
|
+
if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
|
|
4391
4408
|
}
|
|
4392
4409
|
this.onPartial(combined);
|
|
4393
4410
|
if (endpoint && this.finalText.trim()) {
|
|
4394
4411
|
const utterance = this.finalText.trim();
|
|
4412
|
+
if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
|
|
4395
4413
|
this.reset();
|
|
4396
4414
|
this.onUtterance(utterance, now2());
|
|
4397
4415
|
}
|
|
@@ -4400,6 +4418,7 @@ var SonioxSTT = class {
|
|
|
4400
4418
|
this.finalText = "";
|
|
4401
4419
|
this.partialText = "";
|
|
4402
4420
|
this.lastCombined = "";
|
|
4421
|
+
this.firstTokenAt = 0;
|
|
4403
4422
|
}
|
|
4404
4423
|
stop() {
|
|
4405
4424
|
this.stopped = true;
|
|
@@ -7405,7 +7424,7 @@ Flags:
|
|
|
7405
7424
|
impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
|
|
7406
7425
|
with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
|
|
7407
7426
|
spoken replies out (echo-cancelled; speak over it to interrupt)
|
|
7408
|
-
--voice-model <id> with --duplex: the fast voice model (default
|
|
7427
|
+
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
|
|
7409
7428
|
--add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
|
|
7410
7429
|
--subagents allow the Task tool (spawn child agents)
|
|
7411
7430
|
--reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
|