agent.libx.js 0.93.1 → 0.93.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/cli.ts +1 -1
- package/dist/cli.js +35 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.js +34 -3
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -876,6 +876,7 @@ declare class VoiceEngine {
|
|
|
876
876
|
private pausedAt;
|
|
877
877
|
private lastOverlapPartial;
|
|
878
878
|
private resumeTimer;
|
|
879
|
+
private turnStartAt;
|
|
879
880
|
constructor(options?: Partial<VoiceEngineOptions>);
|
|
880
881
|
start(): Promise<void>;
|
|
881
882
|
get usingAec(): boolean;
|
|
@@ -942,6 +943,7 @@ declare class SonioxSTT {
|
|
|
942
943
|
private lastChangeAt;
|
|
943
944
|
private lastCombined;
|
|
944
945
|
private endpointTimer;
|
|
946
|
+
private firstTokenAt;
|
|
945
947
|
constructor(options?: Partial<SonioxSTTOptions>);
|
|
946
948
|
get usingAec(): boolean;
|
|
947
949
|
private connectWs;
|
|
@@ -967,7 +969,12 @@ declare class CartesiaTTS {
|
|
|
967
969
|
onDone: () => void;
|
|
968
970
|
firstAudioAt: number;
|
|
969
971
|
constructor(options?: Partial<CartesiaTTSOptions>);
|
|
972
|
+
private closed;
|
|
973
|
+
private connecting;
|
|
970
974
|
connect(): Promise<void>;
|
|
975
|
+
private doConnect;
|
|
976
|
+
/** Ensure the WS is open before sending — reconnects if idle-closed. */
|
|
977
|
+
private ensureConnected;
|
|
971
978
|
newContext(): string;
|
|
972
979
|
private frame;
|
|
973
980
|
speak(text: string, cont: boolean): void;
|
package/dist/index.js
CHANGED
|
@@ -3601,7 +3601,7 @@ var DuplexAgentOptions = class {
|
|
|
3601
3601
|
ai;
|
|
3602
3602
|
/** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
|
|
3603
3603
|
fs;
|
|
3604
|
-
voiceModel = "
|
|
3604
|
+
voiceModel = "groq/openai/gpt-oss-20b";
|
|
3605
3605
|
workerModel = "anthropic/claude-sonnet-4-6";
|
|
3606
3606
|
/** Escape hatches merged over the derived per-agent options. */
|
|
3607
3607
|
voiceOptions;
|
|
@@ -4198,6 +4198,8 @@ var VoiceEngine = class {
|
|
|
4198
4198
|
lastOverlapPartial = "";
|
|
4199
4199
|
// change-detection: only NEW partial text counts as activity
|
|
4200
4200
|
resumeTimer = null;
|
|
4201
|
+
turnStartAt = 0;
|
|
4202
|
+
// timestamp when the current turn began (for TTFT logging)
|
|
4201
4203
|
constructor(options) {
|
|
4202
4204
|
this.options = { ...new VoiceEngineOptions(), ...options };
|
|
4203
4205
|
const o = this.options;
|
|
@@ -4258,6 +4260,7 @@ var VoiceEngine = class {
|
|
|
4258
4260
|
this.spokeDeltas = true;
|
|
4259
4261
|
this.ackAt = now();
|
|
4260
4262
|
}
|
|
4263
|
+
this.turnStartAt = now();
|
|
4261
4264
|
this.setState("thinking");
|
|
4262
4265
|
}
|
|
4263
4266
|
speakDelta(text) {
|
|
@@ -4266,6 +4269,7 @@ var VoiceEngine = class {
|
|
|
4266
4269
|
this.reply += text;
|
|
4267
4270
|
for (const w of this.words(this.reply)) this.echoWords.add(w);
|
|
4268
4271
|
this.tts.speak(text, true);
|
|
4272
|
+
if (!this.spokeDeltas && this.turnStartAt) log8.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
|
|
4269
4273
|
this.spokeDeltas = true;
|
|
4270
4274
|
this.setState("speaking");
|
|
4271
4275
|
}
|
|
@@ -4286,6 +4290,7 @@ var VoiceEngine = class {
|
|
|
4286
4290
|
}
|
|
4287
4291
|
this.drainTimer = null;
|
|
4288
4292
|
this.speaking = false;
|
|
4293
|
+
if (this.turnStartAt) log8.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
|
|
4289
4294
|
this.echoUntil = now() + 2500;
|
|
4290
4295
|
if (!this.usingAec) this.stt.reset();
|
|
4291
4296
|
this.setState("listening");
|
|
@@ -4322,7 +4327,7 @@ var VoiceEngine = class {
|
|
|
4322
4327
|
this.ctxOpen = false;
|
|
4323
4328
|
this.interrupted = true;
|
|
4324
4329
|
this.suspectUntil = 0;
|
|
4325
|
-
this.echoUntil = now() + 2500;
|
|
4330
|
+
this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
|
|
4326
4331
|
this.tts.cancel();
|
|
4327
4332
|
this.player.kill();
|
|
4328
4333
|
if (!this.usingAec) this.stt.reset();
|
|
@@ -4517,6 +4522,8 @@ var SonioxSTT = class {
|
|
|
4517
4522
|
lastChangeAt = 0;
|
|
4518
4523
|
lastCombined = "";
|
|
4519
4524
|
endpointTimer = null;
|
|
4525
|
+
firstTokenAt = 0;
|
|
4526
|
+
// first speech token in current utterance
|
|
4520
4527
|
constructor(options) {
|
|
4521
4528
|
this.options = { ...new SonioxSTTOptions(), ...options };
|
|
4522
4529
|
}
|
|
@@ -4556,6 +4563,7 @@ var SonioxSTT = class {
|
|
|
4556
4563
|
this.endpointTimer = setInterval(() => {
|
|
4557
4564
|
const combined = (this.finalText + this.partialText).trim();
|
|
4558
4565
|
if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
|
|
4566
|
+
if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
|
|
4559
4567
|
this.reset();
|
|
4560
4568
|
this.onUtterance(combined, now2());
|
|
4561
4569
|
}, 120);
|
|
@@ -4583,10 +4591,12 @@ var SonioxSTT = class {
|
|
|
4583
4591
|
if (combined !== this.lastCombined) {
|
|
4584
4592
|
this.lastCombined = combined;
|
|
4585
4593
|
this.lastChangeAt = now2();
|
|
4594
|
+
if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
|
|
4586
4595
|
}
|
|
4587
4596
|
this.onPartial(combined);
|
|
4588
4597
|
if (endpoint && this.finalText.trim()) {
|
|
4589
4598
|
const utterance = this.finalText.trim();
|
|
4599
|
+
if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
|
|
4590
4600
|
this.reset();
|
|
4591
4601
|
this.onUtterance(utterance, now2());
|
|
4592
4602
|
}
|
|
@@ -4595,6 +4605,7 @@ var SonioxSTT = class {
|
|
|
4595
4605
|
this.finalText = "";
|
|
4596
4606
|
this.partialText = "";
|
|
4597
4607
|
this.lastCombined = "";
|
|
4608
|
+
this.firstTokenAt = 0;
|
|
4598
4609
|
}
|
|
4599
4610
|
stop() {
|
|
4600
4611
|
this.stopped = true;
|
|
@@ -4629,7 +4640,15 @@ var CartesiaTTS = class {
|
|
|
4629
4640
|
constructor(options) {
|
|
4630
4641
|
this.options = { ...new CartesiaTTSOptions(), ...options };
|
|
4631
4642
|
}
|
|
4643
|
+
closed = false;
|
|
4644
|
+
connecting = null;
|
|
4632
4645
|
async connect() {
|
|
4646
|
+
this.closed = false;
|
|
4647
|
+
this.connecting = this.doConnect();
|
|
4648
|
+
await this.connecting;
|
|
4649
|
+
this.connecting = null;
|
|
4650
|
+
}
|
|
4651
|
+
async doConnect() {
|
|
4633
4652
|
const key = await resolveAuth(this.options.auth);
|
|
4634
4653
|
const param = this.options.authMode === "token" ? "access_token" : "api_key";
|
|
4635
4654
|
this.ws = new WebSocket(`wss://api.cartesia.ai/tts/websocket?cartesia_version=2026-03-01&${param}=${key}`);
|
|
@@ -4637,7 +4656,12 @@ var CartesiaTTS = class {
|
|
|
4637
4656
|
this.ws.onopen = () => res();
|
|
4638
4657
|
this.ws.onerror = (e) => rej(new Error(`cartesia ws: ${e.message || "connect failed"}`));
|
|
4639
4658
|
});
|
|
4640
|
-
this.ws.onclose = (ev) =>
|
|
4659
|
+
this.ws.onclose = (ev) => {
|
|
4660
|
+
log10.warn(`cartesia ws closed (${ev.code} ${ev.reason || ""})`);
|
|
4661
|
+
if (!this.closed) {
|
|
4662
|
+
this.connecting = this.doConnect().catch((e) => log10.error(`cartesia reconnect failed: ${e.message}`));
|
|
4663
|
+
}
|
|
4664
|
+
};
|
|
4641
4665
|
this.ws.onmessage = (ev) => {
|
|
4642
4666
|
const m = JSON.parse(String(ev.data));
|
|
4643
4667
|
if (m.context_id && m.context_id !== this.ctxId) return;
|
|
@@ -4648,6 +4672,11 @@ var CartesiaTTS = class {
|
|
|
4648
4672
|
else if (m.type === "error" && !/already been cancelled|does not exist/.test(m.message || "")) log10.warn(`cartesia: ${JSON.stringify(m)}`);
|
|
4649
4673
|
};
|
|
4650
4674
|
}
|
|
4675
|
+
/** Ensure the WS is open before sending — reconnects if idle-closed. */
|
|
4676
|
+
async ensureConnected() {
|
|
4677
|
+
if (this.connecting) await this.connecting;
|
|
4678
|
+
if (this.ws?.readyState !== WebSocket.OPEN) await this.connect();
|
|
4679
|
+
}
|
|
4651
4680
|
newContext() {
|
|
4652
4681
|
this.ctxId = `ctx-${++this.ctxSeq}`;
|
|
4653
4682
|
this.firstAudioAt = 0;
|
|
@@ -4665,6 +4694,7 @@ var CartesiaTTS = class {
|
|
|
4665
4694
|
}
|
|
4666
4695
|
speak(text, cont) {
|
|
4667
4696
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(text, cont));
|
|
4697
|
+
else void this.ensureConnected().then(() => this.ws?.readyState === WebSocket.OPEN && this.ws.send(this.frame(text, cont)));
|
|
4668
4698
|
}
|
|
4669
4699
|
end() {
|
|
4670
4700
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame("", false));
|
|
@@ -4673,6 +4703,7 @@ var CartesiaTTS = class {
|
|
|
4673
4703
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify({ context_id: this.ctxId, cancel: true }));
|
|
4674
4704
|
}
|
|
4675
4705
|
close() {
|
|
4706
|
+
this.closed = true;
|
|
4676
4707
|
if (this.ws) this.ws.onclose = null;
|
|
4677
4708
|
this.ws?.close();
|
|
4678
4709
|
}
|