agent.libx.js 0.93.1 → 0.93.4

This diff shows the differences between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -876,6 +876,7 @@ declare class VoiceEngine {
876
876
  private pausedAt;
877
877
  private lastOverlapPartial;
878
878
  private resumeTimer;
879
+ private turnStartAt;
879
880
  constructor(options?: Partial<VoiceEngineOptions>);
880
881
  start(): Promise<void>;
881
882
  get usingAec(): boolean;
@@ -942,6 +943,7 @@ declare class SonioxSTT {
942
943
  private lastChangeAt;
943
944
  private lastCombined;
944
945
  private endpointTimer;
946
+ private firstTokenAt;
945
947
  constructor(options?: Partial<SonioxSTTOptions>);
946
948
  get usingAec(): boolean;
947
949
  private connectWs;
@@ -967,7 +969,12 @@ declare class CartesiaTTS {
967
969
  onDone: () => void;
968
970
  firstAudioAt: number;
969
971
  constructor(options?: Partial<CartesiaTTSOptions>);
972
+ private closed;
973
+ private connecting;
970
974
  connect(): Promise<void>;
975
+ private doConnect;
976
+ /** Ensure the WS is open before sending — reconnects if idle-closed. */
977
+ private ensureConnected;
971
978
  newContext(): string;
972
979
  private frame;
973
980
  speak(text: string, cont: boolean): void;
package/dist/index.js CHANGED
@@ -3601,7 +3601,7 @@ var DuplexAgentOptions = class {
3601
3601
  ai;
3602
3602
  /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3603
3603
  fs;
3604
- voiceModel = "anthropic/claude-haiku-4-5";
3604
+ voiceModel = "groq/openai/gpt-oss-20b";
3605
3605
  workerModel = "anthropic/claude-sonnet-4-6";
3606
3606
  /** Escape hatches merged over the derived per-agent options. */
3607
3607
  voiceOptions;
@@ -4198,6 +4198,8 @@ var VoiceEngine = class {
4198
4198
  lastOverlapPartial = "";
4199
4199
  // change-detection: only NEW partial text counts as activity
4200
4200
  resumeTimer = null;
4201
+ turnStartAt = 0;
4202
+ // timestamp when the current turn began (for TTFT logging)
4201
4203
  constructor(options) {
4202
4204
  this.options = { ...new VoiceEngineOptions(), ...options };
4203
4205
  const o = this.options;
@@ -4258,6 +4260,7 @@ var VoiceEngine = class {
4258
4260
  this.spokeDeltas = true;
4259
4261
  this.ackAt = now();
4260
4262
  }
4263
+ this.turnStartAt = now();
4261
4264
  this.setState("thinking");
4262
4265
  }
4263
4266
  speakDelta(text) {
@@ -4266,6 +4269,7 @@ var VoiceEngine = class {
4266
4269
  this.reply += text;
4267
4270
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4268
4271
  this.tts.speak(text, true);
4272
+ if (!this.spokeDeltas && this.turnStartAt) log8.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4269
4273
  this.spokeDeltas = true;
4270
4274
  this.setState("speaking");
4271
4275
  }
@@ -4286,6 +4290,7 @@ var VoiceEngine = class {
4286
4290
  }
4287
4291
  this.drainTimer = null;
4288
4292
  this.speaking = false;
4293
+ if (this.turnStartAt) log8.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4289
4294
  this.echoUntil = now() + 2500;
4290
4295
  if (!this.usingAec) this.stt.reset();
4291
4296
  this.setState("listening");
@@ -4322,7 +4327,7 @@ var VoiceEngine = class {
4322
4327
  this.ctxOpen = false;
4323
4328
  this.interrupted = true;
4324
4329
  this.suspectUntil = 0;
4325
- this.echoUntil = now() + 2500;
4330
+ this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
4326
4331
  this.tts.cancel();
4327
4332
  this.player.kill();
4328
4333
  if (!this.usingAec) this.stt.reset();
@@ -4517,6 +4522,8 @@ var SonioxSTT = class {
4517
4522
  lastChangeAt = 0;
4518
4523
  lastCombined = "";
4519
4524
  endpointTimer = null;
4525
+ firstTokenAt = 0;
4526
+ // first speech token in current utterance
4520
4527
  constructor(options) {
4521
4528
  this.options = { ...new SonioxSTTOptions(), ...options };
4522
4529
  }
@@ -4556,6 +4563,7 @@ var SonioxSTT = class {
4556
4563
  this.endpointTimer = setInterval(() => {
4557
4564
  const combined = (this.finalText + this.partialText).trim();
4558
4565
  if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
4566
+ if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4559
4567
  this.reset();
4560
4568
  this.onUtterance(combined, now2());
4561
4569
  }, 120);
@@ -4583,10 +4591,12 @@ var SonioxSTT = class {
4583
4591
  if (combined !== this.lastCombined) {
4584
4592
  this.lastCombined = combined;
4585
4593
  this.lastChangeAt = now2();
4594
+ if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
4586
4595
  }
4587
4596
  this.onPartial(combined);
4588
4597
  if (endpoint && this.finalText.trim()) {
4589
4598
  const utterance = this.finalText.trim();
4599
+ if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4590
4600
  this.reset();
4591
4601
  this.onUtterance(utterance, now2());
4592
4602
  }
@@ -4595,6 +4605,7 @@ var SonioxSTT = class {
4595
4605
  this.finalText = "";
4596
4606
  this.partialText = "";
4597
4607
  this.lastCombined = "";
4608
+ this.firstTokenAt = 0;
4598
4609
  }
4599
4610
  stop() {
4600
4611
  this.stopped = true;
@@ -4629,7 +4640,15 @@ var CartesiaTTS = class {
4629
4640
  constructor(options) {
4630
4641
  this.options = { ...new CartesiaTTSOptions(), ...options };
4631
4642
  }
4643
+ closed = false;
4644
+ connecting = null;
4632
4645
  async connect() {
4646
+ this.closed = false;
4647
+ this.connecting = this.doConnect();
4648
+ await this.connecting;
4649
+ this.connecting = null;
4650
+ }
4651
+ async doConnect() {
4633
4652
  const key = await resolveAuth(this.options.auth);
4634
4653
  const param = this.options.authMode === "token" ? "access_token" : "api_key";
4635
4654
  this.ws = new WebSocket(`wss://api.cartesia.ai/tts/websocket?cartesia_version=2026-03-01&${param}=${key}`);
@@ -4637,7 +4656,12 @@ var CartesiaTTS = class {
4637
4656
  this.ws.onopen = () => res();
4638
4657
  this.ws.onerror = (e) => rej(new Error(`cartesia ws: ${e.message || "connect failed"}`));
4639
4658
  });
4640
- this.ws.onclose = (ev) => log10.warn(`cartesia ws closed (${ev.code} ${ev.reason || ""})`);
4659
+ this.ws.onclose = (ev) => {
4660
+ log10.warn(`cartesia ws closed (${ev.code} ${ev.reason || ""})`);
4661
+ if (!this.closed) {
4662
+ this.connecting = this.doConnect().catch((e) => log10.error(`cartesia reconnect failed: ${e.message}`));
4663
+ }
4664
+ };
4641
4665
  this.ws.onmessage = (ev) => {
4642
4666
  const m = JSON.parse(String(ev.data));
4643
4667
  if (m.context_id && m.context_id !== this.ctxId) return;
@@ -4648,6 +4672,11 @@ var CartesiaTTS = class {
4648
4672
  else if (m.type === "error" && !/already been cancelled|does not exist/.test(m.message || "")) log10.warn(`cartesia: ${JSON.stringify(m)}`);
4649
4673
  };
4650
4674
  }
4675
+ /** Ensure the WS is open before sending — reconnects if idle-closed. */
4676
+ async ensureConnected() {
4677
+ if (this.connecting) await this.connecting;
4678
+ if (this.ws?.readyState !== WebSocket.OPEN) await this.connect();
4679
+ }
4651
4680
  newContext() {
4652
4681
  this.ctxId = `ctx-${++this.ctxSeq}`;
4653
4682
  this.firstAudioAt = 0;
@@ -4665,6 +4694,7 @@ var CartesiaTTS = class {
4665
4694
  }
4666
4695
  speak(text, cont) {
4667
4696
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(text, cont));
4697
+ else void this.ensureConnected().then(() => this.ws?.readyState === WebSocket.OPEN && this.ws.send(this.frame(text, cont)));
4668
4698
  }
4669
4699
  end() {
4670
4700
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame("", false));
@@ -4673,6 +4703,7 @@ var CartesiaTTS = class {
4673
4703
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify({ context_id: this.ctxId, cancel: true }));
4674
4704
  }
4675
4705
  close() {
4706
+ this.closed = true;
4676
4707
  if (this.ws) this.ws.onclose = null;
4677
4708
  this.ws?.close();
4678
4709
  }