agent.libx.js 0.93.43 → 0.93.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.d.ts CHANGED
@@ -92,6 +92,10 @@ interface Args {
92
92
  scratch?: boolean;
93
93
  }
94
94
  declare function parseArgs(argv: string[]): Args;
95
+ /** Synthetic task-event user messages ("[task t2 completed] <multi-KB worker dump>") aren't real user
96
+ * utterances — replaying the raw payload newline-collapsed + hard-cut at N chars garbles it (slices URLs
97
+ * mid-token). Condense to a one-line marker. Non-task text is returned unchanged. */
98
+ declare function condenseReplay(t: string): string;
95
99
  /** Render a resumed conversation (like CC) so the user sees the context they're continuing: user
96
100
  * prompts, assistant narration, and a condensed line per tool action. Tool *results* are omitted
97
101
  * (verbose) and inlined @-mention blocks are stripped from prompts. Pure → unit-testable. */
@@ -200,4 +204,4 @@ declare function jsonResult(res: RunResult, session: SessionData): {
200
204
  */
201
205
  declare function readMultiline(readLine: (continuing: boolean) => Promise<string | null>): Promise<string | null>;
202
206
 
203
- export { type PermMode, appendMemoryNote, cacheMultipliers, costOf, estimateTranscriptTokens, expandMentions, exportMarkdown, fmtUsd, formatHistory, formatStatus, jsonResult, parseArgs, pastePathClassifier, readImageParts, readMultiline, resolvePermMode, runShellLine };
207
+ export { type PermMode, appendMemoryNote, cacheMultipliers, condenseReplay, costOf, estimateTranscriptTokens, expandMentions, exportMarkdown, fmtUsd, formatHistory, formatStatus, jsonResult, parseArgs, pastePathClassifier, readImageParts, readMultiline, resolvePermMode, runShellLine };
package/dist/cli.js CHANGED
@@ -2659,6 +2659,11 @@ function reasoningToChatFragment(model, effort) {
2659
2659
 
2660
2660
  // src/Agent.ts
2661
2661
  var log3 = forComponent("Agent");
/**
 * Report whether a thrown value looks like a cancellation rather than a genuine failure.
 *
 * The check is textual: the error's `message`, `name` and `code`, plus the same three fields of
 * its `cause`, are joined and matched (case-insensitively) against the known abort spellings:
 * "operation was aborted", "AbortError", "ABORT_ERR" and "[canceled]".
 *
 * Compared with a name-only look at the cause, this also recognises wrapped errors whose cause
 * carries only an abort `code` or abort `message`, and primitive rejections (e.g. a thrown string).
 *
 * @param err2 Whatever was caught; may be an Error, a plain object, a primitive, or null/undefined.
 * @returns true when any inspected field matches an abort spelling, otherwise false.
 */
function isAbortError(err2) {
  if (err2 == null) return false;
  // Primitives have no fields to inspect; treat their string form as the message.
  const e = typeof err2 === "object" || typeof err2 === "function" ? err2 : { message: String(err2) };
  const cause = e.cause;
  const causeParts = cause != null && typeof cause === "object" ? [cause.message, cause.name, cause.code] : [cause];
  // String(...) rather than implicit coercion so an exotic field (e.g. a Symbol) cannot throw here.
  const blob = [e.message, e.name, e.code, ...causeParts].map((part) => String(part ?? "")).join(" ");
  return /operation was aborted|\bAbortError\b|ABORT_ERR|\[canceled\]/i.test(blob);
}
2662
2667
  var AgentOptions = class {
2663
2668
  /** Any ai.libx.js AIClient (or a FakeAIClient). */
2664
2669
  ai;
@@ -2985,7 +2990,7 @@ var Agent = class _Agent {
2985
2990
  }
2986
2991
  } catch (err2) {
2987
2992
  if (err2?.code === "budget") return kill("budget");
2988
- if (o.signal?.aborted) return kill("aborted");
2993
+ if (o.signal?.aborted || isAbortError(err2)) return kill("aborted");
2989
2994
  log3.error(`chat() failed: ${err2?.message ?? err2}`, err2);
2990
2995
  return { text: "", steps, finishReason: "error", messages: this.transcript, usage, usageEstimated, error: err2 };
2991
2996
  }
@@ -3004,11 +3009,14 @@ var Agent = class _Agent {
3004
3009
  usage.cacheReadTokens += res.usage.cacheReadTokens ?? 0;
3005
3010
  }
3006
3011
  const toolCalls = res.toolCalls ?? [];
3007
- this.transcript.push({
3008
- role: "assistant",
3009
- content: res.content ?? "",
3010
- ...toolCalls.length ? { tool_calls: toolCalls } : {}
3011
- });
3012
+ const emptyTurn = toolCalls.length === 0 && contentText(res.content ?? "").trim() === "";
3013
+ if (!emptyTurn) {
3014
+ this.transcript.push({
3015
+ role: "assistant",
3016
+ content: res.content ?? "",
3017
+ ...toolCalls.length ? { tool_calls: toolCalls } : {}
3018
+ });
3019
+ }
3012
3020
  if (toolCalls.length === 0) {
3013
3021
  log3.verbose(`completed in ${steps} step(s)`);
3014
3022
  await this.ctx.jobs?.drain();
@@ -3768,7 +3776,7 @@ var DuplexAgentOptions = class {
3768
3776
  memoryUserDir;
3769
3777
  };
3770
3778
  var RESERVED_EVENT_MARKER = /\[task\b[^\]\n]*\b(?:completed|failed|progress|asks)\b/i;
3771
- var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. 
After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. 
Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. 
Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3779
+ var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nThis holds even when asked to "print", "list", "show", or "make a table" \u2014 there is no screen for the spoken channel. Speak it as flowing prose ("Tuesday is half a meter, Wednesday a bit less\u2026"), or if they truly need it on screen, route it to Act to render. Never emit dashes or pipes into speech.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nYou are NOT a knowledge base. 
For any question whose answer needs SPECIFIC verifiable facts you do not already have in hand \u2014 how to build/configure/implement something, exact API, library, entitlement, command or option names, current events, or particular numbers, dates, or names \u2014 do NOT answer from your own memory: you will confidently make things up (a fake API, a wrong entitlement, an event that did not happen). Route it to `Act`, which can search and verify, and speak only what its report says. Answer inline ONLY for general conversation, chit-chat, and trivia you are sure of, or facts you can see via QuickLook. When elaborating on a completed task ("tell me more", "the gist"), stay strictly within what that result actually said \u2014 if the user asks for something the result did not cover, that is NEW information: dispatch `Act`, do not improvise.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. 
Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\nDo NOT end every turn with the same canned offer ("want a rundown?", "want the steps?"). Offer once at most; if the user pushes back, repeats themselves, or sounds unsatisfied ("you know what I mean?", "think deeper", "are you sure?"), do NOT re-offer the same thing \u2014 change approach: dispatch `Act`/`Think` to actually dig in, or ask one concrete clarifying question. Repeating a non-answer is worse than silence.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. 
NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. 
Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3772
3780
  var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
3773
3781
  var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
3774
3782
  var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
@@ -4409,6 +4417,7 @@ init_logging();
4409
4417
  init_logging();
4410
4418
  var log8 = forComponent("VoiceEngine");
4411
4419
  var now = () => performance.now();
/**
 * Scrub a piece of assistant text so it can be handed to text-to-speech (and echoed to the
 * terminal) without markdown or typographic noise. Rules run in the order listed; later rules
 * see the output of earlier ones.
 *
 * Note: the bullet rule is line-anchored, so when this is applied to a streamed delta the start
 * of the delta counts as the start of a line.
 *
 * @param t Raw text (a full reply or a single streamed delta).
 * @returns The cleaned text; may be the empty string.
 */
var forSpeech = (t) => {
  const rules = [
    // markdown emphasis / inline-code / heading characters are dropped outright
    [/[*_`#]+/g, ""],
    // leading list bullets ("- " or "• ") at the start of any line
    [/^[ \t]*[-•]\s+/gm, ""],
    // en/em dashes (with surrounding whitespace) become a comma pause
    [/\s*[\u2013\u2014]\s*/g, ", "],
    // Unicode hyphen / non-breaking hyphen become a plain ASCII hyphen
    [/[\u2010\u2011]/g, "-"],
    // table pipes become a comma pause
    [/\s*\|\s*/g, ", "],
    // "50 %" -> "50%"
    [/(\d)\s+%/g, "$1%"],
    // runs of three or more dots collapse to a single full stop
    [/\.{3,}/g, "."]
  ];
  let spoken = t;
  for (const [pattern, replacement] of rules) {
    spoken = spoken.replace(pattern, replacement);
  }
  return spoken;
};
4412
4421
  var VoiceEngineOptions = class {
4413
4422
  stt;
4414
4423
  tts;
@@ -4565,7 +4574,7 @@ var VoiceEngine = class _VoiceEngine {
4565
4574
  if (!this.speaking || !this.ctxOpen) this.beginSpeech();
4566
4575
  this.reply += text;
4567
4576
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4568
- this.tts.speak(text, true);
4577
+ this.tts.speak(forSpeech(text), true);
4569
4578
  if (!this.spokeDeltas && this.turnStartAt) log8.debug(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4570
4579
  this.spokeDeltas = true;
4571
4580
  this.setState("speaking");
@@ -4971,9 +4980,14 @@ var CartesiaTTS = class _CartesiaTTS {
4971
4980
  firstAudioAt = 0;
4972
4981
  /** Circuit breaker: consecutive error count + down flag. */
4973
4982
  consecutiveErrors = 0;
4983
+ consecutiveOk = 0;
4974
4984
  down = false;
4985
+ downAt = 0;
4975
4986
  probeTimer = null;
4976
4987
  static CB_THRESHOLD = 3;
4988
+ // open after 3 consecutive errors
4989
+ static CB_RECOVER_OK = 2;
4990
+ // close only after 2 consecutive good frames (no single-frame flap)
4977
4991
  static CB_PROBE_MS = 3e4;
4978
4992
  constructor(options) {
4979
4993
  this.options = { ...new CartesiaTTSOptions(), ...options };
@@ -5005,26 +5019,20 @@ var CartesiaTTS = class _CartesiaTTS {
5005
5019
  if (m.context_id && m.context_id !== this.ctxId) return;
5006
5020
  if (m.type === "chunk" && m.data) {
5007
5021
  this.consecutiveErrors = 0;
5008
- if (this.down) {
5009
- this.down = false;
5010
- log10.info("TTS recovered");
5011
- this.stopProbe();
5012
- }
5022
+ this.markRecovered();
5013
5023
  if (!this.firstAudioAt) this.firstAudioAt = now3();
5014
5024
  this.onAudio(base64ToBytes(m.data));
5015
5025
  } else if (m.type === "done") {
5016
5026
  this.consecutiveErrors = 0;
5017
- if (this.down) {
5018
- this.down = false;
5019
- log10.info("TTS recovered");
5020
- this.stopProbe();
5021
- }
5027
+ this.markRecovered();
5022
5028
  this.onDone();
5023
5029
  } else if (m.type === "error") {
5024
5030
  if (/already been cancelled|does not exist/.test(m.message || "")) return;
5025
5031
  this.consecutiveErrors++;
5026
5032
  if (!this.down && this.consecutiveErrors >= _CartesiaTTS.CB_THRESHOLD) {
5027
5033
  this.down = true;
5034
+ this.downAt = now3();
5035
+ this.consecutiveOk = 0;
5028
5036
  log10.warn(`TTS circuit breaker open \u2014 ${this.consecutiveErrors} consecutive errors, switching to text-only`);
5029
5037
  this.onDone();
5030
5038
  this.startProbe();
@@ -5034,6 +5042,17 @@ var CartesiaTTS = class _CartesiaTTS {
5034
5042
  }
5035
5043
  };
5036
5044
  }
5045
+ /** Close the breaker only after CB_RECOVER_OK consecutive good frames, so a single straggler chunk
5046
+ * after a 503 burst doesn't flap open→recover in <1s. A sub-2s down-window is a transient blip → debug. */
5047
+ markRecovered() {
5048
+ if (!this.down) return;
5049
+ if (++this.consecutiveOk < _CartesiaTTS.CB_RECOVER_OK) return;
5050
+ this.down = false;
5051
+ this.consecutiveOk = 0;
5052
+ this.stopProbe();
5053
+ const downMs = this.downAt ? now3() - this.downAt : 0;
5054
+ (downMs < 2e3 ? log10.debug : log10.info)(`TTS recovered${downMs ? ` (down ${downMs}ms)` : ""}`);
5055
+ }
5037
5056
  /** Ensure the WS is open before sending — reconnects if idle-closed. */
5038
5057
  async ensureConnected() {
5039
5058
  if (this.connecting) await this.connecting;
@@ -5056,6 +5075,7 @@ var CartesiaTTS = class _CartesiaTTS {
5056
5075
  }
5057
5076
  speak(text, cont) {
5058
5077
  if (this.down) return;
5078
+ if (cont && !text) return;
5059
5079
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(text, cont));
5060
5080
  else void this.ensureConnected().then(() => this.ws?.readyState === WebSocket.OPEN && this.ws.send(this.frame(text, cont)));
5061
5081
  }
@@ -7911,6 +7931,9 @@ var vis = (s) => needsBidi(s) ? bidiLine(s).visual : s;
/**
 * Strip inline markdown markup from a string, keeping only the visible text:
 * `code` -> code, [label](url) -> label, **bold** -> bold, ~~strike~~ -> strike,
 * *em* / _em_ -> em. Emphasis markers that sit inside a word (e.g. snake_case) are left alone.
 *
 * @param s Text that may contain inline markdown.
 * @returns The text with the inline markers removed.
 */
function displayText(s) {
  const inlineCode = /`([^`]+)`/g;
  const link = /\[([^\]]+)\]\([^)]+\)/g;
  const strong = /\*\*([^*]+)\*\*/g;
  const strike = /~~([^~]+)~~/g;
  const emphasis = /(?<![\w*])[*_]([^*_\s][^*_]*?)[*_](?![\w*])/g;
  // Every pattern keeps its first capture group; order matters (bold before single-marker emphasis).
  return [inlineCode, link, strong, strike, emphasis].reduce((acc, pattern) => acc.replace(pattern, "$1"), s);
}
/**
 * Reduce a single markdown line to plain text: inline markup is removed via `displayText`, then
 * a leading ATX heading marker (up to three spaces, one to six `#`, whitespace) and a leading
 * list bullet (`-`, `*` or `+` followed by whitespace) are dropped. Indentation before a bullet
 * is preserved.
 *
 * @param s One line of text.
 * @returns The line without heading/bullet/inline markers.
 */
function plainLine(s) {
  const inlineStripped = displayText(s);
  const withoutHeading = inlineStripped.replace(/^\s{0,3}#{1,6}\s+/, "");
  const withoutBullet = withoutHeading.replace(/^(\s*)[-*+]\s+/, "$1");
  return withoutBullet;
}
7914
7937
  function mdInline(line, p) {
7915
7938
  const re = /(`[^`]+`)|(\[[^\]]+\]\([^)]+\))|(\*\*[^*]+\*\*)|(~~[^~]+~~)|((?<![\w*])[*_][^*_\s][^*_]*?[*_](?![\w*]))/g;
7916
7939
  return line.replace(re, (_m, code, link2, bold2, strike2, italic2) => {
@@ -8405,14 +8428,23 @@ function displayHooks(fs, opts) {
8405
8428
  }
8406
8429
  };
8407
8430
  }
/**
 * Condense a synthetic task-event message ("[task t2 completed] <payload>") to a one-line marker
 * of the form "⦿ task t2 completed — <gist>…". Text that does not start with such a
 * `[task tN completed|failed|progress|asks]` tag is returned unchanged.
 *
 * The gist is the payload with URLs removed, markdown punctuation blanked and whitespace
 * collapsed, limited to 80 characters. When the payload is longer than that, the cut is moved
 * back to the previous word boundary where one exists, and never lands between the two halves
 * of a surrogate pair, so the marker does not end in a chopped word or a broken character.
 *
 * @param t Message text.
 * @returns The condensed marker, or `t` itself for non-task text.
 */
function condenseReplay(t) {
  const m = t.match(/^\[task (t\d+) (completed|failed|progress|asks)\]\s*([\s\S]*)$/);
  if (!m) return t;
  const [, id, kind, rest] = m;
  const flat = rest.replace(/https?:\/\/\S+/g, "").replace(/[#>*`|_\[\]()]/g, " ").replace(/\s+/g, " ").trim();
  const limit = 80;
  let gist = flat;
  if (flat.length > limit) {
    let cut = flat.slice(0, limit);
    // A trailing high surrogate means the slice split an astral character — drop the orphan half.
    if (/[\uD800-\uDBFF]$/.test(cut)) cut = cut.slice(0, -1);
    // If the cut fell inside a word, back up to the last space (keep the hard cut when there is none).
    if (!/\s/.test(flat.charAt(cut.length))) {
      const lastSpace = cut.lastIndexOf(" ");
      if (lastSpace > 0) cut = cut.slice(0, lastSpace);
    }
    gist = cut.trimEnd();
  }
  return `\u29BF task ${id} ${kind}${gist ? " \u2014 " + gist + "\u2026" : ""}`;
}
/**
 * Clip text to at most `n` characters without ending on a chopped word.
 *
 * Text of length <= n is returned unchanged. Otherwise the first `n` characters are taken and:
 *  - a trailing orphaned high surrogate (split astral character) is dropped;
 *  - if the cut landed exactly at the end of a word, that word is kept;
 *  - if the cut landed inside a word, the partial word is removed;
 *  - if removing the partial word would leave nothing (no whitespace in the clipped part, e.g.
 *    one very long token), the hard cut is kept instead of returning an empty string.
 *
 * @param t Text to clip.
 * @param n Maximum number of characters to keep.
 * @returns The clipped text with no trailing whitespace (the caller appends its own ellipsis).
 */
var clipReplay = (t, n) => {
  if (t.length <= n) return t;
  let head = t.slice(0, n);
  if (/[\uD800-\uDBFF]$/.test(head)) head = head.slice(0, -1);
  // Next character is whitespace: the slice already ends on a complete word.
  if (/\s/.test(t.charAt(head.length))) return head.trimEnd();
  const wholeWords = head.replace(/\S*$/, "").trimEnd();
  return wholeWords || head;
};
8408
8439
  function formatHistory(messages) {
8409
8440
  const shown = messages.filter((m) => m.role !== "system");
8410
8441
  if (!shown.length) return "";
8411
8442
  const out = [dim("\n \u2500\u2500 prior conversation \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n")];
8412
8443
  for (const m of shown) {
8413
8444
  if (m.role === "user") {
8414
- const t = contentText(m.content).split("\n\n--- @")[0].replace(/\n+/g, " ").trim();
8415
- if (t) out.push("\n" + bold(cyan(" \u203A ")) + (t.length > 1500 ? t.slice(0, 1500) + dim(" \u2026") : t) + "\n");
8445
+ const raw = contentText(m.content).split("\n\n--- @")[0].replace(/\n+/g, " ").trim();
8446
+ const t = condenseReplay(raw);
8447
+ if (t) out.push("\n" + bold(cyan(" \u203A ")) + (t.length > 1500 ? clipReplay(t, 1500) + dim(" \u2026") : t) + "\n");
8416
8448
  } else if (m.role === "assistant") {
8417
8449
  const at = contentText(m.content);
8418
8450
  if (at.trim()) out.push(dim(" ") + at.trim() + "\n");
@@ -8448,7 +8480,7 @@ function exportMarkdown(meta, messages) {
8448
8480
  const body = [];
8449
8481
  for (const m of shown) {
8450
8482
  if (m.role === "user") {
8451
- const t = contentText(m.content).split("\n\n--- @")[0].trim();
8483
+ const t = condenseReplay(contentText(m.content).split("\n\n--- @")[0].trim());
8452
8484
  if (t) body.push("## \u{1F464} User", "", t, "");
8453
8485
  } else if (m.role === "assistant") {
8454
8486
  const parts = [];
@@ -8956,6 +8988,19 @@ async function repl(args, ai, cfg, cwd) {
8956
8988
  const duplex = args.duplex;
8957
8989
  let dx;
8958
8990
  let voiceIO;
// Tracks whether voice-mode echo has written text to stdout without a terminating newline yet.
let voiceLineOpen = false;
/**
 * Echo a spoken text delta to stdout in its speech-scrubbed form. Deltas that scrub down to
 * nothing are skipped so they do not mark the line as open.
 */
const voiceEcho = (text) => {
  const spoken = forSpeech(text);
  if (spoken) {
    process.stdout.write(spoken);
    voiceLineOpen = true;
  }
};
/** Terminate the echoed line with a newline, but only if something was echoed since the last end. */
const voiceEchoEnd = () => {
  if (!voiceLineOpen) return;
  process.stdout.write("\n");
  voiceLineOpen = false;
};
8959
9004
  let toggleVoice;
8960
9005
  let editorRef;
8961
9006
  let repaintStash = () => {
@@ -9012,9 +9057,12 @@ async function repl(args, ai, cfg, cwd) {
9012
9057
  const host = {
9013
9058
  ...base,
9014
9059
  notify(e) {
9060
+ if (voiceIO && (e.kind === "thinking_delta" || e.kind === "turn_start")) return;
9015
9061
  if (e.kind === "text_delta" && voiceIO) {
9016
9062
  voiceIO.speakDelta(e.message);
9017
9063
  editorRef?.suspend();
9064
+ voiceEcho(e.message);
9065
+ return;
9018
9066
  } else if (e.kind === "text_delta" && stashBuf) {
9019
9067
  process.stdout.write("\r\x1B[K");
9020
9068
  base.notify(e);
@@ -9026,8 +9074,11 @@ async function repl(args, ai, cfg, cwd) {
9026
9074
  return;
9027
9075
  }
9028
9076
  if (e.kind === "revoice_done") {
9029
- base.flushText();
9030
- process.stdout.write("\n");
9077
+ if (voiceIO) voiceEchoEnd();
9078
+ else {
9079
+ base.flushText();
9080
+ process.stdout.write("\n");
9081
+ }
9031
9082
  voiceIO?.endSpeech();
9032
9083
  duplexPersist();
9033
9084
  editorRef?.resume();
@@ -9038,7 +9089,7 @@ async function repl(args, ai, cfg, cwd) {
9038
9089
  const lines = String(e.data.text).split("\n");
9039
9090
  const shown = lines.slice(0, previewLines());
9040
9091
  err("\r\x1B[0J\n" + dim(` \u29BF ${e.message}
9041
- `) + shown.map((l) => dim(` ${l}
9092
+ `) + shown.map((l) => dim(` ${plainLine(l)}
9042
9093
  `)).join(""));
9043
9094
  if (lines.length > shown.length) err(dim(` \u2026 (+${lines.length - shown.length} more lines)
9044
9095
  `));
@@ -9379,7 +9430,7 @@ async function repl(args, ai, cfg, cwd) {
9379
9430
  const turn = async (task) => {
9380
9431
  const r = await runTurn(face, store, session, task, duplex ? void 0 : checkpoints, cwd, sendVia);
9381
9432
  if (voiceIO) {
9382
- process.stdout.write("\n");
9433
+ voiceEchoEnd();
9383
9434
  editorRef?.resume();
9384
9435
  }
9385
9436
  voiceIO?.endSpeech();
@@ -10274,9 +10325,13 @@ ${extra}` : body);
10274
10325
  editorRef?.redrawNow();
10275
10326
  }, 250);
10276
10327
  },
10328
+ // voiceEchoEnd closes the open echo line; '\r\x1b[0J' wipes the stale prompt/footer before the
10329
+ // notice — every other async-chrome writer does this, and without it "✋ interrupted" overprints
10330
+ // the footer's leading chars (the "interrupted% ctx" glue).
10277
10331
  onBargeIn: (phase) => {
10278
10332
  activeTurn?.abort();
10279
- if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
10333
+ voiceEchoEnd();
10334
+ if (phase === "speaking") err("\r\x1B[0J" + yellow(" \u270B interrupted\n"));
10280
10335
  },
10281
10336
  onUtterance: (text) => {
10282
10337
  voicePartial = "";
@@ -10574,6 +10629,7 @@ if (import.meta.main) main().catch((e) => {
10574
10629
  export {
10575
10630
  appendMemoryNote,
10576
10631
  cacheMultipliers,
10632
+ condenseReplay,
10577
10633
  costOf,
10578
10634
  estimateTranscriptTokens,
10579
10635
  expandMentions,