npm - jeo-code - Versions diffs - 0.6.29 → 0.6.31 - Mend

jeo-code 0.6.29 → 0.6.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +24 -0
package/README.ja.md +2 -2
package/README.ko.md +2 -2
package/README.md +2 -2
package/README.zh.md +2 -2
package/package.json +1 -1
package/src/agent/AGENTS.md +1 -1
package/src/agent/engine.ts +20 -33
package/src/agent/loop-guards.ts +135 -0
package/src/agent/session.ts +3 -0
package/src/ai/providers/anthropic.ts +4 -0
package/src/ai/types.ts +5 -0
package/src/commands/launch.ts +83 -13
package/src/tui/app.ts +39 -0
package/src/tui/components/input-box.ts +12 -5
package/src/tui/components/session-picker.ts +226 -0
package/src/agent/tool-registry.ts +0 -54

package/CHANGELOG.md CHANGED Viewed

@@ -6,6 +6,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 The README mirrors the latest 5 entries — regenerate with `bun run changelog:sync`.
+## [0.6.31] - 2026-06-19
+_Live "Thinking" indicator for signature-only reasoning models (Anthropic opus-4-7/4-8), a live color cue when a `/command` or `$skill` trigger is recognized in the prompt, and a rich gjc-style `/resume` session picker — plus a fresh `jeo --tmux` no-leak re-verification._
+### Added
+- **The prompt box now recolors the `/command` / `$skill` trigger token live as you type it.** While typing an invocation, the active trigger token (anywhere on the line, mention-style via `activeTriggerToken`) is repainted inside the input box so the user can SEE the trigger was recognized: a valid, matchable invocation turns neon green (`#39ff14`), while a typo with no match turns pink (`#ff6b81`) — a visual heads-up that it will be sent as plain text. Wired through a new `InputBoxOptions.highlight` ({start,end,paint}, code-point offsets over `Array.from(line)`) into both the idle prompt (`launch.ts` `previewLines`) and the mid-turn live box (`app.ts` `setLivePromptHighlight`, reset at each new turn). Scroll ellipses now use ANSI-safe `truncateToWidth` so a painted token never gets sliced mid-escape.
+- **Rich `/resume` session picker (gjc parity).** A new `src/tui/components/session-picker.ts` renders a search/filter line, a scrolling window of multi-line entries (title + dimmed first-message preview + a `relative-time · size · N msgs` metadata line), a position indicator, and Del-to-delete / Enter-to-resume / Esc-to-cancel hints. `SessionSummary` now carries `sizeBytes` for the metadata line.
+### Fixed
+- **Signature-only reasoning models now show a live Thinking block while the model thinks.** Models that reason internally and stream a `signature` but NO `thinking_delta` text (claude-opus-4-7/4-8) opened a thinking block that produced zero visible deltas, so the TUI's dimmed live "Thinking" trace never appeared — the response wait read as a frozen "calling model …". The Anthropic stream adapter now fires a new display-only `onReasoningStart` signal the instant a `thinking` / `redacted_thinking` block opens, and the TUI renders a live `Thinking · Ns` block with a `(thinking…)` placeholder that is replaced the moment any real thought or answer text streams. Replay/artifact capture is unchanged.
+### Verified
+- **`jeo --tmux` has no bun memory leak and stays responsive.** A real `--tmux` session flooded with ~30,000 SGR mouse-report sequences via `tmux send-keys` plateaus in RSS (147 → 246 MB asymptotically: +83 / +12 / +3 / +0.2 / +0.4 MB per 6k-report round → no per-event linear growth) and stays responsive afterward (`/model` preview renders in 14 ms with the trigger highlight intact). The mouse-report swallow guard drops the reports instead of buffering/echoing them.
+- **Full suite green:** `bun run typecheck` clean and `bun test` 1703 pass / 0 fail across 211 files (includes the new `test/input-box.test.ts`, `test/tui-app.test.ts`, and `test/session-picker.test.ts` highlight/picker coverage).
+## [0.6.30] - 2026-06-19
+_gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down._
+### Changed
+- **Loop intermediate-judgment guards extracted into a classified module (`src/agent/loop-guards.ts`).** The mid-run "continue / self-correct / stop" decisions that were inlined across `engine.ts`'s `while` loop as scattered booleans and message strings are now a named `GuardState` discriminated-union taxonomy — jeo's descendant of gjc's `ultragoal-guard` `UltragoalGuardState` pattern. A single frozen `GUARD_LIMITS` object is the source of truth for every threshold (`MAX_REPEAT`, `MAX_FAILURES`, `MAX_REFUSAL_RETRIES`, `MAX_INVALID_CALLS`, `MAX_PARSE_BOUNCES`, `CYCLE_WINDOW`), and pure classifiers (`isVerificationSignal`, `repeatHint`, `nearestToolName`, `classifyDoneGate`) are now independently testable. `engine.ts` still owns all control flow (history mutation, `step++`, `continue`, `return finish(...)`) — only the JUDGMENT moved, so behavior is unchanged (net −19 lines in `engine.ts`). Removed the now-unused `src/agent/tool-registry.ts`.
+### Verified
+- **`jeo --tmux` has no bun memory leak and does not slow down.** An in-process probe streaming 5,000,000 SGR mouse-report escapes through `queuePromptInputChunk` (10 × 500k, `Bun.gc(true)` between batches) holds RSS flat (133.9 → 135.2 MB, slope ≈0.13 MB/round) with zero prompt-queue accumulation; a real `jeo --tmux` session flooded with 60k live mouse reports via `tmux send-keys` plateaus in RSS (129,456 → 129,472 KB). `jeo --tmux -p` end-to-end creates the profiled session, runs the turn, and tears down cleanly.
+- **Full suite green:** `bun run typecheck` clean and `bun test` 1687 pass / 0 fail across 210 files (includes the new `test/loop-guards.test.ts`, 9 tests, and the signature-only Anthropic replay test).
 ## [0.6.29] - 2026-06-19
 _Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak._

package/README.ja.md CHANGED Viewed

@@ -200,11 +200,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
 ## 変更履歴 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.6.31]** (2026-06-19) — Live "Thinking" indicator for signature-only reasoning models (Anthropic opus-4-7/4-8), a live color cue when a `/command` or `$skill` trigger is recognized in the prompt, and a rich gjc-style `/resume` session picker — plus a fresh `jeo --tmux` no-leak re-verification.
+- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
 - **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
 - **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
 - **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
-- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
-- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.ko.md CHANGED Viewed

@@ -200,11 +200,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
 ## 변경 이력 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.6.31]** (2026-06-19) — Live "Thinking" indicator for signature-only reasoning models (Anthropic opus-4-7/4-8), a live color cue when a `/command` or `$skill` trigger is recognized in the prompt, and a rich gjc-style `/resume` session picker — plus a fresh `jeo --tmux` no-leak re-verification.
+- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
 - **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
 - **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
 - **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
-- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
-- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.md CHANGED Viewed

@@ -200,11 +200,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
 ## Changelog
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.6.31]** (2026-06-19) — Live "Thinking" indicator for signature-only reasoning models (Anthropic opus-4-7/4-8), a live color cue when a `/command` or `$skill` trigger is recognized in the prompt, and a rich gjc-style `/resume` session picker — plus a fresh `jeo --tmux` no-leak re-verification.
+- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
 - **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
 - **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
 - **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
-- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
-- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.zh.md CHANGED Viewed

@@ -200,11 +200,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
 ## 更新日志 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.6.31]** (2026-06-19) — Live "Thinking" indicator for signature-only reasoning models (Anthropic opus-4-7/4-8), a live color cue when a `/command` or `$skill` trigger is recognized in the prompt, and a rich gjc-style `/resume` session picker — plus a fresh `jeo --tmux` no-leak re-verification.
+- **[0.6.30]** (2026-06-19) — gjc-style intermediate-judgment guard classification extracted from the engine loop, plus a re-verification that `jeo --tmux` does not leak bun memory or slow down.
 - **[0.6.29]** (2026-06-19) — Signature-only thinking-block replay (Anthropic opus-4-7/4-8), plus a tmux mouse-flood memory guard confirming `jeo --tmux` does not leak.
 - **[0.6.28]** (2026-06-19) — Signed thinking-block replay: native reasoning is now sent BACK to providers across steps/turns, restoring multi-step reasoning continuity (gajae parity).
 - **[0.6.27]** (2026-06-19) — Ponytail pass on the reasoning-tier mapper, plus a real-tmux verification of `jeo --tmux`.
-- **[0.6.26]** (2026-06-19) — The forge emblem is redrawn again as the mascot crayfish, foregrounding its signature pincer claws (집게).
-- **[0.6.25]** (2026-06-19) — Reasoning works at every thinking level (gajae parity), and the forge emblem is redrawn as the neon-lens coding wizard.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "jeo-code",
-  "version": "0.6.29",
+  "version": "0.6.31",
   "description": "Clean, highly optimized AI coding agent using spec-first loop",
   "type": "module",
   "main": "src/cli.ts",

package/src/agent/AGENTS.md CHANGED Viewed

@@ -17,6 +17,7 @@ The core runtime loop, tool registry, session management, and state persistence
 | `hooks.ts` | Brief description of purpose |
 | `json.ts` | Brief description of purpose |
 | `loop.ts` | The primary execution loop orchestrating model calls and tool execution |
+| `loop-guards.ts` | Intermediate-judgment classification (gjc ultragoal-guard parity): named `GuardState` taxonomy, `GUARD_LIMITS` thresholds, and pure classifiers (`isVerificationSignal`, `repeatHint`, `classifyDoneGate`) consumed by `engine.ts` |
 | `memory.ts` | OKF concept-bundle memory: session distill, query-aware budget injection, legacy MEMORY.md migration (`migrateLegacyMemory`) + `JEO_MEMORY_LEGACY` rollback toggle |
 | `memory-okf.ts` | OKF v0.1 format layer: frontmatter parse/serialize, concept IDs, conformance validation |
 | `memory-graph.ts` | Concept cross-link graph: build/expand (1-hop search), broken-link-tolerant lint, optional graphify detection |
@@ -35,7 +36,6 @@ The core runtime loop, tool registry, session management, and state persistence
 | `todo-tool.ts` | Brief description of purpose |
 | `tokenizer.ts` | Brief description of purpose |
 | `tool-output.ts` | Brief description of purpose |
-| `tool-registry.ts` | Brief description of purpose |
 | `tools.ts` | Built-in tool definitions (bash, read, write, edit, etc.) |
 | `web-search.ts` | Brief description of purpose |

package/src/agent/engine.ts CHANGED Viewed

@@ -22,6 +22,7 @@ export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFA
 import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
 import { historyTokens, trimToolResultsInPlace } from "./compaction";
 import { jeoEnv } from "../util/env";
+import { GUARD_LIMITS, isVerificationSignal, repeatHint, classifyDoneGate } from "./loop-guards";
 async function invokeCallLlm(history: Message[], options: {
@@ -34,6 +35,7 @@ async function invokeCallLlm(history: Message[], options: {
   onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
   onToken?: (delta: string) => void;
   onReasoning?: (delta: string) => void;
+  onReasoningStart?: () => void;
   onReasoningArtifact?: (artifact: import("../ai/types").ReasoningArtifact) => void;
   tools?: import("../ai/types").NativeToolSchema[];
 }): Promise<string> {
@@ -195,6 +197,10 @@ export interface AgentLoopEvents {
   /** Accumulated native reasoning/thinking text so far — drives a transient dimmed
    *  "thinking" view. Only requested when a consumer (TUI) attaches. */
   onReasoningStream?(textSoFar: string): void;
+  /** Fired once when the model opens an extended-thinking block (before/without any
+   *  thinking text). Lets the TUI show a live "thinking" indicator for signature-only
+   *  reasoning models (opus-4-7/4-8) whose wait would otherwise look frozen. */
+  onReasoningStart?(): void;
   /** Each provider-native reasoning ARTIFACT as it is captured (signature / thoughtSignature /
    *  reasoning item). Lets the final-reply path (launch.ts) persist artifacts for replay. */
   onReasoningArtifactStream?(artifact: import("../ai/types").ReasoningArtifact): void;
@@ -378,29 +384,15 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
     } catch { /* best-effort; fall through to the plain stop message */ }
     return finish({ done: false, steps: step, doneReason: `Stopped: ${stopReason}` });
   };
-  // Result-aware repeat nudge (A): tell the model WHY repeating won't help and what to
-  // try instead, tailored to the repeated tool and its last actual result.
-  const repeatHint = (tool: string, prev?: { success: boolean; output: string }): string => {
-    const out = prev?.output ?? "";
-    const empty = !prev || !prev.success || out.trim() === "" || /no match|0 match|no result|not found|no file/i.test(out);
-    if (tool === "search" || tool === "find" || tool === "ls") {
-      return empty
-        ? `That '${tool}' returned nothing useful and will again — BROADEN it (a looser pattern, a parent directory, or a different tool such as ${tool === "search" ? "find" : "search"}), or call done if this lookup isn't needed.`
-        : `That '${tool}' already returned results — open one of the hits with read, or move on; re-running it changes nothing.`;
-    }
-    if (tool === "read") return `You already read that and its content is unchanged — use what you read, or read a DIFFERENT file.`;
-    if (tool === "bash") return `That command already ran with the same output — change the command, or call done.`;
-    return `That call's result is unchanged — take a different action, or call done.`;
-  };
   // No-progress guard: weak/local models often repeat the same tool call without
   // ever emitting `done`. Two escalating corrections (B), then a consolidated stop.
-  const MAX_REPEAT = 4;
+  const MAX_REPEAT = GUARD_LIMITS.MAX_REPEAT;
   // Last executed step's per-call results — fed to repeatHint so a corrective bounce
   // can cite the repeated call's ACTUAL last outcome (A).
   let lastResults: { success: boolean; output: string; executed: boolean }[] = [];
   // Consecutive-failure guard: a model that keeps emitting *different* but failing
   // calls (bad edits, failing commands) would otherwise burn the whole step budget.
-  const MAX_FAILURES = 5;
+  const MAX_FAILURES = GUARD_LIMITS.MAX_FAILURES;
   let consecutiveFailures = 0;
   // done-verification guard (plan/gjc-inheritance.md B4, gjc ultragoal-guard 경량 계승):
   // a turn that MUTATED files but shows no verification signal gets ONE pushback on
@@ -424,16 +416,15 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
   // as-is, then once more with an explicit re-grounding note; only a third
   // refusal in the turn surfaces the (friendly) error. Bounded per turn so a
   // genuinely refused request can never burn billed calls in a loop.
-  const MAX_REFUSAL_RETRIES = 3;
+  const MAX_REFUSAL_RETRIES = GUARD_LIMITS.MAX_REFUSAL_RETRIES;
   let refusalRetries = 0;
-  const VERIFY_SIGNAL_RE = /\b(test|tests|tsc|typecheck|lint|build|check|spec|pytest|vitest|jest)\b/i;
   let lastSig = "";
   let repeatCount = 0;
   // Cycle guard (the A↔B ping-pong the exact-repeat guard cannot see): the recent
   // executed step signatures, as fixed-size digests. When a full window cycles
   // through ≤2 distinct calls, bounce ONCE with an explicit correction; a spin that
   // persists through the correction stops the turn.
-  const CYCLE_WINDOW = 6;
+  const CYCLE_WINDOW = GUARD_LIMITS.CYCLE_WINDOW;
   const recentStepSigs: string[] = [];
   let cycleBounceUsed = false;
   // Invalid-tool-call guard: a model that returns JSON without a usable `tool`
@@ -441,10 +432,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
   let invalidToolCalls = 0;
   // A JSON reply with no usable `tool` field can't drive the loop — stop sooner than the
   // repeat-spin guard (no escalating correction helps a model that isn't producing a call).
-  const MAX_INVALID_CALLS = 3;
+  const MAX_INVALID_CALLS = GUARD_LIMITS.MAX_INVALID_CALLS;
   // Prose-bounce guard: after this many invalid-JSON corrections, salvage the
   // model's text as the final answer instead of burning the whole step budget.
-  const MAX_PARSE_BOUNCES = 2;
+  const MAX_PARSE_BOUNCES = GUARD_LIMITS.MAX_PARSE_BOUNCES;
   let parseFailures = 0;
   while (true) {
     if (turnBudgetMs > 0 && Date.now() - turnStartedAt > turnBudgetMs) {
@@ -540,6 +531,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
               onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
               onToken,
               onReasoning,
+              onReasoningStart: ev.onReasoningStart,
               onReasoningArtifact,
               // Make provider auto-retry visible: previously a rate-limited call sat in a
               // silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
@@ -703,19 +695,14 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
     ev.onAssistant?.(responseText, toolCalls[0]);
     if (toolCalls.length === 1 && toolCalls[0].tool === "done") {
-      if (sawMutation && (!sawVerification || pendingHookFailure !== null) && !donePushbackUsed) {
+      // done-verification gate — jeo's descendant of gjc's ultragoal-guard completion
+      // state machine (plan/gjc-inheritance.md B4). The classifier owns the JUDGMENT
+      // (which named state, which message); the loop owns the once-pushback latch.
+      const doneGate = classifyDoneGate({ sawMutation, sawVerification, pendingHookFailure });
+      if (doneGate.block && !donePushbackUsed) {
         donePushbackUsed = true; // second done always passes — escape hatch
         pushAssistantTurn(history, responseText, reasonBuf, artifactBuf);
-        history.push({
-          role: "user",
-          content: pendingHookFailure !== null
-            ? `Your latest mutation left the post-turn hook "${pendingHookFailure}" FAILING (non-zero exit) — its diagnostics were shown in the tool result above. ` +
-              "Fix the reported problems (the hook re-runs on your next mutation), then call done. " +
-              "If the hook failure is a false positive, call done again and say why in the reason."
-            : "You modified files this turn but ran NO verification (no test/build/typecheck command succeeded). " +
-              "Run the narrowest command that proves your change works, then call done. " +
-              "If verification is genuinely not applicable (docs/config-only change), call done again and say why in the reason.",
-        });
+        history.push({ role: "user", content: doneGate.message });
         step++;
         continue;
       }
@@ -1039,7 +1026,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
       if (t === "write" || t === "edit") sawMutation = true;
       else if (t === "bash") {
         const cmd = String(toolCalls[i].arguments?.command ?? "");
-        if (VERIFY_SIGNAL_RE.test(cmd) || VERIFY_SIGNAL_RE.test(results[i].output.slice(0, 2000))) sawVerification = true;
+        if (isVerificationSignal(cmd, results[i].output)) sawVerification = true;
       }
     }
     // F6 (round 4 architect, Low): judge the step by its NON-TRIVIAL calls — a

package/src/agent/loop-guards.ts ADDED Viewed

@@ -0,0 +1,135 @@
+/**
+ * Intermediate-judgment guards for the agent loop — the mid-run "should this turn
+ * continue, correct itself, or stop" decisions that run between model calls.
+ *
+ * gjc keeps this concern in its own layer: `gjc-runtime/ultragoal-guard.ts` computes a
+ * named `UltragoalGuardState` discriminated union PURELY, and the runtime merely acts on
+ * the verdict. jeo previously inlined the same logic inside `engine.ts`'s `while` loop as
+ * scattered booleans and message strings. This module gives jeo the same classification:
+ * a named `GuardState` taxonomy plus pure, independently-testable classifier functions.
+ * `engine.ts` still owns the control flow (history mutation, `step++`, `continue`,
+ * `return finish(...)`) — only the JUDGMENT moves here, so behavior is unchanged.
+ */
+/**
+ * Named taxonomy of the loop's intermediate judgments — jeo's descendant of gjc's
+ * `UltragoalGuardState`. Each member names one decision the loop can reach mid-turn.
+ *
+ * This is a plain union of string literals. Within this module only the three `done_*`
+ * members are produced (through `DoneGateVerdict.state`); the remaining members are not
+ * referenced by any classifier defined here.
+ */
+export type GuardState =
+  | "ok" // proceed: emit / execute the tool call as-is
+  | "repeat_correct" // exact-repeat detected → ONE corrective bounce (skip execution)
+  | "repeat_stop" // exact-repeat survived the correction → consolidate-stop
+  | "cycle_correct" // A↔B alternation detected → ONE corrective bounce
+  | "cycle_stop" // cycle survived the correction → consolidate-stop
+  | "consecutive_failure_stop" // MAX_FAILURES different-but-failing steps → stop
+  | "invalid_tool_stop" // MAX_INVALID_CALLS replies with no usable tool field → stop
+  | "parse_salvage" // repeated non-JSON prose → salvage the text as the final answer
+  | "context_overflow_retry" // provider reported context overflow → ONE trim + retry
+  | "refusal_retry" // transient safety refusal → bounded resend ladder
+  | "done_unverified" // mutated files, no verification signal → pushback on done
+  | "done_hook_failing" // post-turn hook still failing → pushback on done
+  | "done_ok"; // done accepted — the turn is finished
+/**
+ * Bounded thresholds for every loop guard — the single, named source of truth.
+ * Kept in one frozen object so the limits are discoverable and testable instead of
+ * sprinkled as bare literals through the loop body.
+ *
+ * `Object.freeze` makes the object reject property writes at runtime, so a consumer
+ * cannot change a limit by assignment. `engine.ts` reads each value once into a local
+ * constant of the same name (e.g. `const MAX_REPEAT = GUARD_LIMITS.MAX_REPEAT`).
+ */
+export const GUARD_LIMITS = Object.freeze({
+  /** Identical step repeats tolerated before a consolidated stop (with corrections en route). */
+  MAX_REPEAT: 4,
+  /** Consecutive different-but-failing steps before the turn stops. */
+  MAX_FAILURES: 5,
+  /** Safety-refusal resends per turn before surfacing the friendly error. */
+  MAX_REFUSAL_RETRIES: 3,
+  /** Replies with no usable `tool`/`tools` field before the turn stops. */
+  MAX_INVALID_CALLS: 3,
+  /** Consecutive non-JSON parse failures before the prose is salvaged as the answer. */
+  MAX_PARSE_BOUNCES: 2,
+  /** Recent-signature window scanned for an A↔B (≤2 distinct calls) cycle. */
+  CYCLE_WINDOW: 6,
+});
+/**
+ * Commands (or their output) that count as a verification signal: a test, build,
+ * typecheck, or lint invocation. The done-verification guard treats a turn that mutated
+ * files without any such signal as "unverified".
+ *
+ * This is a keyword heuristic: it matches any of the listed words as a whole word
+ * (word boundaries on both sides), case-insensitively, anywhere in the tested text.
+ * The pattern has no `g` or `y` flag, so `.test()` keeps no `lastIndex` state between
+ * calls and this shared instance can be reused freely.
+ */
+export const VERIFY_SIGNAL_RE = /\b(test|tests|tsc|typecheck|lint|build|check|spec|pytest|vitest|jest)\b/i;
+/**
+ * True when a bash command (or the head of its output) contains a verification keyword
+ * matched by `VERIFY_SIGNAL_RE`. This is a text match only — the function receives no
+ * exit status and does not check whether the command succeeded.
+ * Output is examined only up to the first 2000 chars — enough to catch a tool runner's
+ * banner without rescanning a megabyte of logs.
+ *
+ * @param cmd - The command string; scanned in full.
+ * @param output - The command's output; only its first 2000 characters are scanned.
+ *   Defaults to the empty string.
+ * @returns `true` if `cmd` or the scanned head of `output` matches, otherwise `false`.
+ */
+export function isVerificationSignal(cmd: string, output = ""): boolean {
+  return VERIFY_SIGNAL_RE.test(cmd) || VERIFY_SIGNAL_RE.test(output.slice(0, 2000));
+}
+/**
+ * Result-aware repeat nudge: tells the model WHY repeating the call won't help and what
+ * to try instead, tailored to the repeated tool and its last actual result.
+ *
+ * For `search` / `find` / `ls` the wording depends on whether the previous result looks
+ * empty: an empty result suggests broadening (and names an alternate tool — `find` for
+ * `search`, `search` for `find` and `ls`), a non-empty one suggests opening a hit.
+ * `read` and `bash` each get a fixed message; any other tool gets a generic one.
+ *
+ * @param tool - Name of the tool whose call was repeated.
+ * @param prev - The repeated call's previous result, if known. Only consulted for
+ *   `search` / `find` / `ls`.
+ * @returns The corrective hint text.
+ */
+export function repeatHint(tool: string, prev?: { success: boolean; output: string }): string {
+  const out = prev?.output ?? "";
+  const empty = !prev || !prev.success || out.trim() === "" || /no match|0 match|no result|not found|no file/i.test(out); // missing, failed, blank, or "nothing found"-style output all count as empty
+  if (tool === "search" || tool === "find" || tool === "ls") {
+    return empty
+      ? `That '${tool}' returned nothing useful and will again — BROADEN it (a looser pattern, a parent directory, or a different tool such as ${tool === "search" ? "find" : "search"}), or call done if this lookup isn't needed.`
+      : `That '${tool}' already returned results — open one of the hits with read, or move on; re-running it changes nothing.`;
+  }
+  if (tool === "read") return `You already read that and its content is unchanged — use what you read, or read a DIFFERENT file.`;
+  if (tool === "bash") return `That command already ran with the same output — change the command, or call done.`;
+  return `That call's result is unchanged — take a different action, or call done.`;
+}
+/**
+ * Inputs for the done-verification gate (jeo's descendant of gjc's ultragoal-guard).
+ * All three fields are supplied by the caller; `classifyDoneGate` only reads them.
+ */
+export interface DoneGateInput {
+  /** A write/edit succeeded this turn. */
+  sawMutation: boolean;
+  /**
+   * A test/build/typecheck/lint command succeeded this turn.
+   * NOTE(review): `isVerificationSignal` is a keyword match and does not itself check
+   * exit status — confirm the caller enforces "succeeded" before setting this flag.
+   */
+  sawVerification: boolean;
+  /**
+   * The run-command of the most recent still-failing post-turn hook, or null.
+   * When the gate blocks on a hook failure, this string is interpolated verbatim into
+   * the pushback message.
+   */
+  pendingHookFailure: string | null;
+}
+/**
+ * Verdict from {@link classifyDoneGate}: whether to bounce `done`, and the message.
+ * `state` is limited to the three `done_*` members of `GuardState`. As produced by
+ * `classifyDoneGate`, `block` is true exactly when `state` is not `"done_ok"`.
+ */
+export interface DoneGateVerdict {
+  state: Extract<GuardState, "done_ok" | "done_unverified" | "done_hook_failing">;
+  /** When true, `done` should be bounced ONCE with `message` (the caller owns the once-gate). */
+  block: boolean;
+  /** Corrective message to push back on `done`; empty when `state === "done_ok"`. */
+  message: string;
+}
+/**
+ * Classify whether a `done` should be accepted or bounced — the direct descendant of
+ * gjc's `ultragoal-guard` completion gate (plan/gjc-inheritance.md B4).
+ *
+ * A turn that MUTATED files but has either NO verification signal or a still-failing
+ * post-turn hook is blocked ONCE. The caller owns the single-pushback latch; a second
+ * `done` always passes (the escape hatch for genuinely-unverifiable docs/config changes).
+ *
+ * Precedence: when a hook is failing AND no verification was seen, the hook-failure
+ * verdict is returned. Without a mutation the gate never blocks, even if
+ * `pendingHookFailure` is non-null. The function is pure: it reads `input` and returns
+ * a fresh verdict object, with no latch or other state of its own.
+ *
+ * @param input - The turn's mutation / verification / hook-failure flags.
+ * @returns The named state, whether to bounce `done`, and the message to push back
+ *   (an empty string when the state is `"done_ok"`).
+ */
+export function classifyDoneGate(input: DoneGateInput): DoneGateVerdict {
+  const hookFailing = input.pendingHookFailure !== null;
+  const block = input.sawMutation && (!input.sawVerification || hookFailing);
+  if (!block) return { state: "done_ok", block: false, message: "" };
+  if (hookFailing) { // checked first, so a failing hook outranks missing verification
+    return {
+      state: "done_hook_failing",
+      block: true,
+      message:
+        `Your latest mutation left the post-turn hook "${input.pendingHookFailure}" FAILING (non-zero exit) — its diagnostics were shown in the tool result above. ` +
+        "Fix the reported problems (the hook re-runs on your next mutation), then call done. " +
+        "If the hook failure is a false positive, call done again and say why in the reason.",
+    };
+  }
+  return {
+    state: "done_unverified",
+    block: true,
+    message:
+      "You modified files this turn but ran NO verification (no test/build/typecheck command succeeded). " +
+      "Run the narrowest command that proves your change works, then call done. " +
+      "If verification is genuinely not applicable (docs/config-only change), call done again and say why in the reason.",
+  };
+}

package/src/agent/session.ts CHANGED Viewed

@@ -32,6 +32,8 @@ export interface SessionSummary {
   messageCount: number;
   preview: string;
   mtimeMs?: number;
+  /** Session file size in bytes (for the resume picker's metadata line). */
+  sizeBytes?: number;
   title?: string;
 }
@@ -288,6 +290,7 @@ export async function listSessions(cwd = process.cwd()): Promise<SessionSummary[
         messageCount,
         preview,
         mtimeMs: stat.mtimeMs,
+        sizeBytes: stat.size,
         title: header.title,
       });
     } catch {

package/src/ai/providers/anthropic.ts CHANGED Viewed

@@ -353,8 +353,12 @@ export const anthropicAdapter: ProviderAdapter = {
         toolBlocks.set(evt.index, { name: evt.content_block.name ?? "", args: "" });
       } else if (evt.type === "content_block_start" && evt.content_block?.type === "thinking" && typeof evt.index === "number") {
         thinkBlocks.set(evt.index, { text: "" });
+        // Signal the thinking phase started so the UI shows a live "thinking" indicator
+        // even for signature-only models (opus-4-7/4-8) that stream NO thinking_delta text.
+        options.onReasoningStart?.();
       } else if (evt.type === "content_block_start" && evt.content_block?.type === "redacted_thinking" && evt.content_block.data) {
         // Redacted thinking carries opaque `data` directly (no deltas) — emit immediately.
+        options.onReasoningStart?.();
         options.onReasoningArtifact?.({ provider: "anthropic", model: options.model, redacted: evt.content_block.data });
       } else if (evt.type === "content_block_delta" && evt.delta?.type === "input_json_delta" && typeof evt.index === "number") {
         const b = toolBlocks.get(evt.index);

package/src/ai/types.ts CHANGED Viewed

@@ -116,6 +116,11 @@ export interface CallOptions {
    *  answer text). Surfaced as a transient dimmed view; absent for models that emit no
    *  thought text. */
   onReasoning?: (delta: string) => void;
+  /** Fired ONCE when the model opens an extended-thinking block, before (or without) any
+   *  thinking-text deltas. Lets a UI show a live "thinking" indicator even for models
+   *  (e.g. claude-opus-4-7/4-8) that reason internally and stream NO visible thought text,
+   *  so the response wait does not look frozen. Display-only — carries no content. */
+  onReasoningStart?: () => void;
   /** Sink for provider-native reasoning ARTIFACTS captured during streaming (signature /
    *  thoughtSignature / reasoning item id+encrypted). Separate from `onReasoning` (display
    *  text) because these arrive on different SSE events and are opaque replay data. */

package/src/commands/launch.ts CHANGED Viewed

@@ -45,6 +45,7 @@ import { openaiCompatDef, SUBSCRIPTION_PROVIDER_NAMES } from "../ai/providers/op
 import { allSubagentRoles, getSubagentRole, resolveSubagentModel, resolveSubagentMaxSteps, resolveSubagentThinking, parseMaxSteps, withSubagentSetting, clearSubagentSetting } from "../agent/subagents";
 import { SelectList, renderSelectList, type SelectItem } from "../tui/components/select-list";
+import { SessionPicker, renderSessionPicker } from "../tui/components/session-picker";
 import {
   formatModelLine,
   formatProviderPanel,
@@ -760,6 +761,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
               queueBusyClear?.();
               tui.setLivePromptInput("");
               tui.setLivePromptHint([]);
+              tui.setLivePromptHighlight(undefined);
               if (classifyMidTurnLine(line) === "command") {
                 // Run it as a real COMMAND: queue it for immediate dispatch by the prompt
                 // loop and abort the turn (the same controller Esc uses). The abort ends a
@@ -798,6 +800,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
             tui.setLivePromptHint(
               /^\s*[/$]/.test(draft) ? formatMidTurnHint(draft.trimStart(), completionContext(), 5) : [],
             );
+            tui.setLivePromptHighlight(triggerHighlight(expandSentinel(draft)));
           }
         },
         onAbortNotice: msg => {
@@ -1790,6 +1793,26 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
     if (sessionId) { const hex = SESSION_BOX_ACCENTS[sessionBoxColorIdx]!; return { accent: hexPaint(hex), shadow: hexShadowPaint(hex) }; }
     return { accent: uiAccent, shadow: uiAccentShadow };
   };
+  // Recolor the active `/command` or `$skill` trigger token INSIDE the input box so a
+  // real invocation is visibly recognized as it is typed: neon green once the token
+  // resolves to ≥1 command/skill, caution pink while it matches none (a likely typo
+  // that would be sent as plain text). Offsets are code-point indices into the SAME
+  // string the box renders, so multi-byte preceding text stays aligned with the box's
+  // Array.from() char model. Returns undefined for colorless themes / no active trigger.
+  const TRIGGER_HL_VALID = "#39ff14"; // neon green: at least one command/skill matches
+  const TRIGGER_HL_UNKNOWN = "#ff6b81"; // pink: nothing matches
+  const triggerHighlight = (
+    rendered: string,
+  ): { start: number; end: number; paint: (s: string) => string } | undefined => {
+    if (!uiTheme.color) return undefined; // colorless theme: never paint
+    const trigger = activeTriggerToken(rendered);
+    if (!trigger) return undefined; // no trigger token on this line
+    const start = Array.from(rendered.slice(0, trigger.start)).length; // trigger.start is used as a string (UTF-16) index; convert the prefix to a code-point count
+    const end = start + Array.from(trigger.token).length; // exclusive end, also in code points
+    const valid = slashPreviewMatches(rendered, skillSlashDetails, resolvedSkills).length > 0; // NOTE(review): matched against the whole rendered line, not only trigger.token — confirm that is intended
+    const hex = valid ? TRIGGER_HL_VALID : TRIGGER_HL_UNKNOWN;
+    return { start, end, paint: (s: string) => chalk.hex(hex)(s) };
+  };
   const refreshUiTheme = (): void => {
     uiTheme = resolveTheme(process.env);
     uiAccent = accentPaint(uiTheme);
@@ -1825,7 +1848,8 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
     const rli = rl as unknown as { line?: string; cursor?: number };
     const caret = rli.line === line && typeof rli.cursor === "number" ? rli.cursor : line.length;
     const { accent: boxAccent, shadow: boxShadow } = boxAccents(line);
-    const frame = renderInputFrame(expandSentinel(line), {
+    const rendered = expandSentinel(line);
+    const frame = renderInputFrame(rendered, {
       // Full terminal width (cols is already columns - 1, leaving the last column free
       // so a full-width row never wraps). Matches the live-turn box, user/forge cards,
       // and the welcome banner — all share this cols-1 width so nothing jumps on the
@@ -1841,6 +1865,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
         : undefined,
       maxBodyRows: Math.max(1, footerRows - 7),
       cursor: caret,
+      highlight: triggerHighlight(rendered),
     });
     const input = frame.lines.map(l => truncateAnsi(l, cols));
     // jeo-ref layout: a blank spacer row between the status bar (row 0) and the
@@ -2930,26 +2955,71 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
           if (arg) { await applyResume(arg); continue; }
           // No id → only sessions with a real conversation are resumable (every launch
           // creates an empty session; those are noise).
-          const sessions = (await listSessions(cwd)).filter(s => s.messageCount > 0);
-          if (sessions.length === 0) {
+          let pool = (await listSessions(cwd)).filter(s => s.messageCount > 0);
+          if (pool.length === 0) {
             console.log("(no saved sessions with history)");
             continue;
           }
-          // Interactive arrow-key picker on a TTY: ↑↓ to move, Enter to resume, Esc cancels.
+          // Interactive gjc-style picker on a TTY: type to filter, ↑↓/PgUp/PgDn to
+          // move, Enter resumes, Del deletes (press Del twice to confirm), Esc cancels.
           if (process.stdin.isTTY && process.stdout.isTTY) {
-            const items: SelectItem<string>[] = sessions.slice(0, 50).map(s => ({
-              value: s.id,
-              label: `${s.title ? `[${s.title}] ` : ""}${(s.preview || s.id).replace(/\s+/g, " ")}`.slice(0, 76) || s.id,
-              hint: `${s.messageCount} msgs${s.id === sessionId ? " · current" : ""}`,
-            }));
-            const picked = await pickFromOptions("Resume a session  ↑↓ move · Enter resume · Esc cancel", items);
-            if (picked) await applyResume(picked);
-            else console.log("(resume cancelled)");
+            // Loop so a delete refreshes the list and re-opens the picker in place.
+            for (;;) {
+              const picker = new SessionPicker(pool);
+              let action: { kind: "resume" | "delete"; id: string } | undefined;
+              let confirmDeleteId: string | undefined;
+              await runSelectPicker(
+                (cols, rows) => renderSessionPicker(picker, {
+                  title: "Resume a session",
+                  cols,
+                  rows: Math.max(8, rows),
+                  unicode: true,
+                  color: true,
+                  confirmDeleteId,
+                }),
+                (ch, key) => {
+                  if (key?.name === "up") { confirmDeleteId = undefined; picker.up(); return false; }
+                  if (key?.name === "down") { confirmDeleteId = undefined; picker.down(); return false; }
+                  if (key?.name === "pageup") { confirmDeleteId = undefined; picker.page(-1); return false; }
+                  if (key?.name === "pagedown") { confirmDeleteId = undefined; picker.page(1); return false; }
+                  if (key?.name === "escape" || (key?.ctrl && key.name === "c")) return true;
+                  if (key?.name === "delete") {
+                    const sel = picker.selected();
+                    if (!sel) return false;
+                    if (confirmDeleteId === sel.id) { action = { kind: "delete", id: sel.id }; return true; }
+                    confirmDeleteId = sel.id;
+                    return false;
+                  }
+                  if (key?.name === "return" || key?.name === "enter") {
+                    const sel = picker.selected();
+                    if (sel) { action = { kind: "resume", id: sel.id }; return true; }
+                    return false;
+                  }
+                  confirmDeleteId = undefined;
+                  if (key?.name === "backspace") { picker.backspace(); return false; }
+                  if (ch && ch >= " " && !key?.ctrl && !key?.meta) picker.typeChar(ch);
+                  return false;
+                },
+              );
+              if (!action) { console.log("(resume cancelled)"); break; }
+              if (action.kind === "resume") { await applyResume(action.id); break; }
+              // Delete: drop the file, refresh the pool, and re-open the picker.
+              const delId = action.id;
+              try {
+                const removed = await deleteSession(delId, cwd);
+                console.log(removed ? `(deleted session ${delId})` : `(session ${delId} already gone)`);
+              } catch (err) {
+                console.log(`! delete failed: ${(err as Error).message}`);
+              }
+              if (delId === sessionId) await startFreshSession("dropped current session");
+              pool = pool.filter(s => s.id !== delId);
+              if (pool.length === 0) { console.log("(no saved sessions with history)"); break; }
+            }
             continue;
           }
           // Non-TTY fallback: static list (resume with /session resume <id>).
           console.log("Saved sessions — resume with /session resume <id>:");
-          for (const s of sessions.slice(0, 15)) {
+          for (const s of pool.slice(0, 15)) {
             const marker = s.id === sessionId ? "*" : " ";
             console.log(` ${marker}${s.id}  (${s.messageCount} msgs)  ${s.title ? `[${s.title}] ` : ""}${s.preview}`);
           }

package/src/tui/app.ts CHANGED Viewed

@@ -68,6 +68,9 @@ export interface AgentEventsLike {
   onUsage?(usage: { inputTokens: number; outputTokens: number }): void;
   onModelStream?(textSoFar: string): void;
   onReasoningStream?(textSoFar: string): void;
+  /** Fired once when the model opens an extended-thinking block — drives a live "thinking"
+   *  placeholder for signature-only reasoning models (opus-4-7/4-8) that stream no thought text. */
+  onReasoningStart?(): void;
   /** Per-artifact native reasoning replay records (signature / thoughtSignature / reasoning
    *  item). The TUI ignores these; launch.ts uses them to persist the final reply's artifacts. */
   onReasoningArtifactStream?(artifact: import("../ai/types").ReasoningArtifact): void;
@@ -247,6 +250,11 @@ export class LaunchTui {
    *  streams, then persisted once into scrollback as a "Thinking" block on commit so the
    *  model's reasoning stays visible above the answer (gjc "think → answer" parity). */
   private streamingThought = "";
+  /** True once the model opens an extended-thinking block this step. Signature-only
+   *  reasoning models (opus-4-7/4-8) stream NO thinking text, so without this flag the
+   *  live Thinking block never appears and the wait looks frozen. Drives a placeholder
+   *  Thinking block until real thought/answer text streams. Reset each step / on commit. */
+  private thinkingActive = false;
   /** Uniform live-activity text for the live status field (reasoning OR derived fallback). */
   private streamingActivity = "";
   /** Last stream-driven draw (ms epoch) — throttles per-delta repaints to ≤10/s. */
@@ -410,6 +418,7 @@ export class LaunchTui {
         this.retryNotice = null; // a new step starts a fresh model call
         this.streamingReasoning = ""; // fresh model response this step
         this.streamingThought = "";
+        this.thinkingActive = false;
         this.streamingActivity = "";
         this.flushedReasoning = "";
         this.flushedThought = "";
@@ -452,6 +461,14 @@ export class LaunchTui {
           this.draw();
         }
       },
+      onReasoningStart: () => {
+        // The model opened an extended-thinking block. Signature-only reasoning models
+        // (opus-4-7/4-8) stream no thinking text, so flag the thinking phase so the live
+        // Thinking block renders a placeholder instead of leaving the wait blank.
+        if (this.finished || this.thinkingActive) return;
+        this.thinkingActive = true;
+        this.draw();
+      },
       onAssistant: (_raw, invocation) => {
         this.thinking = false; // model replied; now dispatching the tool
         this.retryNotice = null; // the call got through — clear any backoff notice
@@ -484,6 +501,7 @@ export class LaunchTui {
         }
         this.streamingReasoning = "";
         this.streamingThought = "";
+        this.thinkingActive = false;
         this.streamingActivity = "";
         if (invocation && invocation.tool !== "done") {
           this.runningTool = true;
@@ -650,6 +668,18 @@ export class LaunchTui {
     this.draw();
   }
+  private livePromptHighlight?: { start: number; end: number; paint: (s: string) => string };
+  /** Recolor the active `/command`·`$skill` trigger token inside the mid-turn live
+   *  input box (idle-prompt parity). Caller supplies code-point offsets into the
+   *  draft text + a painter; undefined clears it. Does nothing when `this.finished`
+   *  is set, and skips the redraw when start, end and presence are all unchanged.
+   *  NOTE(review): `paint` is not part of that comparison, so a color-only change at
+   *  identical offsets would be dropped — confirm callers cannot produce that. */
+  setLivePromptHighlight(hl?: { start: number; end: number; paint: (s: string) => string }): void {
+    if (this.finished) return;
+    const a = this.livePromptHighlight, b = hl;
+    if (a?.start === b?.start && a?.end === b?.end && (!a) === (!b)) return; // same range and same set/unset state: nothing to repaint
+    this.livePromptHighlight = hl;
+    this.draw();
+  }
   private livePromptHint: string[] = [];
   /** Mid-turn command/skill preview lines shown above the live input box, so a
    *  /command or $skill typed WHILE a turn runs visibly reacts (idle-prompt parity). */
@@ -672,6 +702,7 @@ export class LaunchTui {
       accentShadow: this.theme.color ? accentShadowPaint(this.theme) : undefined,
       placeholder: "Type your next message...",
       maxBodyRows: 2,
+      highlight: this.livePromptHighlight,
     });
     if (this.livePromptHint.length === 0) return box;
     const dim = this.theme.color ? chalk.dim : (s: string) => s;
@@ -925,6 +956,7 @@ export class LaunchTui {
     this.lastLedgerKind = null; // fresh turn: no leading spacer before the first ledger line
     this.livePromptInput = ""; // fresh turn: no next-prompt draft yet
     this.livePromptHint = []; // fresh turn: no mid-turn command preview yet
+    this.livePromptHighlight = undefined; // fresh turn: no active trigger token
     this.subagentLive = null; // fresh turn: no nested subagent in flight
     this.activityLog.length = 0; // per-turn ring: timestamps are turn-relative
     this.spinner.updateStep(0, this.footer.maxSteps);
@@ -1382,6 +1414,13 @@ export class LaunchTui {
       const liveMs = this.currentStepStartedAt ? Date.now() - this.currentStepStartedAt : undefined;
       const liveLabel = liveMs !== undefined ? `Thinking · ${(liveMs / 1000).toFixed(1)}s` : "Thinking";
       tail.push(...this.renderLiveBlock(liveLabel, liveThink, cols, rows, 6, "Thinking"));
+    } else if (isThinking && this.thinkingActive) {
+      // Signature-only reasoning models (opus-4-7/4-8) open a thinking block but stream no
+      // thought text — show a live placeholder so the wait reads as active thinking, not a
+      // frozen screen. Replaced the instant any real thought/answer text streams (branch above).
+      const liveMs = this.currentStepStartedAt ? Date.now() - this.currentStepStartedAt : undefined;
+      const liveLabel = liveMs !== undefined ? `Thinking · ${(liveMs / 1000).toFixed(1)}s` : "Thinking";
+      tail.push(...this.renderLiveBlock(liveLabel, "(thinking…)", cols, rows, 6, "Thinking"));
     }
     // Live tool output (gjc-style streaming bash stdout): while a tool runs, its

package/src/tui/components/input-box.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import chalk from "chalk";
 import { BOX_ASCII, BOX_UNICODE, boxBlock } from "./layout";
-import { visibleWidth } from "./width";
+import { visibleWidth, truncateToWidth } from "./width";
 export interface InputBoxOptions {
   cols?: number;
@@ -18,6 +18,11 @@ export interface InputBoxOptions {
   /** Shadow painter for the bottom/right "shaded" edges; defaults to a dim accent.
    *  The lit-vs-shaded two-tone contrast gives the box visible depth. */
   accentShadow?: (s: string) => string;
+  /** Paint a contiguous CHARACTER range of the typed text (e.g. the active
+   *  `/command` or `$skill` trigger token) so the user sees the invocation is
+   *  recognized as it is typed. Offsets index `Array.from(line)` code points
+   *  ([start, end)). Ignored for the placeholder and when `color` is false. */
+  highlight?: { start: number; end: number; paint: (s: string) => string };
 }
 export interface InputFrame {
@@ -38,6 +43,7 @@ function wrapWithCursor(
   text: string,
   cursor: number,
   width: number,
+  highlight?: { start: number; end: number; paint: (s: string) => string },
 ): { rows: string[]; row: number; col: number } {
   const rows: string[] = [];
   let cur = "";
@@ -67,7 +73,8 @@ function wrapWithCursor(
       continue;
     }
     if (ch !== "") {
-      cur += rendered;
+      const lit = highlight && i >= highlight.start && i < highlight.end;
+      cur += lit ? highlight.paint(rendered) : rendered;
       curW += w;
     }
   }
@@ -95,7 +102,7 @@ export function renderInputFrame(line: string, opts: InputBoxOptions = {}): Inpu
     rows = [placeholder];
     placeholderRow = true;
   } else {
-    const wrapped = wrapWithCursor(line, opts.cursor ?? line.length, textWidth);
+    const wrapped = wrapWithCursor(line, opts.cursor ?? line.length, textWidth, useColor ? opts.highlight : undefined);
     rows = wrapped.rows;
     crow = wrapped.row;
     ccol = wrapped.col;
@@ -110,10 +117,10 @@ export function renderInputFrame(line: string, opts: InputBoxOptions = {}): Inpu
     hidden = Math.min(Math.max(0, crow - maxBodyRows + 1), totalRows - maxBodyRows);
     if (crow < hidden) hidden = crow; // caret above the window → scroll up to it
     rows = rows.slice(hidden, hidden + maxBodyRows);
-    if (hidden > 0) rows[0] = `…${rows[0] ?? ""}`.slice(0, textWidth);
+    if (hidden > 0) rows[0] = truncateToWidth(`…${rows[0] ?? ""}`, textWidth);
     if (hidden + maxBodyRows < totalRows) {
       const last = rows.length - 1;
-      rows[last] = `${rows[last] ?? ""}…`.slice(0, textWidth);
+      rows[last] = truncateToWidth(`${rows[last] ?? ""}…`, textWidth);
     }
   }
   let visRow = Math.max(0, Math.min(crow - hidden, rows.length - 1));

package/src/tui/components/session-picker.ts ADDED Viewed

@@ -0,0 +1,226 @@
+/**
+ * Rich, gjc-style session picker for `/resume`.
+ *
+ * Mirrors Gajae-Code's session selector UX: a search/filter line at the top, a
+ * scrolling window of multi-line entries (title + dimmed first-message preview +
+ * a "relative time · size · N msgs" metadata line), a position indicator, and a
+ * footer with Del-to-delete / Enter-to-resume / Esc-to-cancel hints.
+ *
+ * Pure rendering — no I/O. The owning REPL drives navigation/deletion via the
+ * `SessionPicker` model and feeds the rendered lines to its picker loop.
+ */
+import chalk from "chalk";
+import { truncateToWidth } from "./width";
+import type { SessionSummary } from "../../agent/session";
+/**
+ * Human-readable byte size (e.g. "0 B", "4.2 KB", "12 MB").
+ *
+ * Non-numeric, non-finite, or negative input is treated as 0. Values below 1024 are
+ * printed as-is in bytes; larger values are divided by 1024 per unit up to TB, which
+ * is the largest unit, so anything bigger stays expressed in TB. A scaled value under
+ * 10 keeps one decimal place; 10 and above is rounded to a whole number, so a value
+ * just under a unit boundary can print as "1024 KB".
+ */
+export function formatBytes(n: number | undefined): string {
+  const v = typeof n === "number" && Number.isFinite(n) && n >= 0 ? n : 0; // sanitize: anything unusable becomes 0
+  if (v < 1024) return `${v} B`;
+  const units = ["KB", "MB", "GB", "TB"];
+  let size = v / 1024; // start in KB
+  let i = 0;
+  while (size >= 1024 && i < units.length - 1) { // climb units, stopping at the last one
+    size /= 1024;
+    i++;
+  }
+  return `${size < 10 ? size.toFixed(1) : Math.round(size)} ${units[i]}`;
+}
+/**
+ * Relative "X ago" timestamp, matching gjc's session-selector phrasing.
+ *
+ * Returns "unknown" when `fromMs` is not a finite positive number. A `fromMs` later
+ * than `nowMs` is clamped to a zero difference and therefore reads "just now". Under
+ * a minute is "just now"; then whole minutes, hours and days (up to "6 days ago");
+ * seven days or older falls back to `toLocaleDateString()`, so that form depends on
+ * the runtime locale. `nowMs` defaults to `Date.now()` and can be overridden.
+ */
+export function formatRelativeTime(fromMs: number | undefined, nowMs: number = Date.now()): string {
+  if (typeof fromMs !== "number" || !Number.isFinite(fromMs) || fromMs <= 0) return "unknown";
+  const diff = Math.max(0, nowMs - fromMs); // future timestamps collapse to 0
+  const mins = Math.floor(diff / 60000);
+  const hours = Math.floor(diff / 3600000);
+  const days = Math.floor(diff / 86400000);
+  if (mins < 1) return "just now";
+  if (mins < 60) return `${mins} minute${mins !== 1 ? "s" : ""} ago`;
+  if (hours < 24) return `${hours} hour${hours !== 1 ? "s" : ""} ago`;
+  if (days === 1) return "1 day ago";
+  if (days < 7) return `${days} days ago`;
+  return new Date(fromMs).toLocaleDateString(); // a week or more: absolute date
+}
+/**
+ * Navigable model for the resume picker: an ordered session list with a
+ * case-insensitive AND-of-terms filter across id/title/preview/cwd, a cursor
+ * into the *filtered* view, and in-place removal for delete.
+ *
+ * The constructor copies the input array, so removals never touch the caller's
+ * list. The stored cursor is clamped on read by `cursorIndex()`, which the other
+ * navigation methods go through.
+ */
+export class SessionPicker {
+  private readonly all: SessionSummary[];
+  private query = "";
+  private cursor = 0;
+  constructor(sessions: readonly SessionSummary[]) {
+    this.all = sessions.slice(); // private copy
+  }
+  /** Sessions matching the current filter (every whitespace term must match).
+   *  With an empty or blank query this returns the internal array itself, not a copy. */
+  visible(): SessionSummary[] {
+    const q = this.query.trim().toLowerCase();
+    if (!q) return this.all;
+    const terms = q.split(/\s+/);
+    return this.all.filter(s => {
+      const hay = [s.id, s.title ?? "", s.preview ?? "", s.cwd ?? ""].join(" ").toLowerCase(); // one lowercase haystack per session
+      return terms.every(t => hay.includes(t));
+    });
+  }
+  cursorIndex(): number { // stored cursor clamped into the filtered view (0 when it is empty)
+    const n = this.visible().length;
+    if (n === 0) return 0;
+    return Math.max(0, Math.min(this.cursor, n - 1));
+  }
+  selected(): SessionSummary | undefined { // highlighted session, or undefined when nothing matches
+    return this.visible()[this.cursorIndex()];
+  }
+  isEmpty(): boolean { // true when the FILTERED view is empty, not necessarily the full list
+    return this.visible().length === 0;
+  }
+  filter(): string { // raw, untrimmed query text
+    return this.query;
+  }
+  setFilter(query: string): void { // replacing the query always resets the cursor to the top
+    this.query = query;
+    this.cursor = 0;
+  }
+  typeChar(ch: string): void { // append to the query (cursor resets via setFilter)
+    this.setFilter(this.query + ch);
+  }
+  backspace(): void { // NOTE(review): slice(0, -1) drops one UTF-16 code unit, so it can split a surrogate pair (e.g. an emoji)
+    this.setFilter(this.query.slice(0, -1));
+  }
+  up(): void { // wraps from the first entry to the last
+    const n = this.visible().length;
+    if (n === 0) return;
+    this.cursor = (this.cursorIndex() - 1 + n) % n;
+  }
+  down(): void { // wraps from the last entry to the first
+    const n = this.visible().length;
+    if (n === 0) return;
+    this.cursor = (this.cursorIndex() + 1) % n;
+  }
+  /** Move by a window without wrapping (PageUp/PageDown). `size` is the number of
+   *  entries to jump (at least 1); the result is clamped to the first/last entry. */
+  page(dir: 1 | -1, size = 3): void {
+    const n = this.visible().length;
+    if (n === 0) return;
+    this.cursor = Math.max(0, Math.min(n - 1, this.cursorIndex() + dir * Math.max(1, size)));
+  }
+  /** Remove the highlighted session from the model; returns it (or undefined).
+   *  The cursor is pulled back if it now points past the end of the filtered view. */
+  removeSelected(): SessionSummary | undefined {
+    const sel = this.selected();
+    if (!sel) return undefined;
+    const idx = this.all.findIndex(s => s.id === sel.id); // locate by id in the unfiltered list
+    if (idx >= 0) this.all.splice(idx, 1);
+    const n = this.visible().length;
+    if (this.cursor >= n) this.cursor = Math.max(0, n - 1);
+    return sel;
+  }
+}
+export interface RenderSessionPickerOptions { // every field is optional; defaults are applied by renderSessionPicker
+  /** Title line(s) shown above the search line; split on "\n", one output line each. */
+  title?: string;
+  /** Total width to fit each line to (default 80; values below 20 are raised to 20). */
+  cols?: number;
+  /** Total body rows available; the visible window is derived from this (default 24). */
+  rows?: number;
+  /** Use unicode glyphs (default true); when false, ASCII stand-ins are used instead. */
+  unicode?: boolean;
+  /** Apply chalk color (default true). */
+  color?: boolean;
+  /** Clock override for relative-time formatting (tests); defaults to `Date.now()`. */
+  nowMs?: number;
+  /** When set, the matching session shows a "press Del again to delete" prompt
+   *  in place of its metadata line. */
+  confirmDeleteId?: string;
+}
+/**
+ * Render a `SessionPicker` to lines (gjc-style multi-line entries). Pure, apart from
+ * reading `Date.now()` when `opts.nowMs` is not supplied.
+ *
+ * Output order: title line(s), the search line, a blank line, then a window of
+ * entries around the cursor, an optional "(position/total)" line when the window
+ * does not show every match, and the key-hint footer. Each entry is a title line
+ * plus a dimmed preview line (or a single preview line when there is no title),
+ * then either its metadata line or the delete-confirmation prompt, then a blank
+ * line. When nothing matches, a "no sessions match" notice and the footer are
+ * returned instead. Every non-blank line is truncated to `cols`.
+ */
+export function renderSessionPicker(picker: SessionPicker, opts: RenderSessionPickerOptions = {}): string[] {
+  const unicode = opts.unicode !== false;
+  const color = opts.color !== false;
+  const cols = Math.max(20, opts.cols ?? 80); // never narrower than 20 columns
+  const nowMs = opts.nowMs ?? Date.now();
+  const tint = (s: string, fn: (x: string) => string): string => (color ? fn(s) : s); // apply a painter only when color is on
+  const fit = (s: string): string => truncateToWidth(s, cols);
+  const pointer = unicode ? "\u276f" : ">"; // ❯
+  const dot = unicode ? "\u00b7" : "-"; // ·
+  const arrow = unicode ? "\u203a" : ">"; // ›
+  const out: string[] = [];
+  const titleLines = opts.title ? opts.title.split("\n") : [];
+  for (const t of titleLines) out.push(fit(t ? tint(t, chalk.bold) : ""));
+  // Search/filter line (gjc places an input box at the top).
+  const q = picker.filter();
+  const searchValue = q ? q : tint("type to filter", chalk.gray); // gray hint while the query is empty
+  out.push(fit(`${tint("search", chalk.gray)} ${tint(arrow, chalk.cyan)} ${searchValue}`));
+  out.push("");
+  const items = picker.visible();
+  const footerKeys = unicode
+    ? `\u2191/\u2193 move \u00b7 enter resume \u00b7 del delete \u00b7 esc cancel`
+    : `up/down move - enter resume - del delete - esc cancel`;
+  if (items.length === 0) {
+    out.push(fit(tint("  no sessions match", chalk.gray)));
+    out.push("");
+    out.push(fit(tint(`  [${footerKeys}]`, chalk.gray)));
+    return out;
+  }
+  // Each entry occupies up to 3 content lines + 1 blank; derive the window from
+  // available rows, leaving room for title/search/footer chrome.
+  const linesPerItem = 4;
+  const chrome = titleLines.length + 2 /* search + blank */ + 2 /* position + footer */;
+  const avail = Math.max(linesPerItem, (opts.rows ?? 24) - chrome); // always room for at least one entry
+  const maxVisible = Math.max(1, Math.min(items.length, Math.floor(avail / linesPerItem)));
+  const cur = picker.cursorIndex();
+  let start = Math.max(0, cur - Math.floor(maxVisible / 2)); // keep the cursor near the middle of the window
+  start = Math.min(start, Math.max(0, items.length - maxVisible)); // but never scroll past the end
+  const end = Math.min(items.length, start + maxVisible);
+  for (let i = start; i < end; i++) {
+    const s = items[i]!;
+    const isCur = i === cur;
+    const isConfirm = !!opts.confirmDeleteId && s.id === opts.confirmDeleteId;
+    const cursorStr = isCur ? tint(`${pointer} `, chalk.cyan) : "  ";
+    const maxw = Math.max(1, cols - 2); // cursor/indent prefix is 2 columns
+    const firstMsg = (s.preview || "(no preview)").replace(/\s+/g, " ").trim(); // collapse whitespace runs to single spaces
+    if (s.title) {
+      const titleTxt = truncateToWidth(s.title, maxw);
+      out.push(fit(cursorStr + (isCur ? tint(titleTxt, (x: string) => chalk.cyan.bold(x)) : titleTxt)));
+      out.push(fit("  " + tint(truncateToWidth(firstMsg, maxw), chalk.dim)));
+    } else {
+      const msg = truncateToWidth(firstMsg, maxw); // no title: the preview doubles as the headline
+      out.push(fit(cursorStr + (isCur ? tint(msg, (x: string) => chalk.cyan.bold(x)) : msg)));
+    }
+    if (isConfirm) {
+      out.push(fit(tint(`  press Del again to delete ${dot} any other key cancels`, chalk.yellow)));
+    } else {
+      const meta = `  ${formatRelativeTime(s.mtimeMs, nowMs)} ${dot} ${formatBytes(s.sizeBytes)} ${dot} ${s.messageCount} msg${s.messageCount !== 1 ? "s" : ""}`;
+      out.push(fit(tint(truncateToWidth(meta, cols), chalk.dim)));
+    }
+    out.push("");
+  }
+  if (start > 0 || end < items.length) { // position indicator only when some matches are scrolled out of view
+    out.push(fit(tint(`  (${cur + 1}/${items.length})`, chalk.gray)));
+  }
+  out.push(fit(tint(`  [${footerKeys}]`, chalk.gray)));
+  return out;
+}

package/src/agent/tool-registry.ts DELETED Viewed

@@ -1,54 +0,0 @@
-import { readTool, writeTool, editTool, bashTool, findTool, searchTool, lsTool, type ToolResult } from "./tools";
-export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
-export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
-  read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
-  write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
-  edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
-  bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
-  find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
-  search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
-  ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
-};
-export const TOOL_PROTOCOL = [
-  "You have these tools (call exactly ONE per step):",
-  "1. read   {filePath, lineRange?, raw?} — read a file",
-  "2. write  {filePath, content}         — create/overwrite a file",
-  "3. edit   {filePath, editBlock}       — replace/insert lines",
-  "4. bash   {command, timeoutMs?, cwd?, env?} — run a shell command",
-  "5. find   {globPattern}               — find files by name",
-  "6. search {pattern, globPattern?, ignoreCase?, context?, maxMatches?} — grep",
-  "7. ls     {dirPath}                   — list a directory",
-  "8. done   {reason?}                   — call when done",
-  "",
-  "Reply with STRICT JSON only:",
-  '{ "tool": "<name>", "arguments": { ... } }',
-].join("\n");
-export const READONLY_TOOL_PROTOCOL = [
-  "You have these READ-ONLY tools:",
-  "1. read   {filePath, lineRange?}      — read a file",
-  "2. find   {globPattern}               — find files by name",
-  "3. search {pattern, globPattern?, ignoreCase?} — grep",
-  "4. ls     {dirPath}                   — list a directory",
-  "5. done   {reason?}                   — call when complete",
-  "",
-  "Reply with STRICT JSON only:",
-  '{ "tool": "<name>", "arguments": { ... } }',
-].join("\n");
-export function nearestToolName(name: string, known: string[]): string | undefined {
-  const want = name.trim().toLowerCase();
-  if (!want) return undefined;
-  let best: string | undefined;
-  let bestD = Infinity;
-  for (const k of known) {
-    const kl = k.toLowerCase();
-    if (kl === want) return k;
-    const d = kl.startsWith(want) || want.startsWith(kl) ? 1 : 10;
-    if (d < bestD) { bestD = d; best = k; }
-  }
-  return bestD <= 2 ? best : undefined;
-}