@vextlabs/theron-cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/repl.js CHANGED
@@ -9,9 +9,13 @@ import chalk from "chalk";
9
9
  import { spawnSync } from "node:child_process";
10
10
  import { streamChat, fetchInteractionPlan } from "./api.js";
11
11
  import { loadCapConfig, resolveCapPolicy } from "./cap_config.js";
12
+ import { loadProjectMemory, formatProjectMemoryForRequest } from "./project_memory.js";
12
13
  import { rankProfilesForPrompt } from "./profile_match.js";
13
- import { TOOL_REGISTRY, TOOL_SCHEMAS } from "./tools/index.js";
14
+ import { TOOL_REGISTRY, TOOL_SCHEMAS, READONLY_TOOL_SCHEMAS, MUTATING_TOOLS } from "./tools/index.js";
14
15
  import { renderMarkdown, ui } from "./render.js";
16
+ import { loadCustomCommands, substituteArgs } from "./slash_commands.js";
17
+ import { resolveFileRefs } from "./file_refs.js";
18
+ import { sessionIdForCwd, loadSession, saveSession, deleteSession, listSessions, } from "./sessions.js";
15
19
  import { getProfileOrDefault, listProfiles, DEFAULT_PROFILE_SLUG } from "./profiles/index.js";
16
20
  import { runVerifiers, summarizeIssues, formatForNextTurn } from "./verifiers/index.js";
17
21
  import { connectionsCommand } from "./connections.js";
@@ -43,11 +47,129 @@ export async function runRepl(opts) {
43
47
  /** Verifier issues to prepend to the NEXT user message — surfaces
44
48
  * after the model self-corrects. Cleared once consumed. */
45
49
  pendingVerifierBlock: "",
50
+ /** Auto-loaded project memory (THERON.md / CLAUDE.md / AGENTS.md).
51
+ * Reloaded whenever cwd changes (/cd). Injected into every turn so
52
+ * the model honors repo-local rules without the user re-typing them
53
+ * — Theron's CLAUDE.md analogue. */
54
+ projectMemory: loadProjectMemory(opts.cwd),
55
+ /** Plan mode — read-only tools + plan instruction + hard write deny.
56
+ * Toggled by --plan / /plan / "approve". */
57
+ planMode: opts.planMode === true,
58
+ /** Custom slash commands loaded from ~/.theron/commands + ./.theron/
59
+ * commands. Reloaded on /cd and /commands reload. */
60
+ customCommands: loadCustomCommands(opts.cwd),
61
+ /** Pretty-render the assistant's markdown at end of turn. */
62
+ renderMode: opts.renderMode === true,
63
+ /** Stable session id keyed to cwd, for save/resume. Updated on /cd. */
64
+ sessionId: sessionIdForCwd(opts.cwd),
65
+ /** Creation timestamp of the active on-disk session. */
66
+ sessionCreated: new Date().toISOString(),
46
67
  };
47
68
  const updateCtx = () => {
48
69
  ctx.cwd = session.cwd;
49
70
  ctx.yolo = session.yolo;
50
71
  };
72
+ // Persist the conversation under the cwd-keyed session id. Called after
73
+ // every turn (/clear deletes the saved session instead). Never throws — saveSession
74
+ // fails open. Skipped in headless mode where we don't want to mutate
75
+ // the user's saved history from a one-shot pipe.
76
+ const persistSession = () => {
77
+ if (opts.headless)
78
+ return;
79
+ const state = {
80
+ id: session.sessionId,
81
+ cwd: session.cwd,
82
+ created: session.sessionCreated,
83
+ updated: new Date().toISOString(),
84
+ profile: session.profile.slug,
85
+ messages,
86
+ };
87
+ saveSession(state);
88
+ };
89
+ // The memory text we inject as a leading system note + send as the
90
+ // `project_context` body field. Recomputed when memory reloads.
91
+ let projectContext = formatProjectMemoryForRequest(session.projectMemory);
92
+ // Reload project memory from the current cwd (after /cd) and refresh
93
+ // the injected context string. Returns true if any memory is now loaded.
94
+ const reloadProjectMemory = () => {
95
+ session.projectMemory = loadProjectMemory(session.cwd);
96
+ projectContext = formatProjectMemoryForRequest(session.projectMemory);
97
+ return session.projectMemory.sources.length > 0;
98
+ };
99
+ // After a /cd the new directory may carry its own .theron/commands and
100
+ // maps to a different session id. Refresh both so custom commands and
101
+ // save/resume track the directory the user is actually in.
102
+ const reloadDirScopedState = () => {
103
+ session.customCommands = loadCustomCommands(session.cwd);
104
+ session.sessionId = sessionIdForCwd(session.cwd);
105
+ };
106
+ // ── Session resume ────────────────────────────────────────────────
107
+ // --continue → the cwd-keyed session; --resume [id] → that session or a
108
+ // picker. Seed the loaded messages before the loop. Headless skips
109
+ // resume (a one-shot pipe shouldn't replay a stale conversation).
110
+ //
111
+ // Numbered session picker for `--resume` with no id. Reuses the REPL's
112
+ // own readline (never opens a competing Interface on the same stdin).
113
+ // Returns the chosen session id, or null on empty list / invalid pick.
114
+ const pickSession = async () => {
115
+ const sessions = listSessions();
116
+ if (sessions.length === 0) {
117
+ process.stdout.write(ui.info("no saved sessions to resume.\n"));
118
+ return null;
119
+ }
120
+ process.stdout.write(ui.info("\nsaved sessions — pick a number to resume:\n"));
121
+ const home = process.env.HOME || "";
122
+ sessions.slice(0, 20).forEach((s, i) => {
123
+ const shortCwd = home && s.cwd.startsWith(home) ? "~" + s.cwd.slice(home.length) : s.cwd;
124
+ process.stdout.write(` ${ui.actionChip(i + 1, `${s.id} · ${s.messageCount} msgs · ${s.profile} · ${shortCwd}`)}\n`);
125
+ });
126
+ if (!rl || rlClosed)
127
+ return null;
128
+ const answer = await new Promise((resolve) => {
129
+ try {
130
+ rl.question(ui.prompt(), (a) => resolve(a));
131
+ }
132
+ catch {
133
+ resolve("");
134
+ }
135
+ });
136
+ const n = Number(answer.trim());
137
+ if (Number.isInteger(n) && n >= 1 && n <= Math.min(sessions.length, 20)) {
138
+ return sessions[n - 1].id;
139
+ }
140
+ process.stdout.write(ui.info("no valid selection — starting fresh.\n"));
141
+ return null;
142
+ };
143
+ let resumedNotice = "";
144
+ if (!opts.headless && (opts.continueSession || opts.resumeSession)) {
145
+ let toLoad = null;
146
+ if (opts.continueSession) {
147
+ toLoad = session.sessionId;
148
+ }
149
+ else if (opts.resumeSession) {
150
+ if (opts.resumeId) {
151
+ toLoad = opts.resumeId;
152
+ }
153
+ else {
154
+ // Numbered picker — reuses the REPL's own readline (see pickSession).
155
+ toLoad = await pickSession();
156
+ }
157
+ }
158
+ if (toLoad) {
159
+ const loaded = loadSession(toLoad);
160
+ if (loaded && loaded.messages.length > 0) {
161
+ messages.push(...loaded.messages);
162
+ session.sessionId = loaded.id;
163
+ session.sessionCreated = loaded.created;
164
+ resumedNotice = `◉ resumed session ${loaded.id} (${loaded.messages.length} messages)`;
165
+ }
166
+ else {
167
+ resumedNotice = opts.continueSession
168
+ ? "◉ no saved session for this directory yet — starting fresh"
169
+ : `◉ session not found: ${toLoad} — starting fresh`;
170
+ }
171
+ }
172
+ }
51
173
  if (!opts.oneShot) {
52
174
  // Branded welcome — block-letter THERON banner + pill + numbered
53
175
  // security notes + quickstart status line. Same flow Claude Code
@@ -93,6 +215,26 @@ export async function runRepl(opts) {
93
215
  if (session.profile.promptStarters && session.profile.promptStarters.length > 0) {
94
216
  process.stdout.write(ui.info(` try: "${session.profile.promptStarters[0]}"\n`));
95
217
  }
218
+ // Project-memory notice — same affordance Claude Code prints when it
219
+ // picks up a CLAUDE.md. Tells the user which file(s) Theron is now
220
+ // honoring so the behavior is never invisible.
221
+ if (session.projectMemory.sources.length > 0) {
222
+ const names = session.projectMemory.sources.map((s) => path.basename(s)).join(", ");
223
+ process.stdout.write(ui.info(`◉ loaded project memory: ${names}${session.projectMemory.truncated ? " (truncated)" : ""}\n`));
224
+ }
225
+ // Custom-command notice — tell the user which /<name> commands are live.
226
+ if (session.customCommands.map.size > 0) {
227
+ const names = Array.from(session.customCommands.map.keys()).map((n) => "/" + n).join(", ");
228
+ process.stdout.write(ui.info(`◉ custom commands: ${names}\n`));
229
+ }
230
+ // Resume notice — which session we restored (if any).
231
+ if (resumedNotice) {
232
+ process.stdout.write(ui.info(resumedNotice + "\n"));
233
+ }
234
+ // Plan-mode notice — make the read-only stance visible on launch.
235
+ if (session.planMode) {
236
+ process.stdout.write(ui.warn("plan mode — read-only. Write / Edit / Bash / Stoa are blocked until you /plan or type 'approve'.\n"));
237
+ }
96
238
  process.stdout.write("\n");
97
239
  process.stdout.write(ui.info("type a message · /help for commands · /mode list to see all 33 · Ctrl-C to quit\n\n"));
98
240
  }
@@ -128,7 +270,76 @@ export async function runRepl(opts) {
128
270
  if (trimmed === "/quit" || trimmed === "/exit")
129
271
  break;
130
272
  if (trimmed === "/help") {
131
- process.stdout.write("\n" + renderSlashHelp() + "\n\n");
273
+ const customHelp = Array.from(session.customCommands.map.values()).map((c) => ({
274
+ trigger: "/" + c.name,
275
+ desc: c.description ?? `custom command (${c.source}) — ${path.basename(c.file)}`,
276
+ }));
277
+ process.stdout.write("\n" + renderSlashHelp(customHelp) + "\n\n");
278
+ continue;
279
+ }
280
+ // /plan — toggle plan mode. Also exits on the bare word "approve"
281
+ // (handled further down so it can't be a slash). Read-only tools +
282
+ // plan instruction when ON; normal policy when OFF.
283
+ if (trimmed === "/plan") {
284
+ session.planMode = !session.planMode;
285
+ if (session.planMode) {
286
+ process.stdout.write(ui.warn("plan mode ON — read-only. Write / Edit / Bash / Stoa are blocked (even with --yes). " +
287
+ "The model will investigate and propose a plan. Type 'approve' or /plan to exit.\n\n"));
288
+ }
289
+ else {
290
+ process.stdout.write(ui.info("plan mode OFF — normal tool policy restored.\n\n"));
291
+ }
292
+ continue;
293
+ }
294
+ // /render — toggle end-of-turn markdown rendering.
295
+ if (trimmed === "/render") {
296
+ session.renderMode = !session.renderMode;
297
+ process.stdout.write(session.renderMode
298
+ ? ui.info("markdown rendering ON — the reply is pretty-printed once the turn ends.\n\n")
299
+ : ui.info("markdown rendering OFF — raw streamed text.\n\n"));
300
+ continue;
301
+ }
302
+ // /commands — list custom slash commands; `/commands reload` re-scans.
303
+ if (trimmed === "/commands" || trimmed === "/commands reload") {
304
+ if (trimmed === "/commands reload") {
305
+ session.customCommands = loadCustomCommands(session.cwd);
306
+ process.stdout.write(ui.info("re-scanned commands directories.\n"));
307
+ }
308
+ const cmds = Array.from(session.customCommands.map.values());
309
+ if (cmds.length === 0) {
310
+ process.stdout.write(ui.info("\nno custom commands. Add a markdown file at ./.theron/commands/<name>.md " +
311
+ "(or ~/.theron/commands/<name>.md) — typing /<name> sends its body as the prompt, " +
312
+ "with $ARGUMENTS / $1 / $2 substituted.\n\n"));
313
+ }
314
+ else {
315
+ process.stdout.write(ui.info(`\ncustom commands (${cmds.length}):\n`));
316
+ for (const c of cmds) {
317
+ const desc = c.description ? ` — ${c.description}` : "";
318
+ process.stdout.write(ui.info(` /${c.name.padEnd(14)} (${c.source})${desc}\n`));
319
+ }
320
+ if (session.customCommands.dirs.length > 0) {
321
+ process.stdout.write(ui.info(`\nscanned: ${session.customCommands.dirs.join(", ")}\n`));
322
+ }
323
+ process.stdout.write("\n");
324
+ }
325
+ continue;
326
+ }
327
+ // /sessions — list saved sessions so the user knows what --resume can pick.
328
+ if (trimmed === "/sessions") {
329
+ const sessions = listSessions();
330
+ if (sessions.length === 0) {
331
+ process.stdout.write(ui.info("\nno saved sessions yet. They're written under ~/.theron/sessions/.\n\n"));
332
+ }
333
+ else {
334
+ process.stdout.write(ui.info(`\nsaved sessions (${sessions.length}) — resume with \`theron --resume <id>\`:\n`));
335
+ for (const s of sessions.slice(0, 30)) {
336
+ const here = s.id === session.sessionId ? "◉ " : " ";
337
+ const home = process.env.HOME || "";
338
+ const shortCwd = home && s.cwd.startsWith(home) ? "~" + s.cwd.slice(home.length) : s.cwd;
339
+ process.stdout.write(` ${here}${ui.toolLabel(s.id, "")} ${ui.info(`${s.messageCount} msgs · ${s.profile} · ${shortCwd}`)}\n`);
340
+ }
341
+ process.stdout.write("\n");
342
+ }
132
343
  continue;
133
344
  }
134
345
  if (trimmed === "/status") {
@@ -143,9 +354,41 @@ export async function runRepl(opts) {
143
354
  if (session.pinnedSpecs.length > 0) {
144
355
  process.stdout.write(ui.info(`pinned: ${session.pinnedSpecs.map((s) => "@" + s).join(", ")}\n`));
145
356
  }
357
+ if (session.projectMemory.sources.length > 0) {
358
+ process.stdout.write(ui.info(`memory: ${session.projectMemory.sources.join(", ")}\n`));
359
+ }
360
+ else {
361
+ process.stdout.write(ui.info(`memory: none (add a THERON.md to this repo)\n`));
362
+ }
363
+ process.stdout.write(ui.info(`plan mode: ${session.planMode ? "ON (read-only)" : "off"} · render: ${session.renderMode ? "on" : "off"}\n`));
364
+ process.stdout.write(ui.info(`session: ${session.sessionId} (${messages.length} messages)\n`));
365
+ if (session.planMode) {
366
+ process.stdout.write(ui.warn("Write / Edit / Bash / Stoa are blocked. /plan or 'approve' to exit.\n"));
367
+ }
146
368
  process.stdout.write("\n");
147
369
  continue;
148
370
  }
371
+ // /memory — show what project-memory files are loaded; `/memory reload`
372
+ // re-reads them from disk (handy after you edit THERON.md mid-session).
373
+ if (trimmed === "/memory" || trimmed === "/memory reload") {
374
+ if (trimmed === "/memory reload")
375
+ reloadProjectMemory();
376
+ if (session.projectMemory.sources.length === 0) {
377
+ process.stdout.write(ui.info("\nno project memory loaded. Add a THERON.md (or CLAUDE.md / AGENTS.md) " +
378
+ "to this directory or any parent, or ~/.theron/THERON.md for global rules.\n\n"));
379
+ }
380
+ else {
381
+ process.stdout.write(ui.info(`\nproject memory${trimmed.endsWith("reload") ? " (reloaded)" : ""}:\n`));
382
+ for (const s of session.projectMemory.sources) {
383
+ process.stdout.write(ui.info(` ${s}\n`));
384
+ }
385
+ if (session.projectMemory.truncated) {
386
+ process.stdout.write(ui.warn(" (one or more files were truncated to 32KB)\n"));
387
+ }
388
+ process.stdout.write("\n");
389
+ }
390
+ continue;
391
+ }
149
392
  if (trimmed === "/mode" || trimmed === "/mode list") {
150
393
  // Render the full profile registry. Grouped two columns for
151
394
  // readability — 33 entries is enough to want layout.
@@ -185,6 +428,12 @@ export async function runRepl(opts) {
185
428
  if (trimmed === "/clear") {
186
429
  messages.length = 0;
187
430
  pendingActions = [];
431
+ // Truncate the on-disk session too, otherwise the next save would
432
+ // re-persist an empty conversation under the same id and a later
433
+ // --continue would resume nothing useful. Deleting fully resets it.
434
+ if (!opts.headless)
435
+ deleteSession(session.sessionId);
436
+ session.sessionCreated = new Date().toISOString();
188
437
  process.stdout.write(ui.info("conversation cleared\n\n"));
189
438
  continue;
190
439
  }
@@ -210,7 +459,17 @@ export async function runRepl(opts) {
210
459
  throw new Error("not a directory");
211
460
  session.cwd = next;
212
461
  updateCtx();
213
- process.stdout.write(ui.info(`cwd ${session.cwd}\n\n`));
462
+ // New directory may carry a different (or no) project-memory
463
+ // file — reload so the model honors THIS repo's rules. Also
464
+ // re-scan custom commands and re-key the save/resume session id.
465
+ reloadProjectMemory();
466
+ reloadDirScopedState();
467
+ process.stdout.write(ui.info(`cwd → ${session.cwd}\n`));
468
+ if (session.projectMemory.sources.length > 0) {
469
+ const names = session.projectMemory.sources.map((s) => path.basename(s)).join(", ");
470
+ process.stdout.write(ui.info(`◉ project memory: ${names}\n`));
471
+ }
472
+ process.stdout.write("\n");
214
473
  }
215
474
  catch (err) {
216
475
  process.stdout.write(ui.error(`cd failed: ${err instanceof Error ? err.message : String(err)}\n\n`));
@@ -442,10 +701,10 @@ export async function runRepl(opts) {
442
701
  }
443
702
  }
444
703
  if (trimmed === "/bug") {
445
- // Try to open the issue tracker in the user's browser. We fall
704
+ // Open the public feedback surface in the user's browser. We fall
446
705
  // back to printing the URL when the platform opener is missing
447
706
  // (e.g. headless CI) — either way the user sees the link.
448
- const issueUrl = "https://github.com/Vext-Labs-Inc/sucrityflash/issues/new?labels=cli&title=Theron+CLI+bug";
707
+ const issueUrl = "https://tryvext.com";
449
708
  const opener = process.platform === "darwin" ? "open" :
450
709
  process.platform === "win32" ? "start" :
451
710
  "xdg-open";
@@ -464,11 +723,45 @@ export async function runRepl(opts) {
464
723
  process.stdout.write(ui.info("include `theron --version` output + the prompt that broke.\n\n"));
465
724
  continue;
466
725
  }
467
- // Unknown slash → friendly nudge.
726
+ // Custom slash command — substitute args into its body and FALL
727
+ // THROUGH into the normal prompt path (set `trimmed`, do NOT
728
+ // continue) so it becomes this turn's prompt and runs through pins /
729
+ // verifier / message-push like any typed message. Runs AFTER every
730
+ // built-in check (the unknown-slash guard sits below), so a custom
731
+ // command can never shadow a built-in (and load-time rejects
732
+ // reserved names anyway).
733
+ let isExpandedCommand = false;
468
734
  if (trimmed.startsWith("/")) {
735
+ const [head, ...argTokens] = trimmed.slice(1).split(/\s+/);
736
+ const cmdName = (head || "").toLowerCase();
737
+ const custom = session.customCommands.map.get(cmdName);
738
+ if (custom) {
739
+ const argString = argTokens.join(" ");
740
+ const expanded = substituteArgs(custom.body, argString).trim();
741
+ if (expanded) {
742
+ process.stdout.write(ui.info(`▸ /${cmdName}${argString ? " " + argString : ""}\n`));
743
+ trimmed = expanded;
744
+ isExpandedCommand = true;
745
+ }
746
+ else {
747
+ process.stdout.write(ui.error(`/${cmdName} expanded to an empty prompt — nothing to send.\n\n`));
748
+ continue;
749
+ }
750
+ }
751
+ }
752
+ // Unknown slash → friendly nudge. (Custom commands already matched
753
+ // above and set isExpandedCommand; only a real unknown reaches here.)
754
+ if (!isExpandedCommand && trimmed.startsWith("/")) {
469
755
  process.stdout.write(ui.error(`unknown command: ${trimmed.split(/\s/)[0]}. type /help for the list.\n\n`));
470
756
  continue;
471
757
  }
758
+ // "approve" — exit plan mode and restore the normal tool policy. Only
759
+ // meaningful while in plan mode; otherwise it's just a chat message.
760
+ if (session.planMode && /^approve$/i.test(trimmed)) {
761
+ session.planMode = false;
762
+ process.stdout.write(ui.info("plan approved — plan mode OFF, normal tool policy restored. Re-send your go-ahead to execute.\n\n"));
763
+ continue;
764
+ }
472
765
  // If the user typed a bare 1-4 and we just rendered action chips,
473
766
  // expand it into the action's prompt — terminal analogue of
474
767
  // clicking an amber chip on web.
@@ -489,6 +782,26 @@ export async function runRepl(opts) {
489
782
  // forced-spec extractor (interaction.ts) picks them up. Cleared
490
783
  // after one turn — same shape the web composer uses.
491
784
  let toSend = trimmed;
785
+ // @-FILE refs — resolve BEFORE the pin/specialist @-scan so any
786
+ // @<path> that names a real file is inlined (and stripped) rather
787
+ // than mistaken for a specialist. Path traversal is confined to cwd
788
+ // and secrets/binaries are skipped (see file_refs.ts). Bare @words
789
+ // that aren't files are left untouched for the @-mention router.
790
+ {
791
+ const refs = resolveFileRefs(toSend, session.cwd);
792
+ if (refs.attachments.length > 0) {
793
+ toSend = refs.text;
794
+ for (const a of refs.attachments) {
795
+ const kb = (Buffer.byteLength(a.content, "utf8") / 1024).toFixed(1);
796
+ process.stdout.write(ui.info(`◉ inlined @${a.token} (${kb} KB${a.truncated ? ", truncated" : ""})\n`));
797
+ }
798
+ }
799
+ // Surface refused path-like tokens so a silently-skipped @file
800
+ // doesn't look like it worked.
801
+ for (const s of refs.skipped) {
802
+ process.stdout.write(ui.warn(`@${s.token}: ${s.reason}\n`));
803
+ }
804
+ }
492
805
  let activePins = [];
493
806
  if (session.pinnedSpecs.length > 0) {
494
807
  activePins = [...session.pinnedSpecs];
@@ -520,7 +833,24 @@ export async function runRepl(opts) {
520
833
  toSend = need.map((s) => "@" + s).join(" ") + " " + toSend;
521
834
  }
522
835
  }
523
- messages.push({ role: "user", content: toSend });
836
+ // Seed auto-loaded project memory (THERON.md / CLAUDE.md / AGENTS.md)
837
+ // as the FIRST message of a fresh conversation. The substrate's CLI
838
+ // message schema has no `system` role, so the memory rides as a
839
+ // labeled leading `user` note — this works with NO server change. The
840
+ // same text is ALSO sent as the structured `project_context` field
841
+ // (see runOneTurn → streamChat) for servers that handle it natively.
842
+ // Re-seeded after /clear (which empties `messages`) so the model never
843
+ // loses the repo's rules mid-session.
844
+ if (messages.length === 0 && projectContext) {
845
+ messages.push({ role: "user", content: projectContext });
846
+ }
847
+ // Plan-mode instruction. The CLI message schema has no `system` role,
848
+ // so — exactly like projectContext — the instruction rides as a
849
+ // leading note on this turn's user message. The model MAY ignore it;
850
+ // the executor-side hard deny (see runOneTurn) is the real safety net,
851
+ // so correctness never depends on the model honoring this text.
852
+ const planPrefixed = session.planMode ? PLAN_MODE_INSTRUCTION + "\n\n" + toSend : toSend;
853
+ messages.push({ role: "user", content: planPrefixed });
524
854
  // Fire the interaction-plan classifier in parallel with the first
525
855
  // model turn. The plan is shared across web/CLI/IDE — if it wins
526
856
  // the race we print the amber headline above the streaming text
@@ -536,17 +866,22 @@ export async function runRepl(opts) {
536
866
  });
537
867
  let planPrinted = false;
538
868
  void planPromise.then((p) => {
539
- if (p && !planPrinted) {
869
+ // Headline is interactive chrome — suppress it in headless mode so
870
+ // stdout stays clean for piping / JSON.
871
+ if (p && !planPrinted && !opts.headless) {
540
872
  planPrinted = true;
541
873
  process.stdout.write("\n" + ui.planHeadline(p.headline) + "\n");
542
874
  }
543
875
  });
544
876
  // Inner loop: run turn, execute tool calls, repeat until end_turn.
545
877
  // We also accumulate the files the model touched (Write/Edit args)
546
- // so the verifier pass can scope itself to just this turn's edits.
878
+ // so the verifier pass can scope itself to just this turn's edits,
879
+ // and the names of every tool the model invoked (for headless JSON).
547
880
  let turnGuard = 0;
548
881
  const touchedFiles = new Set();
882
+ const toolsUsed = [];
549
883
  let lastAssistantText = "";
884
+ let turnErrored = false;
550
885
  while (turnGuard < 20) {
551
886
  turnGuard += 1;
552
887
  const res = await runOneTurn({
@@ -560,10 +895,23 @@ export async function runRepl(opts) {
560
895
  // same pin context, no need to re-announce.
561
896
  pinnedSpecs: turnGuard === 1 ? activePins : undefined,
562
897
  profile: session.profile.slug,
898
+ projectContext: projectContext || undefined,
563
899
  touchedFilesSink: touchedFiles,
900
+ toolsUsedSink: toolsUsed,
901
+ // Plan mode: hard-deny mutating tools at the executor (even with
902
+ // --yes) and send the model only the read-only tool subset.
903
+ planMode: session.planMode,
904
+ tools: session.planMode ? READONLY_TOOL_SCHEMAS : TOOL_SCHEMAS,
905
+ // Render / headless control the streaming sink: when render-mode
906
+ // is on (or headless), buffer the text instead of echoing raw
907
+ // deltas — the answer is emitted once at end (rendered or JSON).
908
+ bufferText: session.renderMode || !!opts.headless,
909
+ headless: !!opts.headless,
564
910
  });
565
911
  if (res.kind === "error") {
566
- process.stdout.write(ui.error(res.message) + "\n\n");
912
+ if (!opts.headless)
913
+ process.stdout.write(ui.error(res.message) + "\n\n");
914
+ turnErrored = true;
567
915
  break;
568
916
  }
569
917
  if (res.kind === "end_turn") {
@@ -572,11 +920,22 @@ export async function runRepl(opts) {
572
920
  }
573
921
  // tool_use — keep looping
574
922
  }
923
+ // Render the assistant's markdown once the turn settles, when render
924
+ // mode is on (plain text when not a color TTY). We buffered the raw deltas
925
+ // (bufferText above), so this is the ONLY place the answer prints —
926
+ // double-printing is structurally impossible. Skipped in headless
927
+ // (JSON/text payload handles output) and when there's no text.
928
+ if (session.renderMode && !opts.headless && lastAssistantText) {
929
+ const useColor = !!process.stdout.isTTY && !process.env.NO_COLOR;
930
+ const rendered = useColor ? renderMarkdown(lastAssistantText) : lastAssistantText;
931
+ process.stdout.write("\n" + rendered.replace(/\n+$/, "") + "\n\n");
932
+ }
575
933
  // ── Verifier pass ─────────────────────────────────────────────
576
934
  // After the turn settles, run the active profile's verifier kernels
577
935
  // against the assistant output + the files it touched. Blocking
578
936
  // issues get fed back into the NEXT user message so the model can
579
937
  // self-correct. Warnings + info surface inline as a chip.
938
+ let verifierPayload = null;
580
939
  if (session.profile.verifiers && session.profile.verifiers.length > 0) {
581
940
  const issues = await runVerifiers(session.profile.verifiers, {
582
941
  cwd: session.cwd,
@@ -585,17 +944,21 @@ export async function runRepl(opts) {
585
944
  profile: session.profile.slug,
586
945
  });
587
946
  const sum = summarizeIssues(issues);
947
+ verifierPayload = { ok: sum.ok, summary: sum.summary, details: sum.details };
588
948
  if (issues.length === 0) {
589
- process.stdout.write(ui.info(`✓ verifiers (${session.profile.verifiers.join(", ")}) green\n\n`));
949
+ if (!opts.headless)
950
+ process.stdout.write(ui.info(`✓ verifiers (${session.profile.verifiers.join(", ")}) green\n\n`));
590
951
  }
591
952
  else {
592
- const head = sum.ok
593
- ? ui.info(`verifiers · ${sum.summary}\n`)
594
- : ui.error(`verifiers · ${sum.summary}\n`);
595
- process.stdout.write("\n" + head);
596
- for (const line of sum.details)
597
- process.stdout.write(ui.info(line) + "\n");
598
- process.stdout.write("\n");
953
+ if (!opts.headless) {
954
+ const head = sum.ok
955
+ ? ui.info(`verifiers · ${sum.summary}\n`)
956
+ : ui.error(`verifiers · ${sum.summary}\n`);
957
+ process.stdout.write("\n" + head);
958
+ for (const line of sum.details)
959
+ process.stdout.write(ui.info(line) + "\n");
960
+ process.stdout.write("\n");
961
+ }
599
962
  // Stage blocking issues for the next turn — model self-corrects
600
963
  // on the user's next prompt.
601
964
  session.pendingVerifierBlock = formatForNextTurn(issues);
@@ -603,12 +966,28 @@ export async function runRepl(opts) {
603
966
  }
604
967
  // After the turn settles, surface suggested actions if the plan
605
968
  // came back. They render as numbered chips; on the next prompt
606
- // the user can type "1" / "2" / "3" to fire one.
969
+ // the user can type "1" / "2" / "3" to fire one. Suppressed in
970
+ // headless mode (chips are interactive chrome).
607
971
  const plan = await planPromise.catch(() => null);
608
- if (plan && plan.suggested_actions.length > 0) {
972
+ if (!opts.headless && plan && plan.suggested_actions.length > 0) {
609
973
  renderSuggestedActions(plan);
610
974
  pendingActions = plan.suggested_actions.slice(0, 4);
611
975
  }
976
+ // Persist the conversation after every turn so a later --continue
977
+ // resumes from here. No-op in headless mode.
978
+ persistSession();
979
+ // Headless mode: emit the single payload and exit. The wire format
980
+ // carries no usage/cost frame, so `cost` is null (never fabricated).
981
+ if (opts.headless) {
982
+ emitHeadlessPayload({
983
+ outputFormat: opts.outputFormat ?? "text",
984
+ answer: lastAssistantText,
985
+ toolsUsed,
986
+ verifier: verifierPayload,
987
+ sessionId: session.sessionId,
988
+ });
989
+ return turnErrored ? 1 : 0;
990
+ }
612
991
  if (opts.oneShot)
613
992
  break;
614
993
  }
@@ -623,49 +1002,65 @@ async function runOneTurn(args) {
623
1002
  const toolCalls = [];
624
1003
  let stopReason = null;
625
1004
  let firstDelta = true;
1005
+ const headless = args.headless === true;
1006
+ const bufferText = args.bufferText === true;
626
1007
  // Show the pin header BEFORE thinking spinner so the user knows
627
- // immediately that their /pin took effect.
628
- if (args.pinnedSpecs && args.pinnedSpecs.length > 0) {
1008
+ // immediately that their /pin took effect. (Suppressed in headless.)
1009
+ if (!headless && args.pinnedSpecs && args.pinnedSpecs.length > 0) {
629
1010
  process.stdout.write(announcePin(args.pinnedSpecs) + "\n");
630
1011
  }
631
1012
  // "thinking…" spinner — fires immediately, clears the moment the
632
- // first text delta lands. Removes the awkward silent gap between
633
- // prompt submission and first token.
1013
+ // first text delta lands. Spinner writes to stderr; we still skip it in
1014
+ // headless so a `2>&1` redirect can't contaminate parseable output.
634
1015
  const spinner = new Spinner("thinking…");
635
- spinner.start();
1016
+ if (!headless)
1017
+ spinner.start();
636
1018
  await streamChat({
637
1019
  apiUrl: args.apiUrl,
638
1020
  apiKey: args.apiKey,
639
1021
  messages: args.messages,
640
- tools: TOOL_SCHEMAS,
1022
+ tools: args.tools ?? TOOL_SCHEMAS,
641
1023
  profile: args.profile,
1024
+ projectContext: args.projectContext,
642
1025
  }, {
643
1026
  onTextDelta: (d) => {
644
1027
  if (firstDelta) {
645
- spinner.stop();
646
- process.stdout.write("\n");
1028
+ if (!headless) {
1029
+ spinner.stop();
1030
+ if (!bufferText)
1031
+ process.stdout.write("\n");
1032
+ }
647
1033
  firstDelta = false;
648
1034
  }
649
1035
  assistantText += d;
650
- process.stdout.write(d);
1036
+ // Buffer-only when render mode / headless is on, so the answer is
1037
+ // emitted once at the end (rendered or as JSON). Otherwise stream
1038
+ // raw deltas live.
1039
+ if (!bufferText)
1040
+ process.stdout.write(d);
651
1041
  },
652
1042
  onToolCall: (call) => {
653
1043
  toolCalls.push(call);
654
1044
  // Update the spinner label so the user sees what's queued.
655
- if (firstDelta)
1045
+ if (firstDelta && !headless)
656
1046
  spinner.setLabel(`${call.name}…`);
657
1047
  },
658
1048
  onTurnEnd: (reason) => { stopReason = reason; },
659
1049
  onError: (msg) => {
660
1050
  stopReason = "error";
661
- spinner.stop();
662
- process.stdout.write("\n" + announceError(msg) + "\n");
1051
+ if (!headless) {
1052
+ spinner.stop();
1053
+ process.stdout.write("\n" + announceError(msg) + "\n");
1054
+ }
663
1055
  },
664
1056
  });
665
1057
  // Always stop the spinner in case neither delta nor error fired
666
1058
  // (e.g. immediate turn_end with no content — empty model response).
667
- spinner.stop();
668
- if (assistantText)
1059
+ if (!headless)
1060
+ spinner.stop();
1061
+ // When streaming raw, close the answer block with spacing. When
1062
+ // buffering, the caller owns final spacing (render / JSON).
1063
+ if (assistantText && !bufferText && !headless)
669
1064
  process.stdout.write("\n\n");
670
1065
  args.messages.push({ role: "assistant", content: assistantText, tool_calls: toolCalls });
671
1066
  if (stopReason === "error")
@@ -683,6 +1078,24 @@ async function runOneTurn(args) {
683
1078
  });
684
1079
  continue;
685
1080
  }
1081
+ // ── PLAN-MODE HARD DENY ───────────────────────────────────────
1082
+ // This runs BEFORE the confirm()/yolo check, so it is the GUARANTEE
1083
+ // (not the schema filter) that no mutating tool can execute in plan
1084
+ // mode — even under --yes / yolo. Stoa (real SaaS side effects) and
1085
+ // Bash (can write via the shell) are denied alongside Write/Edit. The
1086
+ // deny is fed back as a tool result so the model can pivot to a plan.
1087
+ if (args.planMode && MUTATING_TOOLS.has(call.name)) {
1088
+ if (!headless) {
1089
+ process.stdout.write(announceTool(call.name, tool.describe(call.args)) + "\n");
1090
+ process.stdout.write(announceWarn(`[plan mode] ${call.name} is disabled — read-only until you /plan or 'approve'`) + "\n\n");
1091
+ }
1092
+ args.messages.push({
1093
+ role: "tool",
1094
+ tool_call_id: call.id,
1095
+ content: `[plan-mode] ${call.name} is disabled in plan mode. Investigate with read-only tools (Read/Glob/Grep/LS) and propose a numbered plan; do not modify files or run commands.`,
1096
+ });
1097
+ continue;
1098
+ }
686
1099
  // Record Write/Edit paths so the post-turn verifier pass can
687
1100
  // scope itself to just the files this turn touched.
688
1101
  if (args.touchedFilesSink && (call.name === "Write" || call.name === "Edit")) {
@@ -694,8 +1107,9 @@ async function runOneTurn(args) {
694
1107
  }
695
1108
  // Tool announcement — bullet style matches a list of actions
696
1109
  // rather than CLI chrome. Single line, brand-amber name + dim
697
- // detail.
698
- process.stdout.write(announceTool(call.name, tool.describe(call.args)) + "\n");
1110
+ // detail. (Suppressed in headless so stdout stays parseable.)
1111
+ if (!headless)
1112
+ process.stdout.write(announceTool(call.name, tool.describe(call.args)) + "\n");
699
1113
  if (!args.ctx.yolo && tool.confirmPolicy !== "never") {
700
1114
  const ok = await confirm(` Allow ${call.name}?`, args.rl);
701
1115
  if (!ok) {
@@ -704,37 +1118,74 @@ async function runOneTurn(args) {
704
1118
  tool_call_id: call.id,
705
1119
  content: `[user denied] User declined to run ${call.name}.`,
706
1120
  });
707
- process.stdout.write(announceWarn("denied") + "\n\n");
1121
+ if (!headless)
1122
+ process.stdout.write(announceWarn("denied") + "\n\n");
708
1123
  continue;
709
1124
  }
710
1125
  }
711
1126
  // Spinner during tool execution — Bash/Read on a large file can
712
1127
  // take seconds. Without this the user stares at silence.
713
1128
  const toolSpin = new Spinner(`running ${call.name}…`);
714
- toolSpin.start();
1129
+ if (!headless)
1130
+ toolSpin.start();
715
1131
  let result;
716
1132
  try {
717
1133
  result = await tool.execute(call.args, args.ctx);
718
- toolSpin.stop();
1134
+ if (!headless)
1135
+ toolSpin.stop();
1136
+ // Record only tools that actually ran (not denied / plan-blocked).
1137
+ if (args.toolsUsedSink)
1138
+ args.toolsUsedSink.push(call.name);
719
1139
  }
720
1140
  catch (err) {
721
- toolSpin.stop();
1141
+ if (!headless)
1142
+ toolSpin.stop();
722
1143
  // Hardened: malformed args / tool throws / fs errors all turn
723
1144
  // into a structured tool-result string the model can RECOVER
724
1145
  // from instead of crashing the REPL. The error gets fed back in
725
1146
  // the conversation so the model can fix its call and try again.
726
1147
  const errMsg = err instanceof Error ? err.message : String(err);
727
1148
  result = `[error] ${call.name} failed: ${errMsg}\n\nThe tool call was rejected. Common causes: missing required args, invalid path, file too large, command refused. You can retry with corrected args.`;
728
- process.stdout.write(announceError(`${call.name} failed: ${errMsg}`) + "\n");
1149
+ if (!headless)
1150
+ process.stdout.write(announceError(`${call.name} failed: ${errMsg}`) + "\n");
729
1151
  }
730
1152
  // Show more of each tool's output in the local CLI preview. The
731
1153
  // model always sees the full output server-side; the truncation
732
1154
  // only affects what the user sees in their terminal.
733
- process.stdout.write(ui.info(truncatePreview(result, 4000)) + "\n\n");
1155
+ if (!headless)
1156
+ process.stdout.write(ui.info(truncatePreview(result, 4000)) + "\n\n");
734
1157
  args.messages.push({ role: "tool", tool_call_id: call.id, content: result });
735
1158
  }
736
1159
  return { kind: "tool_use" };
737
1160
  }
/**
 * Instruction text used while plan mode is active.
 *
 * Per the original author's note, it rides along as a leading user note
 * (the CLI message schema has no `system` role), the same way
 * projectContext does. It only nudges the model toward
 * investigate-then-plan behavior; the executor-side hard deny on mutating
 * tools is what actually enforces read-only operation.
 */
const PLAN_MODE_INSTRUCTION = [
    "You are in PLAN MODE. Investigate the task using ONLY read-only tools ",
    "(Read, Glob, Grep, LS). Do NOT modify files or run shell commands — ",
    "Write, Edit, Bash, and Stoa are disabled and any attempt is rejected. ",
    "Produce a numbered, ordered plan of the changes you WOULD make, then STOP ",
    "and wait for the user to approve before executing.",
].join("");
/**
 * Emit the single headless payload on stdout.
 *
 * - `outputFormat === "json"`: writes exactly ONE JSON object followed by a
 *   newline. The turn loop suppresses its own stdout writes when headless,
 *   so this line is meant to be the only thing a consumer has to parse.
 * - any other format: writes the answer text with trailing newlines
 *   collapsed to a single terminating newline.
 *
 * `cost` is always null because the NDJSON wire format carries no
 * usage/cost frame — it is never fabricated.
 *
 * Missing fields are normalized instead of being passed through as
 * `undefined`: JSON.stringify silently drops undefined-valued keys, which
 * would give consumers an unstable schema, and a missing answer would
 * otherwise throw in text mode and leave the caller with no payload at all.
 *
 * @param {object} p
 * @param {string} [p.outputFormat] "json" for the JSON envelope; anything else means plain text.
 * @param {string} [p.answer] Final assistant text; defaults to "".
 * @param {string[]} [p.toolsUsed] Names of tools that ran; defaults to [].
 * @param {*} [p.verifier] Verifier summary; defaults to null.
 * @param {string} [p.sessionId] Session identifier; defaults to null.
 * @returns {void}
 */
function emitHeadlessPayload(p) {
    const answer = p.answer ?? "";
    if (p.outputFormat === "json") {
        const payload = {
            answer,
            tools_used: p.toolsUsed ?? [],
            verifier: p.verifier ?? null,
            cost: null,
            session_id: p.sessionId ?? null,
        };
        process.stdout.write(JSON.stringify(payload) + "\n");
        return;
    }
    // Strip trailing newlines with a linear scan; a /\n+$/ regex can
    // backtrack quadratically on long newline runs inside the text.
    let end = answer.length;
    while (end > 0 && answer[end - 1] === "\n") {
        end -= 1;
    }
    process.stdout.write(answer.slice(0, end) + "\n");
}
738
1189
  async function confirm(question, rl) {
739
1190
  // CRITICAL: reuse the OUTER REPL's readline.Interface. Creating a
740
1191
  // new Interface + closing it would close stdin under the outer rl
@@ -814,8 +1265,4 @@ function expandActionToPrompt(a) {
/** Return the word "Theron" styled with chalk as bold text in hex color #FFAE00. */
function chalkBoldThronWord() {
    const paintBoldAmber = chalk.bold.hex("#FFAE00");
    return paintBoldAmber("Theron");
}
817
- // Expose renderMarkdown so `theron --markdown` mode can pretty-print
818
- // after the stream finishes if someone wants that flow. Currently the
819
- // REPL streams raw deltas to keep latency snappy.
820
- export { renderMarkdown };
821
1268
  //# sourceMappingURL=repl.js.map