@kenkaiiii/ggcoder 5.4.3 → 5.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,8 +20,9 @@ import { parseArgs } from "node:util";
20
20
  import { formatError } from "@kenkaiiii/gg-ai";
21
21
  import { runJsonMode } from "./modes/json-mode.js";
22
22
  import { AgentSession } from "./core/agent-session.js";
23
- import { buildKenSystemPrompt } from "./core/ken-prompt.js";
24
- import { buildKenDigest } from "./core/ken-context.js";
23
+ import { buildKenSystemPrompt, buildKenAutopilotSystemPrompt } from "./core/ken-prompt.js";
24
+ import { buildKenDigest, buildKenAutopilotContext } from "./core/ken-context.js";
25
+ import { parseAutopilotVerdict } from "./core/autopilot-verdict.js";
25
26
  import { collectProjectContext } from "./system-prompt.js";
26
27
  import { AuthStorage } from "./core/auth-storage.js";
27
28
  import { MOONSHOT_OAUTH_KEY, XIAOMI_CREDITS_KEY } from "@kenkaiiii/gg-core";
@@ -39,7 +40,7 @@ import { getNextThinkingLevel, getSupportedThinkingLevels, isThinkingLevelSuppor
39
40
  import { PROMPT_COMMANDS } from "./core/prompt-commands.js";
40
41
  import { loadCustomCommands } from "./core/custom-commands.js";
41
42
  import { discoverProjects, listRecentSessions } from "./core/project-discovery.js";
42
- import { loadTasksSync, saveTasksSync, getNextPendingTask, markTaskInProgress, } from "./core/tasks-store.js";
43
+ import { loadTasksSync, saveTasksSync, pruneDoneTasksSync, getNextPendingTask, markTaskInProgress, } from "./core/tasks-store.js";
43
44
  import { initLogger, log } from "./core/logger.js";
44
45
  import { RADIO_STATIONS, getCurrentStation, playRadio, stopRadio } from "./core/radio.js";
45
46
  import { enrichProcessPath } from "./core/shell-path.js";
@@ -80,6 +81,7 @@ async function loadAppSettings() {
80
81
  // Preserve the per-project map verbatim (validated + written by the
81
82
  // model/thinking handlers below).
82
83
  projectModels: raw.projectModels && typeof raw.projectModels === "object" ? raw.projectModels : undefined,
84
+ autopilot: raw.autopilot && typeof raw.autopilot === "object" ? raw.autopilot : undefined,
83
85
  };
84
86
  }
85
87
  catch {
@@ -103,6 +105,19 @@ async function saveProjectModelPrefs(cwd, prefs) {
103
105
  s.projectModels = { ...(s.projectModels ?? {}), [key]: prefs };
104
106
  await saveAppSettings(s);
105
107
  }
108
+ /** Read this project's persisted autopilot flag (default off). Looks up the entry keyed by projectModelKey(cwd) in the settings' `autopilot` map. */
109
+ async function loadAutopilot(cwd) {
110
+ const s = await loadAppSettings();
111
+ return s.autopilot?.[projectModelKey(cwd)] ?? false; // only a missing map or a missing/null entry falls back to false; any other stored value is returned as-is (not coerced to boolean)
112
+ }
113
+ /** Persist this project's autopilot flag via read-modify-write so the rest of
114
+ * the settings file (projectsRoot, model map, other projects) is preserved. The flag is stored under projectModelKey(cwd) in the `autopilot` map. */
115
+ async function saveAutopilot(cwd, enabled) {
116
+ const s = await loadAppSettings(); // NOTE(review): load and save are separate awaits — presumably a settings write landing in between would be overwritten; confirm
117
+ const key = projectModelKey(cwd);
118
+ s.autopilot = { ...(s.autopilot ?? {}), [key]: enabled }; // copy the existing map (or start from empty) and replace only this project's entry
119
+ await saveAppSettings(s);
120
+ }
106
121
  /**
107
122
  * Persist the active model selection to ~/.gg/settings.json so it survives app
108
123
  * restarts. Mirrors the CLI's handleModelSelect persistence (App.tsx).
@@ -363,12 +378,23 @@ async function runJsonModeIfRequested() {
363
378
  model: { type: "string" },
364
379
  "max-turns": { type: "string" },
365
380
  "system-prompt": { type: "string" },
381
+ tools: { type: "string" },
366
382
  "prompt-cache-key": { type: "string" },
367
383
  },
368
384
  allowPositionals: true,
369
385
  strict: true,
370
386
  });
371
387
  const maxTurnsRaw = values["max-turns"];
388
+ // Optional tool allow-list forwarded by the subagent spawner from an agent
389
+ // definition's `tools:` frontmatter. Mirrors the identical parsing in
390
+ // cli.ts's `values.json` branch — keep both in sync (see subagent.ts).
391
+ const parsedTools = values.tools
392
+ ? values.tools
393
+ .split(",")
394
+ .map((t) => t.trim())
395
+ .filter(Boolean)
396
+ : [];
397
+ const allowedTools = parsedTools.length > 0 ? parsedTools : undefined;
372
398
  await runJsonMode({
373
399
  message: positionals[0] ?? "",
374
400
  provider: (values.provider ?? "anthropic"),
@@ -376,6 +402,7 @@ async function runJsonModeIfRequested() {
376
402
  cwd: process.cwd(),
377
403
  systemPrompt: values["system-prompt"],
378
404
  maxTurns: maxTurnsRaw ? parseInt(maxTurnsRaw, 10) : undefined,
405
+ allowedTools,
379
406
  promptCacheKey: values["prompt-cache-key"],
380
407
  }).catch((err) => {
381
408
  process.stderr.write((err instanceof Error ? err.message : String(err)) + "\n");
@@ -759,6 +786,24 @@ async function createSession(deps, opts) {
759
786
  session.eventBus.on("compaction_end", (d) => broadcast("compaction_end", d));
760
787
  let running = false;
761
788
  let titleGenerated = false;
789
+ // Autopilot (auto-review) toggle for THIS window's project. Loaded from
790
+ // gg-app.json on boot; flipped via POST /autopilot. When on, POST /prompt runs
791
+ // runAutopilotCycle after the user's turn settles — Ken auto-reviews the work
792
+ // and drives the review→prompt→review loop.
793
+ let autopilot = await loadAutopilot(cwd);
794
+ // True while an autopilot review is in flight (used to defer kenAuto model
795
+ // switches, like kenRunning does for chat Ken, and to drive the spinner).
796
+ let autopilotReviewing = false;
797
+ // True for the WHOLE autopilot cycle (reviews + injected runs). The build
798
+ // `running` flag is false during the review windows between injected runs, so
799
+ // this is the extra guard that makes a user /prompt queue as steering instead
800
+ // of starting a run that would collide with an injected one on the same
801
+ // session (AgentSession.prompt has no concurrency guard).
802
+ let autopilotActive = false;
803
+ // Set by /cancel to break out of an in-flight autopilot cycle between steps.
804
+ let autopilotCancelled = false;
805
+ // Hard cap on review→prompt→review rounds per user turn (loop safety).
806
+ const MAX_AUTOPILOT_ROUNDS = 3;
762
807
  // ── Telegram serve (remote control via Telegram) ───────────
763
808
  // A single embedded serve session lives in this sidecar process. Only the main
764
809
  // window's home screen exposes the controls, so there's one bot per app.
@@ -827,6 +872,53 @@ async function createSession(deps, opts) {
827
872
  log("INFO", "app-sidecar", "ken session ready", { provider: st.provider, model: st.model });
828
873
  return ken;
829
874
  }
875
+ // ── Autopilot Ken (auto-reviewer) ──────────────────────────
876
+ // A THIRD read-only AgentSession, separate from chat Ken. In autopilot mode
877
+ // Ken silently reviews each finished GG Coder turn and returns a verdict
878
+ // (PROMPT / ALL_CLEAR / HUMAN). Its bus is intentionally NOT bridged to the
879
+ // ken_* chat bubbles — the review is silent; we read its final assistant text
880
+ // and parse it. Uses the lean autopilot system prompt + the same read-only
881
+ // tools. Created lazily on the first autopilot cycle.
882
+ let kenAutoSession = null;
883
+ let kenAutoAbort = new AbortController();
884
+ let pendingKenAutoModel = null;
885
+ async function syncKenAutoModel(provider, model) { // Move the autopilot reviewer session onto the given provider/model, deferring while a review is in flight.
886
+ if (autopilotReviewing) { // mid-review: only record the latest request; runAutopilotReview's finally re-calls this with it
887
+ pendingKenAutoModel = { provider, model };
888
+ return;
889
+ }
890
+ if (!kenAutoSession) // reviewer not created yet — nothing to switch; ensureKenAutoSession reads the main session's state when it builds one
891
+ return;
892
+ const st = kenAutoSession.getState();
893
+ if (st.provider === provider && st.model === model) // already on the requested provider/model
894
+ return;
895
+ await kenAutoSession.switchModel(provider, model);
896
+ log("INFO", "app-sidecar", "ken autopilot session model synced", { provider, model });
897
+ }
898
+ async function ensureKenAutoSession() { // Return the autopilot reviewer AgentSession, creating and initializing it on first use.
899
+ if (kenAutoSession) // already created — reuse the cached session
900
+ return kenAutoSession;
901
+ const st = session.getState(); // start the reviewer on the main session's current provider/model
902
+ const ken = new AgentSession({
903
+ provider: st.provider,
904
+ model: st.model,
905
+ cwd,
906
+ systemPrompt: buildKenAutopilotSystemPrompt(),
907
+ allowedTools: KEN_ALLOWED_TOOLS,
908
+ allowedMcpServers: KEN_ALLOWED_MCP_SERVERS,
909
+ transient: true,
910
+ signal: kenAutoAbort.signal, // the controller current at creation time; the /cancel path swaps in a fresh one via setSignal
911
+ });
912
+ await ken.initialize();
913
+ // Deliberately no bus bridge: the review is silent. Errors surface via the
914
+ // runAutopilotReview try/catch as autopilot_error frames.
915
+ kenAutoSession = ken; // cached only after initialize() resolves, so a failed init is attempted again on the next call
916
+ log("INFO", "app-sidecar", "ken autopilot session ready", {
917
+ provider: st.provider,
918
+ model: st.model,
919
+ });
920
+ return ken;
921
+ }
830
922
  // Resumed session: if it already has a conversation, generate its title now so
831
923
  // the title bar shows it immediately on load (not just after the next prompt).
832
924
  {
@@ -861,6 +953,14 @@ async function createSession(deps, opts) {
861
953
  gitBranch = await getGitBranch(cwd).catch(() => gitBranch);
862
954
  gitIsRepo = await isGitRepo(cwd).catch(() => gitIsRepo);
863
955
  broadcast("run_end", {});
956
+ // Autopilot's review loop is driven explicitly from POST /prompt (see
957
+ // runAutopilotCycle), NOT from this shared finally — that keeps the
958
+ // injected GG Coder runs this cycle triggers from recursively re-entering
959
+ // the loop through the same bracket.
960
+ // The agent may have marked project tasks done during the run — prune the
961
+ // completed ones so they drop out of the Tasks modal automatically (users
962
+ // never have to delete finished tasks by hand).
963
+ broadcast("tasks_list", { tasks: pruneDoneTasksSync(cwd) });
864
964
  // Queue drains into the run as steering, so it's empty by run_end —
865
965
  // sync the webview indicator.
866
966
  broadcast("queued", { count: session.getQueuedCount() });
@@ -876,6 +976,79 @@ async function createSession(deps, opts) {
876
976
  }
877
977
  }
878
978
  }
979
+ // ── Autopilot orchestration ─────────────────────────────────
980
+ // One review = prompt the kenAuto session with the review digest, read its
981
+ // final assistant text, parse a verdict. Returns null on failure (surfaced as
982
+ // an autopilot_error frame) so the cycle stops rather than looping blind. Sets autopilotReviewing for the duration and broadcasts autopilot_review_start first.
983
+ async function runAutopilotReview() {
984
+ autopilotReviewing = true;
985
+ broadcast("autopilot_review_start", {});
986
+ try {
987
+ const ken = await ensureKenAutoSession();
988
+ const projectContext = await collectProjectContext(cwd).catch(() => []); // a project-context failure degrades to an empty list instead of failing the review
989
+ const digest = buildKenAutopilotContext({
990
+ projectContext,
991
+ cwd,
992
+ gitBranch,
993
+ messages: session.getMessages(), // the main (GG Coder) session's messages, not the reviewer's
994
+ });
995
+ await ken.prompt(digest);
996
+ return parseAutopilotVerdict(lastAssistantText(ken.getMessages()));
997
+ }
998
+ catch (err) { // NOTE(review): an abort triggered by /cancel presumably lands here too and is broadcast as autopilot_error — confirm that is intended
999
+ broadcastError("autopilot_error", "autopilot review failed", err);
1000
+ return null;
1001
+ }
1002
+ finally {
1003
+ autopilotReviewing = false;
1004
+ // Apply any model switch that landed mid-review. The pending slot is cleared before the switch is attempted.
1005
+ const pending = pendingKenAutoModel;
1006
+ pendingKenAutoModel = null;
1007
+ if (pending)
1008
+ await syncKenAutoModel(pending.provider, pending.model); // NOTE(review): a rejection here replaces the verdict/null result and escapes to the caller — confirm switchModel cannot throw, or guard it
1009
+ }
1010
+ }
1011
+ // Drive the review→prompt→review loop for one finished user turn. Only ever
1012
+ // called from POST /prompt after the user's own run resolves — never from the
1013
+ // task runner, resume, /ken, or error paths, so there's no recursion and no
1014
+ // guard tangle. Bounded by MAX_AUTOPILOT_ROUNDS and cancellable between steps.
1015
+ async function runAutopilotCycle() {
1016
+ if (!autopilot || autopilotCancelled) // NOTE(review): `autopilot` is only checked here — toggling it off mid-cycle does not stop the loop; only autopilotCancelled is re-checked below
1017
+ return;
1018
+ autopilotActive = true;
1019
+ try {
1020
+ // Lean context per user turn: wipe prior review history so each new turn
1021
+ // starts cheap, while within this cycle the few review messages persist so
1022
+ // Ken remembers what he already asked GG Coder to fix.
1023
+ await kenAutoSession?.newSession().catch(() => { }); // skipped entirely while the reviewer session does not exist yet; a newSession failure is deliberately ignored
1024
+ for (let round = 1; round <= MAX_AUTOPILOT_ROUNDS; round++) {
1025
+ if (autopilotCancelled)
1026
+ return;
1027
+ const verdict = await runAutopilotReview();
1028
+ if (!verdict || autopilotCancelled) // review failed (already reported by runAutopilotReview) or cancelled while reviewing
1029
+ return;
1030
+ if (verdict.kind === "all_clear") {
1031
+ broadcast("autopilot_done", {});
1032
+ return;
1033
+ }
1034
+ if (verdict.kind === "human") {
1035
+ broadcast("autopilot_human", { reason: verdict.reason });
1036
+ return;
1037
+ }
1038
+ // prompt → show a compact Ken-tinted marker (not the prompt body), then
1039
+ // feed GG Coder. Bracketed by runAgent so the run streams normally; the
1040
+ // shared finally no longer re-triggers autopilot, so this can't recurse. Any verdict kind other than all_clear/human reaches this path.
1041
+ broadcast("autopilot_prompted", { round, body: verdict.body }); // NOTE(review): the frame does carry verdict.body; presumably the webview chooses not to render it — confirm
1042
+ await runAgent(verdict.body, () => session.prompt(verdict.body));
1043
+ if (autopilotCancelled)
1044
+ return;
1045
+ }
1046
+ broadcast("autopilot_capped", { rounds: MAX_AUTOPILOT_ROUNDS }); // reached only when every round ended in a prompt verdict
1047
+ }
1048
+ finally {
1049
+ autopilotActive = false; // NOTE(review): POST /prompt queues messages while this flag is set; when the cycle ends without another injected run it is not visible here what drains them — confirm
1050
+ }
1051
+ }
879
1052
  // ── Task runner (project task list → sessions) ──────────────
880
1053
  // Mirrors the CLI's task flow: each task runs in its OWN fresh session, with a
881
1054
  // completion hint instructing the agent to mark the task done via the tasks
@@ -897,8 +1070,8 @@ async function createSession(deps, opts) {
897
1070
  `tasks({ action: "done", id: "${shortId}" })`;
898
1071
  await runAgent(task.title, () => session.prompt(task.prompt + completionHint));
899
1072
  // The agent typically marks the task done via the tasks tool during the run;
900
- // push the refreshed list so the webview's task modal reflects it.
901
- broadcast("tasks_list", { tasks: loadTasksSync(cwd) });
1073
+ // prune completed tasks and push the refreshed list so the modal drops them.
1074
+ broadcast("tasks_list", { tasks: pruneDoneTasksSync(cwd) });
902
1075
  return true;
903
1076
  }
904
1077
  async function runTasks(startId, all) {
@@ -993,6 +1166,7 @@ async function createSession(deps, opts) {
993
1166
  thinkingLevel: session.getThinkingLevel() ?? null,
994
1167
  supportedThinkingLevels: getSupportedThinkingLevels(st.provider, st.model),
995
1168
  supportsVideo: getModel(st.model)?.supportsVideo ?? false,
1169
+ autopilot,
996
1170
  ...footerExtras(),
997
1171
  });
998
1172
  return;
@@ -1016,6 +1190,7 @@ async function createSession(deps, opts) {
1016
1190
  thinkingLevel: session.getThinkingLevel() ?? null,
1017
1191
  supportedThinkingLevels: getSupportedThinkingLevels(st.provider, st.model),
1018
1192
  supportsVideo: getModel(st.model)?.supportsVideo ?? false,
1193
+ autopilot,
1019
1194
  ...footerExtras(),
1020
1195
  },
1021
1196
  })}\n\n`);
@@ -1339,10 +1514,13 @@ async function createSession(deps, opts) {
1339
1514
  json(res, 400, { error: "empty prompt" });
1340
1515
  return;
1341
1516
  }
1342
- if (running) {
1343
- // Queue prompts as mid-run steering (mirrors the CLI). Attachments are
1344
- // persisted to .gg/uploads first so the queued media rides the same
1345
- // native-block path as a non-queued attachment prompt when it drains.
1517
+ if (running || autopilotActive) {
1518
+ // Queue prompts as mid-run steering (mirrors the CLI). Also queue while
1519
+ // an autopilot cycle is active but between injected runs (build idle,
1520
+ // Ken reviewing) so the message never starts a run that collides with
1521
+ // an injected one on the same session. Attachments are persisted to
1522
+ // .gg/uploads first so the queued media rides the same native-block
1523
+ // path as a non-queued attachment prompt when it drains.
1346
1524
  const prepared = attachments.length > 0 ? await prepareAttachments(cwd, attachments) : [];
1347
1525
  const count = session.queueMessage(text, prepared);
1348
1526
  broadcast("queued", { count });
@@ -1350,6 +1528,9 @@ async function createSession(deps, opts) {
1350
1528
  return;
1351
1529
  }
1352
1530
  json(res, 202, { accepted: true });
1531
+ // Fresh user turn: clear any cancel flag left from a prior cycle so this
1532
+ // turn's autopilot review can run.
1533
+ autopilotCancelled = false;
1353
1534
  await runAgent(text, async () => {
1354
1535
  if (attachments.length > 0) {
1355
1536
  // Persist each attachment under .gg/uploads so files are inspectable
@@ -1365,6 +1546,11 @@ async function createSession(deps, opts) {
1365
1546
  await session.prompt(text);
1366
1547
  }
1367
1548
  });
1549
+ // After the user's run settles, kick off Ken's auto-review loop. This is
1550
+ // the ONLY entry point into the cycle — it drives any follow-up GG Coder
1551
+ // runs itself, so the shared runAgent finally never recurses.
1552
+ if (autopilot && !autopilotCancelled)
1553
+ await runAutopilotCycle();
1368
1554
  });
1369
1555
  return;
1370
1556
  }
@@ -1427,6 +1613,24 @@ async function createSession(deps, opts) {
1427
1613
  json(res, 200, { cancelled: true });
1428
1614
  return;
1429
1615
  }
1616
+ if (method === "POST" && url === "/autopilot") { // Toggle autopilot for this window's project: update the in-memory flag, persist it, notify clients.
1617
+ void readBody(req).then(async (raw) => { // NOTE(review): no rejection handler — if readBody or saveAutopilot rejects, no response is sent and the rejection is unhandled
1618
+ let enabled;
1619
+ try {
1620
+ enabled = Boolean(JSON.parse(raw).enabled); // NOTE(review): truthiness coercion — the string "false" enables autopilot; a body of `null` throws on .enabled and is reported as invalid JSON
1621
+ }
1622
+ catch {
1623
+ json(res, 400, { error: "invalid JSON body" });
1624
+ return;
1625
+ }
1626
+ autopilot = enabled; // in-memory flag flips before the write below, so a failed save leaves memory and disk out of step
1627
+ await saveAutopilot(cwd, enabled);
1628
+ log("INFO", "app-sidecar", "autopilot toggled", { enabled: String(enabled) });
1629
+ broadcast("autopilot", { autopilot: enabled });
1630
+ json(res, 200, { autopilot: enabled });
1631
+ });
1632
+ return;
1633
+ }
1430
1634
  if (method === "POST" && url === "/enhance") {
1431
1635
  void readBody(req).then(async (raw) => {
1432
1636
  let text;
@@ -1456,7 +1660,7 @@ async function createSession(deps, opts) {
1456
1660
  return;
1457
1661
  }
1458
1662
  if (method === "GET" && url === "/tasks") {
1459
- json(res, 200, { tasks: loadTasksSync(cwd) });
1663
+ json(res, 200, { tasks: pruneDoneTasksSync(cwd) });
1460
1664
  return;
1461
1665
  }
1462
1666
  // ── Radio (app-wide) ──────────────────────────────────────
@@ -1576,6 +1780,7 @@ async function createSession(deps, opts) {
1576
1780
  }
1577
1781
  await session.switchModel(target.provider, target.id);
1578
1782
  await syncKenModel(target.provider, target.id);
1783
+ await syncKenAutoModel(target.provider, target.id);
1579
1784
  // Clamp the reasoning level to what the new model supports (mirrors the
1580
1785
  // CLI): keep thinking on at the first supported tier if it was on but
1581
1786
  // the prior level is unsupported here; leave it off if it was off.
@@ -1657,6 +1862,13 @@ async function createSession(deps, opts) {
1657
1862
  running = false;
1658
1863
  // Stop a run-all sweep so the next pending task isn't auto-started.
1659
1864
  taskRunAll = false;
1865
+ // Stop any in-flight autopilot cycle: flag it so the loop bails between
1866
+ // steps, and abort a review that's mid-prompt on the kenAuto session.
1867
+ autopilotCancelled = true;
1868
+ kenAutoAbort.abort();
1869
+ kenAutoAbort = new AbortController();
1870
+ kenAutoSession?.setSignal(kenAutoAbort.signal);
1871
+ autopilotReviewing = false;
1660
1872
  // Drop any queued steering and return it so the webview can restore it to
1661
1873
  // the composer.
1662
1874
  const drained = session.drainQueue();
@@ -2143,7 +2355,9 @@ async function createSession(deps, opts) {
2143
2355
  for (const c of clients)
2144
2356
  c.res.end();
2145
2357
  kenAbort.abort();
2358
+ kenAutoAbort.abort();
2146
2359
  await kenSession?.dispose().catch(() => { });
2360
+ await kenAutoSession?.dispose().catch(() => { });
2147
2361
  await session.dispose().catch(() => { });
2148
2362
  }
2149
2363
  return {