@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
package/lib/deny.js ADDED
@@ -0,0 +1,199 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Destructive-command deny-list
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // These patterns are checked for EVERY shell tool call (exec/shell), in BOTH
8
+ // interactive and non-TTY mode, regardless of any --allow-* approval flags.
9
+ // The ONLY way to bypass this list is the explicit
10
+ // `--dangerously-skip-permissions` flag, which opts out of all safety.
11
+ //
12
+ // This is a defense-in-depth net, not a complete sandbox: shell is infinitely
13
+ // expressive and a determined model can evade pattern matching. The goal is to
14
+ // stop the obvious, high-blast-radius mistakes (wiping a disk, rm -rf /,
15
+ // piping the internet into a root shell) from slipping through auto-approve.
16
+ //
17
+ // To extend: add a `{ label, test }` entry. `test(command)` receives the raw
18
+ // command string (already whitespace-normalised for matching convenience) and
19
+ // returns true when the command should be blocked. Keep the `label` short and
20
+ // user-facing — it is shown in the refusal message and written to the audit log.
21
+ //
22
+ // Each entry is deliberately narrow so legitimate commands are not caught:
23
+ // e.g. `rm -r build/` is allowed, but `rm -rf` (recursive AND force) is not.
24
+ // ---------------------------------------------------------------------------
25
+
26
// Ordered list of deny rules. Each entry is `{ label, test, catastrophic? }`:
//   label         short user-facing description (refusal message / audit log)
//   test(command) returns true when the command must be refused
//   catastrophic  true for the unrecoverable subset (disk wipe, fork bomb)
const DENYLIST = [
  {
    label: 'recursive force delete (rm -rf and variants)',
    // `rm` invoked with BOTH a recursive flag (-r/-R/--recursive) AND a force
    // flag (-f/--force), in either order, whether bundled (-rf, -fr) or split
    // (-r -f). Plain `rm -r dir` or `rm -f file` are intentionally allowed.
    //
    // The command is split on shell separators (; & |) and every `rm`
    // invocation is inspected on its own, token by token. Only real option
    // tokens count, so a dash inside a file name (`rm -r build-files/`) or the
    // letter "r" inside `--force` no longer trips the rule, flags belonging to a
    // neighbouring command are never attributed to rm, and `cmd;rm -rf x`
    // (no space after the separator) is caught.
    test: (c) => {
      for (const segment of c.split(/[;&|]/)) {
        const invocation = /(?:^|\s)rm\s+([\s\S]*)$/.exec(segment);
        if (!invocation) continue;
        let recursive = false;
        let force = false;
        for (const raw of invocation[1].split(/\s+/)) {
          // Quotes/backslashes do not change what the shell passes to rm
          // (`rm "-rf" x`), so drop them before classifying the token.
          const token = raw.replace(/['"\\]/g, '').toLowerCase();
          if (token.startsWith('--')) {
            // getopt_long accepts unambiguous prefixes (`--rec`, `--fo`).
            if (token.length > 2 && '--recursive'.startsWith(token)) recursive = true;
            if (token.length > 2 && '--force'.startsWith(token)) force = true;
          } else if (/^-[a-z]+$/.test(token)) {
            // Bundled short flags such as -rf / -fr satisfy both in one token.
            if (token.includes('r')) recursive = true;
            if (token.includes('f')) force = true;
          }
        }
        if (recursive && force) return true;
      }
      return false;
    },
  },
  {
    label: 'piping remote content into a shell or interpreter',
    // curl/wget/fetch whose output is piped straight into a shell or scripting
    // interpreter — the classic `curl … | sh` / `wget … | bash` RCE pattern.
    test: (c) =>
      /\b(curl|wget|fetch)\b[^\n|]*\|\s*(sudo\s+)?(sh|bash|zsh|dash|ksh|fish|python[0-9.]*|perl|ruby|node|php)\b/i.test(c),
  },
  {
    label: 'disk-wipe / block-device write',
    // dd writing to a raw disk, mkfs (format), or any redirect that overwrites
    // a block device under /dev. These destroy data with no undo.
    catastrophic: true,
    test: (c) =>
      /\bdd\b[^\n]*\bof=\/dev\/(sd|nvme|hd|mmcblk|vd|disk|rdisk)/i.test(c) ||
      /\bmkfs(\.[a-z0-9]+)?\b/i.test(c) ||
      /\b(shred|wipefs)\b[^\n]*\/dev\//i.test(c) ||
      />\s*\/dev\/(sd|nvme|hd|mmcblk|vd|disk|rdisk)/i.test(c),
  },
  {
    label: 'fork bomb',
    // The canonical `:(){ :|:& };:` and named-function variants that recursively
    // spawn until the process table is exhausted.
    catastrophic: true,
    test: (c) => /(^|\s|;)([a-zA-Z_:][\w:]*)\s*\(\)\s*\{[^}]*\|\s*\2[^}]*&[^}]*\}\s*;\s*\2/.test(c),
  },
  {
    label: 'recursive chmod/chown on a system root',
    // Recursively changing ownership/permissions from a filesystem root or a
    // top-level system directory bricks the machine.
    test: (c) =>
      /\b(chmod|chown)\b[^\n]*\s-{0,2}[a-z]*R[a-z]*\s[^\n]*\s(\/|\/etc|\/usr|\/bin|\/boot|\/lib|\/sys|\/var)(\s|\/|$)/i.test(c),
  },
  {
    label: 'destructive write to a system / out-of-tree path',
    // Redirects, tee, or truncation targeting sensitive absolute paths outside
    // the working tree (system config, init, secrets). Best-effort: shell is
    // too expressive to catch every case, but the obvious ones are covered.
    test: (c) =>
      /(>>?|\btee\b[^\n]*)\s*\/(etc|boot|sys|proc|usr|bin|sbin|lib|lib64|root)\b/i.test(c) ||
      /\brm\s+[^\n]*\s\/(etc|boot|sys|proc|usr|bin|sbin|lib|lib64|root)(\s|\/|$)/i.test(c) ||
      // rm targeting the filesystem root itself (`rm -rf /`, `rm -rf /*`).
      /\brm\s+[^\n]*\s\/(\s|\*|$)/.test(c),
  },
  // -------------------------------------------------------------------------
  // Windows (cmd.exe / PowerShell) destructive set (Task 4.4).
  // -------------------------------------------------------------------------
  // Native Windows has no OS sandbox primitive, so the deny-list is the only
  // shell guard there. The POSIX patterns above never match `del /s /q`,
  // `format C:`, `rd /s`, or `Remove-Item -Recurse -Force`, so cover them
  // explicitly. These are checked on every platform (a deny-list miss on the
  // wrong OS is harmless; a hit on the wrong OS just fails to run a command
  // that wouldn't have worked anyway).
  {
    label: 'Windows recursive delete (del /s, rd /s, rmdir /s)',
    // `del /s` recurses into subdirectories; `rd /s` / `rmdir /s` delete a
    // directory tree. Flags are case-insensitive and may carry /q (quiet) /f
    // (force) in any order. Plain `del file.txt` / `rd emptydir` are allowed.
    test: (c) =>
      /\bdel\b[^\n|&]*\s\/s\b/i.test(c) ||
      /\b(rd|rmdir)\b[^\n|&]*\s\/s\b/i.test(c),
  },
  {
    label: 'PowerShell recursive force delete (Remove-Item -Recurse -Force)',
    // Remove-Item (or its `ri` alias) with BOTH -Recurse AND -Force — the
    // Windows analogue of `rm -rf`. The short forms -rec / -r and -f are
    // recognised as well.
    test: (c) => {
      if (!/\bRemove-Item\b/i.test(c) && !/\bri\b/i.test(c)) return false;
      const hasRecurse = /-Recurse?\b|-rec\b|-r\b/i.test(c);
      const hasForce = /-Force\b|-f\b/i.test(c);
      return hasRecurse && hasForce;
    },
  },
  {
    label: 'Windows format / disk wipe (format, Format-Volume, Clear-Disk, cipher /w, diskpart clean)',
    // Formatting a volume, clearing a disk, or securely wiping free space — the
    // Windows equivalents of mkfs / dd-to-disk. Catastrophic: unrecoverable.
    catastrophic: true,
    test: (c) =>
      // `format C: /fs:ntfs`. The lookbehind requires `format` to be a command
      // word, not the tail of an option or another name (`--format "%h: %s"`,
      // `clang-format`), which would otherwise be reported as a disk wipe.
      /(?<![-\w])format\s+[^\n|&]*?\b[a-z]:/i.test(c) ||
      /\bFormat-Volume\b/i.test(c) ||
      /\bClear-Disk\b/i.test(c) ||
      /\bcipher\s+\/w/i.test(c) || // cipher /w:C (wipe free space)
      (/\bdiskpart\b/i.test(c) && /\bclean\b/i.test(c)),
  },
];
127
+
128
// Path-rewriting canonicalization (constraint #3). A denylist that matches the
// textual `/etc` is bypassed by `/proc/self/root/etc` or `/proc/1234/root/etc`,
// which resolve to the SAME real path but dodge the pattern. Rewrite those
// procfs-root prefixes back to `/` so the system-path matchers see the resolved
// path.
//
// Recognised prefixes: /proc/self/root, /proc/thread-self/root, /proc/<pid>/root
// and the per-thread form /proc/<pid|self>/task/<tid>/root.
//
//   /proc/self/root/etc/passwd  ->  /etc/passwd   (prefix dropped, next `/` kept)
//   /proc/self/root             ->  /             (bare root must stay a path)
//
// The bare form matters: deleting it outright would turn
// `chmod -R 777 /proc/self/root` into `chmod -R 777 `, hiding the target from
// the root matchers. A prefix only counts when it ends at a path boundary
// (`/`, whitespace, a shell separator/quote, or end of string), so names such as
// `/proc/self/rootfs` are left alone.
//
// command: string to canonicalize. Returns the rewritten string.
function _canonicalizeProcRoot(command) {
  return command.replace(
    /\/proc\/(?:self|thread-self|\d+)(?:\/task\/\d+)?\/root(?=(\/)|[\s;&|)'"`]|$)/gi,
    // `slash` is captured only when the prefix is followed by `/`; in that case
    // the following slash already supplies the root, otherwise emit one.
    (_match, slash) => (slash === undefined ? '/' : ''),
  );
}
137
+
138
// Check a raw shell command against the deny-list.
//
// Returns { label, catastrophic } for a matching rule, or null when the command
// is allowed (also null for a non-string or empty command).
//
// Rule selection: every rule is evaluated. A catastrophic match (disk wipe /
// block-device write, fork bomb) always wins over a non-catastrophic one, even
// when the non-catastrophic rule appears earlier in DENYLIST; otherwise the
// first matching rule is reported. Without that priority a compound command
// such as `rm -rf x; dd if=/dev/zero of=/dev/sda` would be reported as a plain
// (non-catastrophic) hit, and classifyShellCommand would let a user-initiated
// shell run it without the confirmation prompt. Callers that only care about
// agent-initiated calls can keep treating any non-null result as a refusal.
function checkShellDenylist(command) {
  if (typeof command !== 'string' || !command) return null;
  // Replace each run of tabs / CRs / newlines with one space so line-oriented
  // matchers (`[^\n]*`) see the whole command. Runs of plain spaces are left as
  // they are. The original command is still what gets executed; this
  // normalisation only feeds the matchers.
  const normalised = command.replace(/[\t\r\n]+/g, ' ');
  // Also match against a procfs-root-canonicalized variant so a
  // /proc/self/root/etc rewrite is caught by the same /etc matchers (constraint
  // #3). When the two are identical (the common case) we only test once.
  const canonical = _canonicalizeProcRoot(normalised);
  const subjects = canonical === normalised ? [normalised] : [normalised, canonical];

  let firstHit = null;
  for (const rule of DENYLIST) {
    let matched = false;
    for (const subject of subjects) {
      try {
        if (rule.test(subject)) {
          matched = true;
          break;
        }
      } catch {
        // A malformed matcher must never crash the agent loop — skip it.
      }
    }
    if (!matched) continue;
    // Highest blast radius: report immediately, nothing can outrank it.
    if (rule.catastrophic) return { label: rule.label, catastrophic: true };
    if (firstHit === null) firstHit = { label: rule.label, catastrophic: false };
  }
  return firstHit;
}
166
+
167
// Map a shell command plus its initiator to the action the caller must take.
//
// initiator 'agent' (default): the model requested the command. Every
//   deny-list hit is a hard block, however the call was auto-approved. The
//   --dangerously-skip-permissions bypass is the caller's responsibility; it is
//   not consulted here.
//
// initiator 'user': a human typed the command (`!cmd` / `semalt-code shell`).
//   Deny-list hits are not blocked for users, except that the catastrophic
//   subset (disk wipe / fork bomb) is routed through a single y/N confirmation
//   as a typo guard.
//
// Result shapes:
//   { action: 'allow' }                        — not deny-listed; run it
//   { action: 'allow', label, bypassed: true } — deny-listed but user-exempt; run it
//   { action: 'block', label }                 — agent-initiated deny-list hit; refuse
//   { action: 'confirm', label }               — user-initiated catastrophic; ask first
function classifyShellCommand(command, initiator = 'agent') {
  const hit = checkShellDenylist(command);
  if (!hit) return { action: 'allow' };

  const { label } = hit;
  // Anything that is not explicitly the user is treated like the agent.
  if (initiator !== 'user') return { action: 'block', label };
  if (hit.catastrophic) return { action: 'confirm', label };
  return { action: 'allow', label, bypassed: true };
}
194
+
195
+ module.exports = {
196
+ DENYLIST,
197
+ checkShellDenylist,
198
+ classifyShellCommand,
199
+ };
package/lib/doctor.js ADDED
@@ -0,0 +1,160 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Self-diagnostics (Task 2.6) — `/doctor` and `semalt-code doctor`.
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // Aggregates a set of pass/warn/fail checks across the install: config validity
8
+ // and the resolved layers (Task 2.2), dashboard reachability, the selected
9
+ // model and whether its context limit is known, audit-log writability, the API
10
+ // key source (Phase 0), and the loaded project-memory files (Task 2.3).
11
+ //
12
+ // The aggregation and formatting are pure; gathering is injected via `deps` so
13
+ // the network/fs checks are testable with mocks.
14
+
15
+ const STATUS_ICON = { pass: '✓', warn: '⚠', fail: '✗' };
16
+
17
// Collapse a list of { name, status, detail } checks into one verdict.
//
// checks: array of check records; anything that is not an array is treated as
//   an empty list. Entries that are falsy or carry a status other than
//   'pass' / 'warn' / 'fail' are kept in the returned list but not counted.
//
// Returns { overall, counts, checks } where overall is 'fail' when at least one
// check failed, otherwise 'warn' when at least one warned, otherwise 'pass'.
function aggregateChecks(checks) {
  const list = Array.isArray(checks) ? checks : [];
  const counts = { pass: 0, warn: 0, fail: 0 };

  list.forEach((entry) => {
    const status = entry ? entry.status : undefined;
    if (status === 'pass' || status === 'warn' || status === 'fail') {
      counts[status] += 1;
    }
  });

  let overall = 'pass';
  if (counts.fail) {
    overall = 'fail';
  } else if (counts.warn) {
    overall = 'warn';
  }
  return { overall, counts, checks: list };
}
28
+
29
// Render an aggregateChecks() result as plain text: a title line, one line per
// check (status icon, name, detail), a blank line, then the overall verdict in
// upper case with the pass/warn/fail counts. A status with no entry in
// STATUS_ICON is shown as '?'. Returns the lines joined with '\n'.
function formatDoctorReport(result) {
  const checkLines = [];
  for (const check of result.checks) {
    const icon = STATUS_ICON[check.status] || '?';
    checkLines.push(` ${icon} ${check.name}: ${check.detail}`);
  }

  const { pass, warn, fail } = result.counts;
  const summary = ` Overall: ${result.overall.toUpperCase()} — ${pass} pass, ${warn} warn, ${fail} fail`;

  return ['semalt-code doctor', ...checkLines, '', summary].join('\n');
}
38
+
39
// Run every diagnostic, collect the outcomes, and aggregate them. Nothing here
// touches the network or the filesystem directly; all of that arrives through
// `deps` (each field is optional):
//   config        resolved (merged) config object            (default {})
//   layers        { userPresent, projectPath, envKeys[], flagKeys[] }
//   apiKeySource  'env' | 'keychain' | 'config' | 'none'     (default 'none')
//   memoryFiles   array of loaded memory file metas (from loadProjectMemory)
//   auditWritable () => boolean                              (default: true)
//   pingDashboard async () => boolean | null (null = skipped/not-logged-in)
//
// Checks are recorded in a fixed order: config, api key, model, dashboard,
// audit log, memory. A throwing pingDashboard counts as unreachable and a
// throwing auditWritable counts as not writable.
// Resolves to the aggregateChecks() result.
async function runDoctor(deps) {
  const {
    config = {},
    layers = {},
    apiKeySource = 'none',
    memoryFiles = [],
    auditWritable = () => true,
    pingDashboard = async () => null,
  } = deps || {};

  const checks = [];
  const record = (name, status, detail) => {
    checks.push({ name, status, detail });
  };

  // 1. Config + resolved layers.
  const layerParts = [layers.userPresent ? 'user' : 'user(default)'];
  if (layers.projectPath) layerParts.push(`project(${layers.projectPath})`);
  if (Array.isArray(layers.envKeys) && layers.envKeys.length) {
    layerParts.push(`env(${layers.envKeys.join(',')})`);
  }
  if (Array.isArray(layers.flagKeys) && layers.flagKeys.length) {
    layerParts.push(`flags(${layers.flagKeys.join(',')})`);
  }
  record('config', 'pass', `loaded; layers: ${layerParts.join(' → ')}`);

  // 2. API key source (Phase 0).
  if (apiKeySource === 'none') {
    record('api key', 'warn', "no key (env/keychain/config all empty); requests may 401");
  } else {
    record('api key', 'pass', `source: ${apiKeySource}`);
  }

  // 3. Selected model + context limit.
  const model = config.default_model;
  if (!model) {
    record('model', 'warn', 'no default_model selected (run /models)');
  } else if (Number.isInteger(config.context_length) && config.context_length > 0) {
    record('model', 'pass', `${model} (context limit ${config.context_length})`);
  } else {
    record('model', 'warn', `${model} (context limit unknown — learned on first overflow)`);
  }

  // 4. Dashboard reachability. null means the probe was skipped.
  let reachable;
  try {
    reachable = await pingDashboard();
  } catch {
    reachable = false;
  }
  if (reachable === null) {
    record('dashboard', 'warn', `${config.dashboard_url || '(unset)'} — not logged in (skipped)`);
  } else if (reachable) {
    record('dashboard', 'pass', `${config.dashboard_url} reachable`);
  } else {
    record('dashboard', 'fail', `${config.dashboard_url} unreachable`);
  }

  // 5. Audit-log writability.
  let auditOk;
  try {
    auditOk = !!auditWritable();
  } catch {
    auditOk = false;
  }
  if (auditOk) {
    record('audit log', 'pass', 'writable');
  } else {
    record('audit log', 'fail', 'not writable');
  }

  // 6. Project memory (Task 2.3). Absence is fine — memory files are optional.
  const fileCount = Array.isArray(memoryFiles) ? memoryFiles.length : 0;
  if (fileCount) {
    record('memory', 'pass', `${fileCount} file(s): ${memoryFiles.map((f) => f.path).join(', ')}`);
  } else {
    record('memory', 'pass', 'no AGENTS.md/CLAUDE.md found (optional)');
  }

  return aggregateChecks(checks);
}
121
+
122
// Production entry point: collect the real inputs and hand them to runDoctor.
//
//   getConfig      optional () => config; a missing function or a falsy return
//                  value yields {}
//   pingDashboard  optional async probe supplied by the caller; when absent a
//                  probe resolving to null ("skipped") is used
//
// Sibling modules are required lazily, inside the call. The audit-log probe
// creates the log's directory if needed and appends an empty string to the
// log; any error there is reported as "not writable". A failure while loading
// project memory is treated as "no memory files".
// Resolves to the runDoctor() result.
async function diagnose({ getConfig, pingDashboard } = {}) {
  const fs = require('fs');
  const path = require('path');
  const { readUserConfig, findProjectConfigPath, envConfigLayer, flagsConfigLayer } = require('./config');
  const { apiKeySource: resolveKeySource } = require('./secrets');
  const { loadProjectMemory } = require('./memory');
  const { AUDIT_LOG } = require('./audit');

  const config = (typeof getConfig === 'function' ? getConfig() : {}) || {};

  const userPresent = !!readUserConfig();
  const projectPath = findProjectConfigPath(process.cwd());
  const envKeys = Object.keys(envConfigLayer(process.env));
  const flagKeys = Object.keys(flagsConfigLayer(process.argv.slice(2)));
  const layers = { userPresent, projectPath, envKeys, flagKeys };

  function auditWritable() {
    try {
      fs.mkdirSync(path.dirname(AUDIT_LOG), { recursive: true });
      // Appending nothing exercises write permission without changing content.
      fs.appendFileSync(AUDIT_LOG, '');
    } catch {
      return false;
    }
    return true;
  }

  let memoryFiles;
  try {
    memoryFiles = loadProjectMemory().files;
  } catch {
    memoryFiles = [];
  }

  return runDoctor({
    config,
    layers,
    apiKeySource: resolveKeySource(config),
    memoryFiles,
    auditWritable,
    pingDashboard: pingDashboard || (async () => null),
  });
}
159
+
160
+ module.exports = { aggregateChecks, formatDoctorReport, runDoctor, diagnose, STATUS_ICON };
package/lib/headless.js ADDED
@@ -0,0 +1,202 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Headless output surface (Task 2.4) — `-p/--print` + --output-format
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // Three formats:
8
+ // text human output (default) — handled by the caller, not here.
9
+ // json a single JSON object { result, toolCalls, usage, cost } to
10
+ // stdout, nothing else.
11
+ // stream-json newline-delimited JSON events (assistant / tool / result),
12
+ // one per line, for piping.
13
+ //
14
+ // Machine modes must keep stdout byte-pure: no spinners, no status bar, no
15
+ // ANSI. The two chrome sinks in a headless run both honor the tools.js
16
+ // "UI active" flag: tools' _log (the ✓/✗ lines) and the write/append permission
17
+ // diff (writer.scrollback). Flipping setUIActive(true) for the duration of the
18
+ // run suppresses both, so nothing but the structured JSON is produced. The JSON
19
+ // itself is written through an injectable sink (default process.stdout) so the
20
+ // formatter is unit-testable without touching the global stream.
21
+ //
22
+ // Phase 0 safety is unchanged: headless still refuses deny-listed / interactive
23
+ // approvals unless --dangerously-skip-permissions, because that gate lives in
24
+ // the permission layer the agent loop already runs through.
25
+
26
+ const { setUIActive, isUIActive } = require('./tools');
27
+ const { priceForModel, computeCost } = require('./pricing');
28
+ const { DEFAULT_MAX_ITERATIONS } = require('./constants');
29
+ const { buildToolOperation } = require('./ui/tool-operation');
30
+ const { renderOperation } = require('./ui/render-operation');
31
+
32
// Output formats whose stdout must contain nothing but structured JSON.
const MACHINE_MODES = new Set(['json', 'stream-json']);

// True when `mode` is one of the machine-readable output formats
// ('json' or 'stream-json'); false for 'text' and any other value.
function isMachineMode(mode) {
  return MACHINE_MODES.has(mode);
}
35
+
36
// Summarise token usage from a Metrics-like object ({ turns: [...] }).
//
// prompt_tokens / completion_tokens are totals over all turns (missing or
// falsy per-turn values count as 0) and total_tokens is their sum.
// context_tokens is the LAST turn's promptTokens, i.e. the current context
// size. context_base_est / context_working_est are the last turn's baseEst /
// workingEst — estimates, hence the *_est suffix. `turns` is the number of
// turns. A missing metrics object or a non-array `turns` yields all zeros.
function usageFromMetrics(metrics) {
  const turns = metrics && Array.isArray(metrics.turns) ? metrics.turns : [];

  const totalOf = (field) =>
    turns.reduce((sum, turn) => sum + ((turn && turn[field]) || 0), 0);
  const promptTotal = totalOf('promptTokens');
  const completionTotal = totalOf('completionTokens');

  // Fields that describe the current context come from the final turn only.
  const finalTurn = turns[turns.length - 1];
  const fromFinalTurn = (field) => (finalTurn ? (finalTurn[field] || 0) : 0);

  return {
    prompt_tokens: promptTotal,
    completion_tokens: completionTotal,
    total_tokens: promptTotal + completionTotal,
    context_tokens: fromFinalTurn('promptTokens'),
    context_base_est: fromFinalTurn('baseEst'),
    context_working_est: fromFinalTurn('workingEst'),
    turns: turns.length,
  };
}
61
+
62
// Pick the text reported as the run's final result.
//
// Scans `messages` from the end for the most recent entry whose role is
// 'assistant' and returns its content ('' when that content is falsy). When
// `messages` is not an array, or holds no assistant entry, falls back to the
// last element of `assistantMsgs` (the streamed assistant messages), or ''
// when that list is missing or empty.
function finalResult(messages, assistantMsgs) {
  if (Array.isArray(messages)) {
    let idx = messages.length;
    while (idx-- > 0) {
      const entry = messages[idx];
      if (entry && entry.role === 'assistant') {
        return entry.content || '';
      }
    }
  }
  if (assistantMsgs && assistantMsgs.length) {
    return assistantMsgs[assistantMsgs.length - 1];
  }
  return '';
}
72
+
73
// Build the agent-loop callbacks and the `finalize` function for one headless
// run.
//
//   mode      output format; callbacks are only installed, and finalize only
//             emits, for the machine modes (see isMachineMode)
//   emitLine  (obj) => void — writes one JSON record
//   options   { model, priceOverrides } — used to look up the price applied
//             when computing cost
//
// The sink records every tool call and assistant message. In 'stream-json'
// mode each one is also emitted immediately as a { type: 'assistant' | 'tool' }
// event and finalize emits a closing { type: 'result' } event; in 'json' mode
// finalize emits a single object that includes the collected toolCalls.
// Non-warning errors reported through onError are remembered and the last
// message is attached to the final record as `error`.
//
// Returns { callbacks, finalize, toolCalls, assistantMsgs }.
function createHeadlessSink(mode, emitLine, { model = null, priceOverrides = null } = {}) {
  const toolCalls = [];
  const assistantMsgs = [];
  let lastError = null;
  const machine = isMachineMode(mode);
  const price = priceForModel(model, priceOverrides);
  const streaming = mode === 'stream-json';

  // Build the same ToolOperation descriptor the interactive sink builds and
  // render its json-mode core. Any failure yields null so the caller can fall
  // back to the legacy-only record instead of crashing.
  const describeTool = (tag, resultStr, ms, meta, ok) => {
    try {
      const attrs = meta ? meta.attrs : null;
      const operation = buildToolOperation({
        id: meta ? meta.id : null,
        tag,
        arg: attrs ? (attrs.command || attrs.path || attrs.url || attrs.src || attrs.key || attrs.name || attrs.pattern) : '',
        attrs,
        status: ok ? 'ok' : 'error',
        durationMs: ms,
        meta: meta ? meta.meta : null,
        error: meta ? meta.error : null,
        diff: meta ? meta.diff : null,
        // Model-facing result, passed so the descriptor can derive an
        // output-preview detail. Used for display data only.
        output: typeof resultStr === 'string' ? resultStr : null,
        noDuration: tag === 'ask_user',
      });
      return renderOperation(operation, { mode: 'json' });
    } catch (_e) {
      return null;
    }
  };

  const callbacks = {};
  if (machine) {
    callbacks.onAssistantMessage = (message) => {
      assistantMsgs.push(message);
      if (streaming) emitLine({ type: 'assistant', content: message });
    };

    callbacks.onToolEnd = (tag, resultStr, ms, meta) => {
      const call = meta && Array.isArray(meta.call) ? meta.call : null;
      const ok = !(meta && meta.error);
      // Legacy per-tool fields: the pinned contract. Their names, types and
      // values must not drift.
      const legacy = { tool: tag, args: call ? call.slice(1) : [], ok, ms };
      const core = describeTool(tag, resultStr, ms, meta, ok);
      // Descriptor data goes UNDER the legacy fields: `legacy` spreads last so
      // tool/args/ok/ms win on any name clash.
      const rec = core ? { ...core, ...legacy } : { ...legacy };
      toolCalls.push(rec);
      if (streaming) emitLine({ type: 'tool', ...rec });
    };

    callbacks.onError = (e) => {
      if (e && !e.isWarning && e.message) lastError = e.message;
    };
  }

  // Emit the closing record. `stopReason` defaults to 'end_turn' and
  // `verifyStatus` to 'skipped' so both are always present for consumers.
  // `cost` is whatever computeCost returns for the aggregated usage and the
  // price resolved above.
  function finalize({ messages, metrics, stopReason, verifyStatus } = {}) {
    if (!machine) return;
    const result = finalResult(messages, assistantMsgs);
    const usage = usageFromMetrics(metrics);
    const cost = computeCost(usage, price);
    const tail = {
      usage,
      cost,
      stopReason: stopReason || 'end_turn',
      verifyStatus: verifyStatus || 'skipped',
    };
    if (lastError) tail.error = lastError;

    if (mode === 'json') {
      emitLine({ result, toolCalls, ...tail });
    } else {
      emitLine({ type: 'result', result, ...tail });
    }
  }

  return { callbacks, finalize, toolCalls, assistantMsgs };
}
157
+
158
// Drive one agent-loop run in headless mode.
//
// For machine modes ('json' / 'stream-json') the tools "UI active" flag is set
// for the duration of the run and restored afterwards — also when the loop
// throws — and structured JSON is written through `write` (default:
// process.stdout.write), one JSON document per line. In any other mode the flag
// is left untouched and the sink emits nothing.
//
// The sink's callbacks are merged over agentOpts.callbacks, so on a name clash
// the sink's callback replaces the caller's. `maxIterations` falls back to
// DEFAULT_MAX_ITERATIONS only when it is exactly undefined.
//
// Resolves to whatever runAgentLoop resolved to.
async function runHeadless({
  runAgentLoop,
  messages,
  model,
  tokenLimit = null,
  maxIterations,
  agentOpts = {},
  mode = 'text',
  write,
  priceOverrides = null,
}) {
  const suppressChrome = isMachineMode(mode);
  const sinkWrite = write || ((chunk) => process.stdout.write(chunk));
  const sink = createHeadlessSink(
    mode,
    (event) => sinkWrite(JSON.stringify(event) + '\n'),
    { model, priceOverrides },
  );
  const iterationCap = maxIterations === undefined ? DEFAULT_MAX_ITERATIONS : maxIterations;

  let savedUIActive = null;
  if (suppressChrome) {
    savedUIActive = isUIActive();
    setUIActive(true);
  }

  try {
    const mergedCallbacks = Object.assign({}, agentOpts.callbacks || {}, sink.callbacks);
    const outcome = await runAgentLoop(
      messages,
      model,
      iterationCap,
      tokenLimit,
      { ...agentOpts, callbacks: mergedCallbacks },
    );
    sink.finalize(outcome);
    return outcome;
  } finally {
    if (suppressChrome) setUIActive(savedUIActive);
  }
}
195
+
196
+ module.exports = {
197
+ isMachineMode,
198
+ usageFromMetrics,
199
+ finalResult,
200
+ createHeadlessSink,
201
+ runHeadless,
202
+ };