@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
package/lib/payload.js ADDED
@@ -0,0 +1,54 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Optional chat/completions payload augmentations (Task 2.7).
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // Pure functions, gated by config + model support, applied to the request body
8
+ // just before it is serialized. Kept separate from api.js so the exact field
9
+ // presence/absence is unit-testable without a live endpoint.
10
+
11
// Prompt caching: tag the STABLE PREFIX of the request — the last message whose
// role is 'system' and the final entry of the tools array — with Anthropic-style
// cache_control:{type:'ephemeral'} so providers that honor it can reuse the
// prefix across turns. The payload's arrays are updated in place (the tagged
// entries are replaced by shallow copies) and the same payload object is
// returned. When `enabled` is falsy or there is no payload, nothing is touched.
// Gated upstream by config.prompt_caching, so the field is never sent to
// endpoints the user hasn't opted in for.
function applyPromptCaching(payload, enabled) {
  if (!payload || !enabled) return payload;

  const messages = payload.messages;
  if (Array.isArray(messages)) {
    // Walk backwards to the most recent system message, if there is one.
    let idx = messages.length - 1;
    while (idx >= 0 && !(messages[idx] && messages[idx].role === 'system')) idx--;
    if (idx >= 0) {
      messages[idx] = { ...messages[idx], cache_control: { type: 'ephemeral' } };
    }
  }

  const tools = payload.tools;
  if (Array.isArray(tools) && tools.length) {
    const tail = tools.length - 1;
    tools[tail] = { ...tools[tail], cache_control: { type: 'ephemeral' } };
  }

  return payload;
}
33
+
34
// Heuristic for OpenAI-style `reasoning_effort` support. A model id qualifies
// when it looks like a reasoning family: o1–o4 (as a leading or `/`/`-`
// delimited segment), gpt-5, anything containing "reason" or "thinking",
// deepseek-r1 / a delimited `r1` segment, or qwq. Matching is case-insensitive.
// Non-string or empty ids never qualify.
function supportsReasoningEffort(model) {
  const usable = typeof model === 'string' && model.length > 0;
  if (!usable) return false;
  const reasoningFamily = /(^|[/\-])o[1-4]([-/]|$|mini|preview)|gpt-5|reason|deepseek-r1|(^|[/\-])r1([-/]|$)|thinking|qwq/i;
  return reasoningFamily.test(model);
}
40
+
41
+ const VALID_EFFORTS = new Set(['minimal', 'low', 'medium', 'high']);
42
+
43
// Set `payload.reasoning_effort` when an effort is configured, it is one of the
// VALID_EFFORTS (compared case-insensitively), and either the model passes the
// supportsReasoningEffort heuristic or `force` is set for a model the heuristic
// misses. In every other case the payload is returned untouched.
function applyReasoningEffort(payload, effort, model, { force = false } = {}) {
  if (payload && effort) {
    const level = String(effort).toLowerCase();
    if (VALID_EFFORTS.has(level) && (force || supportsReasoningEffort(model))) {
      payload.reasoning_effort = level;
    }
  }
  return payload;
}
53
+
54
+ module.exports = { applyPromptCaching, supportsReasoningEffort, applyReasoningEffort, VALID_EFFORTS };
@@ -0,0 +1,401 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Per-pattern permission rules (Task 4.1) — the pure rule engine.
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // Extends the coarse per-tier permission model (--allow-fs/exec/net, --readonly,
8
+ // per-session "always") with rich rules that match on a TOOL plus its ARGUMENTS
9
+ // (glob OR regex) and resolve to one of `allow` / `deny` / `ask`. Rules are
10
+ // layered across user scope (~/.semalt-ai/config.json) and project scope
11
+ // (.semalt/config.json — attacker-controllable in a cloned repo).
12
+ //
13
+ // EVERYTHING in this module is a pure function: no I/O beyond fs.realpathSync for
14
+ // path canonicalization (constraint 3), which is unavoidable to resolve symlinks.
15
+ // The manager (lib/permissions.js) and the agent gate (lib/agent.js) consume the
16
+ // decisions; composition with the unbypassable Phase 0 controls (deny-list,
17
+ // secret-file guard, --readonly, isPathSafe) happens THERE and downstream in the
18
+ // executors — an `allow` rule can never re-enable something those forbid.
19
+ //
20
+ // The six security constraints (see Task 4.1 brief), and where each lives:
21
+ // 1. Project can only NARROW — resolvePermission drops every project `allow` rule
22
+ // structurally before resolution, so a project rule can only ever contribute
23
+ // `deny`/`ask`. Enforced here, not by convention.
24
+ // 2. Precedence is total + deterministic — deny > ask > allow; more-specific
25
+ // beats less-specific; equal specificity resolves by deny>ask>allow (so it
26
+ // is order-independent). Across layers: most-restrictive wins.
27
+ // 3. Canonicalize before matching — normalizeCall resolves `..`, symlinks, and
28
+ // absolute/relative forms; matching is on the canonical form.
29
+ // 4. Regex safety — normalizeRule rejects pathological patterns (ReDoS guard)
30
+ // and bounds subject length; a regex that errors/over-runs fails closed.
31
+ // 5. Fail closed — a malformed rule is dropped at load; a matcher error never
32
+ // GRANTS (an erroring `allow` is treated as no-match) and still RESTRICTS
33
+ // (an erroring `deny`/`ask` is treated as a match).
34
+ // 6. Compose, don't bypass — the resolver only ever returns allow/deny/ask/null
35
+ // for the RULE layer; the manager keeps the deny-list/secret/readonly checks.
36
+
37
+ const fs = require('fs');
38
+ const path = require('path');
39
+
40
+ // Per canonical action (call[0]): its public tag (for matching by either name)
41
+ // and the argument shape used to derive matchable subjects.
42
+ // category 'shell' → args[0] is the command string
43
+ // category 'file' → `paths` indices are filesystem paths (canonicalized)
44
+ // category 'net' → `urls` indices are URLs; `paths` indices are dest files
45
+ // category 'other' → no matchable argument subject (only tool-only rules match)
46
+ const ACTION_META = {
47
+ shell: { tag: 'exec', category: 'shell' },
48
+ read: { tag: 'read_file', category: 'file', paths: [0] },
49
+ write: { tag: 'write_file', category: 'file', paths: [0] },
50
+ append: { tag: 'append_file', category: 'file', paths: [0] },
51
+ list_dir: { tag: 'list_dir', category: 'file', paths: [0] },
52
+ delete_file: { tag: 'delete_file', category: 'file', paths: [0] },
53
+ make_dir: { tag: 'make_dir', category: 'file', paths: [0] },
54
+ remove_dir: { tag: 'remove_dir', category: 'file', paths: [0] },
55
+ move_file: { tag: 'move_file', category: 'file', paths: [0, 1] },
56
+ copy_file: { tag: 'copy_file', category: 'file', paths: [0, 1] },
57
+ edit_file: { tag: 'edit_file', category: 'file', paths: [0] },
58
+ search_in_file: { tag: 'search_in_file', category: 'file', paths: [0] },
59
+ replace_in_file: { tag: 'replace_in_file', category: 'file', paths: [0] },
60
+ search_files: { tag: 'search_files', category: 'file', paths: [1] },
61
+ file_stat: { tag: 'file_stat', category: 'file', paths: [0] },
62
+ upload: { tag: 'upload', category: 'file', paths: [0] },
63
+ grep: { tag: 'grep', category: 'file' },
64
+ glob: { tag: 'glob', category: 'file' },
65
+ download: { tag: 'download', category: 'net', urls: [0], paths: [1] },
66
+ http_get: { tag: 'http_get', category: 'net', urls: [0] },
67
+ ask_user: { tag: 'ask_user', category: 'other' },
68
+ store_memory: { tag: 'store_memory', category: 'other' },
69
+ recall_memory: { tag: 'recall_memory', category: 'other' },
70
+ list_memories: { tag: 'list_memories', category: 'other' },
71
+ get_env: { tag: 'get_env', category: 'other' },
72
+ set_env: { tag: 'set_env', category: 'other' },
73
+ system_info: { tag: 'system_info', category: 'other' },
74
+ };
75
+
76
+ const VALID_ACTIONS = new Set(['allow', 'deny', 'ask']);
77
+ // Restrictiveness rank — used to pick the most-restrictive decision across layers.
78
+ const RANK = { deny: 3, ask: 2, allow: 1 };
79
+
80
+ // ── ReDoS guard (constraint 4) ─────────────────────────────────────────────
81
+ // Mirror of the cheap heuristic in lib/tools.js: reject pathologically long
82
+ // patterns and the common catastrophic-backtracking anti-patterns. A pattern
83
+ // that trips this is dropped at load time (fail closed). Subject length is
84
+ // additionally bounded at match time.
85
const MAX_PATTERN_LEN = 1000;
const MAX_SUBJECT_LEN = 8192;

// Cheap load-time screen for a pattern source. Reports true (unsafe) when the
// source is not a string, is longer than MAX_PATTERN_LEN, or contains one of the
// two shapes this heuristic looks for: a parenthesized group that holds a `+`/`*`
// and is itself followed by a `+`/`*`, or a bracket class followed by two
// `+`/`*` quantifiers. It is a heuristic, not a proof of safety.
function isPatternUnsafe(source) {
  const nestedQuantifier = /(\(.*[+*].*\).*[+*])|(\[.*\].*[+*].*[+*])/;
  const acceptable = typeof source === 'string'
    && source.length <= MAX_PATTERN_LEN
    && !nestedQuantifier.test(source);
  return !acceptable;
}
94
+
95
+ // ── matcher compilation ────────────────────────────────────────────────────
96
+
97
// A glob → anchored RegExp. `crossSep` controls whether `*`/`?` cross a path
// separator: false for path-style globs (segment-aware), true for command/URL
// globs (greedy). `**` always crosses separators; `**/` compiles to an optional
// "anything ending in a slash" prefix, so `**/*.env` matches both `x.env` and
// `a/b/x.env`, and `src/**` matches `src/a/b`. Every other regex metacharacter
// in the glob is escaped and matched literally.
//
// The expression is compiled with the `s` (dotAll) flag. Without it `.` skips
// line terminators, so a subject containing a newline (e.g. a directory name
// with an embedded `\n`) would silently fail to match `**` or a cross-separator
// `*`/`?` — which lets such a subject slip past a `deny`/`ask` rule.
//
// @param {string} glob                 glob source
// @param {{crossSep?: boolean}} [opts] whether `*`/`?` may match `/`
// @returns {RegExp} anchored (`^…$`) expression for the glob
function globToRegExp(glob, { crossSep = false } = {}) {
  let re = '';
  for (let i = 0; i < glob.length; i++) {
    const c = glob[i];
    if (c === '*') {
      if (glob[i + 1] === '*') {
        i++; // consume the second `*`
        if (glob[i + 1] === '/') {
          i++; // `**/` swallows its separator so zero directories also match
          re += '(?:.*/)?';
        } else {
          re += '.*';
        }
      } else {
        re += crossSep ? '.*' : '[^/]*';
      }
    } else if (c === '?') {
      re += crossSep ? '.' : '[^/]';
    } else if ('\\^$+.()|{}[]'.includes(c)) {
      re += '\\' + c;
    } else {
      re += c;
    }
  }
  return new RegExp('^' + re + '$', 's');
}
124
+
125
// Specificity weight of a pattern: how many of its characters are NOT wildcard
// or metacharacters. For kind 'regex' the excluded set is `.*+?()[]{}|^$\`;
// for any other kind (globs) only `*` and `?` are excluded. A non-string source
// weighs 0. More literal characters ⇒ more specific.
function literalCount(source, kind) {
  if (typeof source !== 'string') return 0;
  const wildcards = kind === 'regex' ? '.*+?()[]{}|^$\\' : '*?';
  return [...source].filter((ch) => !wildcards.includes(ch)).length;
}
134
+
135
// Compile a rule's argument matcher from its source string.
//
//   - no source (a tool-only rule) or a bare `*` / `**` → a match-everything
//     matcher of kind 'any' with specificity 0;
//   - `/body/flags` → kind 'regex';
//   - anything else → kind 'glob', where `crossSep` comes from which key the
//     user used (`path:` ⇒ false; `pattern:`/`url:`/`match:` ⇒ true).
//
// Throws on an unsafe or invalid pattern so normalizeRule can drop the rule
// (fail closed).
//
// The returned `test(str)` for regex/glob matchers:
//   - THROWS a RangeError when the subject is longer than MAX_SUBJECT_LEN
//     instead of matching a truncated prefix. Matching only the prefix would let
//     an over-long subject hide the part a `deny`/`ask` rule is looking for;
//     throwing lets the caller's matcher-error handling restrict without ever
//     granting.
//   - resets `lastIndex` before every call. The `g` flag is stripped, but the
//     sticky `y` flag is stateful too, and without the reset a `/…/y` rule would
//     alternate between matching and not matching the same subject.
//
// @param {string|null|undefined} source  raw matcher source from the rule
// @param {boolean} crossSep              whether glob `*`/`?` may cross `/`
// @returns {{kind: 'any'|'regex'|'glob', specificity: number, test: function(string): boolean}}
// @throws {Error} when the pattern is rejected by the ReDoS guard or is not a
//                 valid regular expression
function compileMatcher(source, crossSep) {
  if (source == null) return { kind: 'any', specificity: 0, test: () => true };
  const s = String(source);
  if (s === '*' || s === '**') return { kind: 'any', specificity: 0, test: () => true };

  // Wrap a compiled expression in the bounded, stateless test described above.
  const boundedTest = (compiled) => (str) => {
    if (str.length > MAX_SUBJECT_LEN) {
      throw new RangeError(`subject exceeds ${MAX_SUBJECT_LEN} characters`);
    }
    compiled.lastIndex = 0;
    return compiled.test(str);
  };

  const rx = s.match(/^\/(.*)\/([gimsuy]*)$/);
  if (rx) {
    const body = rx[1];
    if (isPatternUnsafe(body)) throw new Error(`unsafe regex pattern: ${s}`);
    // Strip the stateful `g` flag (it makes .test() position-dependent).
    const flags = (rx[2] || '').replace(/g/g, '');
    const regex = new RegExp(body, flags);
    return {
      kind: 'regex',
      specificity: literalCount(body, 'regex'),
      test: boundedTest(regex),
    };
  }

  if (isPatternUnsafe(s)) throw new Error(`unsafe glob pattern: ${s}`);
  const globRe = globToRegExp(s, { crossSep });
  return {
    kind: 'glob',
    specificity: literalCount(s, 'glob'),
    test: boundedTest(globRe),
  };
}
166
+
167
+ const TOOL_WEIGHT = 1000; // a literal tool dominates an argument-pattern's weight
168
+
169
// Turn one raw rule object into an internal rule, or return null when it is
// malformed (each drop is reported through `log`, when `log` is a function).
// `scope` is 'user' | 'project'. A rule needs a valid `action` and a non-empty
// `tool`; its argument matcher comes from at most one of `pattern` | `path` |
// `url` | `match` — supplying more than one is ambiguous and drops the rule
// (fail closed). A pattern that fails to compile also drops the rule.
function normalizeRule(raw, scope, log) {
  const drop = (msg) => {
    if (typeof log === 'function') log(`permission rule dropped (${scope}): ${msg}`);
    return null;
  };

  const isRuleObject = raw && typeof raw === 'object' && !Array.isArray(raw);
  if (!isRuleObject) return drop('not an object');

  const action = typeof raw.action === 'string' ? raw.action.trim().toLowerCase() : '';
  if (!VALID_ACTIONS.has(action)) return drop(`bad action ${JSON.stringify(raw.action)}`);

  const tool = typeof raw.tool === 'string' ? raw.tool.trim() : '';
  if (!tool) return drop('missing tool');

  // Which matcher keys carry a usable (non-null, non-empty) value?
  const present = ['pattern', 'path', 'url', 'match'].filter((k) => raw[k] != null && raw[k] !== '');
  if (present.length > 1) return drop(`multiple matcher keys (${present.join(', ')})`);
  const [key = null] = present;
  const source = key ? String(raw[key]) : null;
  // Path globs are segment-aware; every other key is greedy across `/`.
  const crossSep = key !== 'path';

  let toolMatcher;
  let matcher;
  try {
    toolMatcher = globToRegExp(tool, { crossSep: true });
    matcher = compileMatcher(source, crossSep);
  } catch (err) {
    return drop(err.message);
  }

  // A wildcard tool contributes no specificity; a literal one dominates.
  const isWildcardTool = tool === '*' || tool === '**';
  const toolSpecificity = isWildcardTool ? 0 : TOOL_WEIGHT;

  return {
    scope,
    tool,
    toolMatcher,
    matcher,
    matcherKey: key,
    source,
    action,
    specificity: toolSpecificity + matcher.specificity,
  };
}
210
+
211
// Normalize every raw rule of one layer, keeping only the ones normalizeRule
// accepts (malformed entries come back null and are left out). Anything that is
// not an array yields an empty layer.
function normalizeRuleLayer(rawRules, scope, log) {
  if (!Array.isArray(rawRules)) return [];
  return Array.from(rawRules, (raw) => normalizeRule(raw, scope, log))
    .filter((rule) => rule);
}
221
+
222
// Build the layered rule set from the two RAW config objects (already parsed
// JSON, NOT the shallow-merged view — the layers MUST stay separate so the
// project layer can be structurally prevented from widening). Each layer is
// read from `<cfg>.permissions.rules`; a missing or non-array value counts as
// no rules. Returns { user: [...], project: [...] } of normalized rules.
function loadRuleLayers(userCfg, projectCfg, log) {
  const rulesOf = (cfg) => {
    const rules = cfg && cfg.permissions && cfg.permissions.rules;
    return Array.isArray(rules) ? rules : [];
  };
  const user = normalizeRuleLayer(rulesOf(userCfg), 'user', log);
  const project = normalizeRuleLayer(rulesOf(projectCfg), 'project', log);
  return { user, project };
}
233
+
234
+ // ── call canonicalization (constraint 3) ───────────────────────────────────
235
+
236
// Realpath `target` if it exists; otherwise realpath its NEAREST existing
// ancestor and re-append the missing trailing segments. Falls back to `target`
// unchanged when nothing up to the filesystem root can be resolved.
function realpathNearest(target) {
  const missing = [];
  let current = target;
  for (;;) {
    try {
      return path.join(fs.realpathSync(current), ...missing);
    } catch {
      const parent = path.dirname(current);
      if (parent === current) return target; // hit the root — keep the path.resolve form
      missing.unshift(path.basename(current));
      current = parent;
    }
  }
}

// Resolve a path to its canonical absolute form (symlinks + `..` collapsed) and
// a cwd-relative form, both in posix separators so globs match identically on
// every platform.
//
// For a not-yet-existent path (writes) the nearest existing ancestor is
// realpath'd and the missing segments re-appended — walking all the way up, not
// just one level, so `symlink/new-dir/file` still resolves through the symlink.
// The relative form is computed against the canonical cwd as well; otherwise a
// cwd reached through a symlink would produce `../…` paths that no
// project-relative glob could match.
//
// @param {string} p      path as given in the tool call (absolute or relative)
// @param {string} [cwd]  base directory; defaults to process.cwd()
// @returns {{abs: string, rel: string}} canonical absolute and cwd-relative forms
function canonicalizePath(p, cwd) {
  const base = cwd || process.cwd();
  const abs = realpathNearest(path.resolve(base, p));
  const canonicalBase = realpathNearest(path.resolve(base));
  const toPosix = (value) => value.split(path.sep).join('/');
  return { abs: toPosix(abs), rel: toPosix(path.relative(canonicalBase, abs)) };
}
255
+
256
// Canonical form of a shell command for matching: null/undefined become the
// empty string, every whitespace run collapses to one space, and the ends are
// trimmed.
function normalizeCommand(cmd) {
  const text = cmd == null ? '' : String(cmd);
  return text.replace(/\s+/g, ' ').trim();
}
259
+
260
// Turn a [action, ...args] call tuple into the canonical, matchable shape:
//   { action, tag, category, command, url, paths }
//
//   - shell actions: `command` is the whitespace-normalized args[0];
//   - actions with `urls` indices: `url` is the first non-empty one;
//   - actions with `paths` indices: each non-empty path contributes its
//     canonical absolute form and, when it differs, its cwd-relative form.
//
// The action's metadata is looked up as an OWN property of ACTION_META. The
// action name originates from the tool call, so a plain `ACTION_META[action]`
// would also resolve inherited keys such as `constructor` or `__proto__` and
// hand back an object with no tag/category. Unknown actions are treated as
// category 'other' with the action itself as the tag.
//
// @param {Array} call               [action, ...args]; a non-array is treated as empty
// @param {{cwd?: string}} [opts]    base directory for path canonicalization
// @returns {{action: *, tag: *, category: string, command: string|null, url: string|null, paths: string[]}}
function normalizeCall(call, opts = {}) {
  const arr = Array.isArray(call) ? call : [];
  const action = arr[0];
  const args = arr.slice(1);
  const meta = Object.prototype.hasOwnProperty.call(ACTION_META, action)
    ? ACTION_META[action]
    : { tag: action, category: 'other' };
  const cwd = opts.cwd || process.cwd();

  const out = { action, tag: meta.tag, category: meta.category, command: null, url: null, paths: [] };

  if (meta.category === 'shell') {
    out.command = normalizeCommand(args[0]);
  }
  if (meta.urls) {
    for (const i of meta.urls) {
      if (args[i] != null && args[i] !== '') {
        out.url = String(args[i]);
        break; // only the first usable URL is a subject
      }
    }
  }
  if (meta.paths) {
    for (const i of meta.paths) {
      const v = args[i];
      if (v == null || v === '') continue;
      const { abs, rel } = canonicalizePath(String(v), cwd);
      out.paths.push(abs);
      // Offer the relative form too so project-relative globs can match.
      if (rel && rel !== abs) out.paths.push(rel);
    }
  }
  return out;
}
289
+
290
+ // ── matching + resolution ──────────────────────────────────────────────────
291
+
292
// Does the rule's tool glob match the call, under either its canonical action
// name or its public tag? Any failure while testing counts as "no match".
function toolMatches(rule, call) {
  try {
    const names = [call.action, call.tag];
    return names.some((name) => rule.toolMatcher.test(String(name)));
  } catch {
    return false;
  }
}
299
+
300
// Does a rule match a normalized call? Returns true | false | 'error'.
// The tool must match first; a rule whose matcher is of kind 'any' then matches
// outright. Otherwise the matcher is tried against the call's subjects — the
// command for 'shell', the URL plus paths for 'net', the paths for 'file', and
// nothing for any other category (only tool-only rules match those). 'error'
// means the matcher threw at runtime (e.g. a pathological regex that slipped the
// load guard); it is propagated so the caller can fail closed.
function ruleMatchesCall(rule, call) {
  if (!toolMatches(rule, call)) return false;
  if (rule.matcher.kind === 'any') return true;

  let candidates;
  switch (call.category) {
    case 'shell':
      candidates = [call.command];
      break;
    case 'net':
      candidates = [call.url, ...call.paths];
      break;
    case 'file':
      candidates = call.paths;
      break;
    default:
      candidates = []; // 'other' — only tool-only rules match
  }

  for (const candidate of candidates) {
    if (candidate == null) continue;
    let hit;
    try {
      hit = rule.matcher.test(String(candidate));
    } catch {
      return 'error';
    }
    if (hit) return true;
  }
  return false;
}
323
+
324
// Gather the rules of one layer that apply to the call. A matcher error is
// handled fail-closed: it NEVER grants (an erroring `allow` is left out) and it
// still RESTRICTS (an erroring `deny`/`ask` is kept as if it had matched).
function collectMatches(rules, call) {
  const outcomeFor = (rule) => {
    try {
      return ruleMatchesCall(rule, call);
    } catch {
      return 'error';
    }
  };

  const kept = [];
  for (const rule of rules || []) {
    const outcome = outcomeFor(rule);
    const restrictsOnError = outcome === 'error' && rule.action !== 'allow';
    if (outcome === true || restrictsOnError) kept.push(rule);
  }
  return kept;
}
337
+
338
// Reduce one layer's matches to a single { decision, rule }, or null when there
// are none. Only the rules sharing the highest specificity compete; among those
// the order is deny > ask > allow, so the outcome does not depend on the order
// the rules were written in.
function layerDecision(matches) {
  if (!matches || !matches.length) return null;

  const peak = matches.reduce((hi, r) => (r.specificity > hi ? r.specificity : hi), -1);
  const finalists = matches.filter((r) => r.specificity === peak);

  for (const verdict of ['deny', 'ask']) {
    const hit = finalists.find((r) => r.action === verdict);
    if (hit) return { decision: verdict, rule: hit };
  }
  return { decision: 'allow', rule: finalists[0] };
}
352
+
353
// Human-readable label for the rule behind a decision, in the form
// "<scope> <action> <tool>" plus " <key>=<source>" when the rule has a matcher
// source (the key falls back to "pattern"). Returns null when there is no rule.
function ruleReason(rule) {
  if (!rule) return null;
  const head = `${rule.scope} ${rule.action} ${rule.tool}`;
  return rule.source ? `${head} ${rule.matcherKey || 'pattern'}=${rule.source}` : head;
}
358
+
359
// THE resolver. Takes a NORMALIZED call (already canonicalized — constraint 3),
// the layered rules, and a context bag (currently unused here; reserved for the
// tier/readonly composition the manager performs). Returns the deterministic
// rule-layer decision:
//   { decision: 'allow'|'deny'|'ask'|null, rule, reason, scope }
// A `null` decision means no rule matched, and the caller falls back to the
// tier/descriptor default. Project rules can only NARROW: project `allow`
// matches are filtered out right here, before the project layer is resolved, so
// that layer can contribute only `deny`/`ask`. Each layer is resolved on its
// own; across layers the MOST RESTRICTIVE decision wins, with the user layer
// kept on a tie.
function resolvePermission(call, layers, context = {}) { // eslint-disable-line no-unused-vars
  const fromUser = collectMatches(layers && layers.user, call);
  // Structural project-cannot-widen: project `allow` rules never reach resolution.
  const fromProject = collectMatches(layers && layers.project, call)
    .filter((rule) => rule.action !== 'allow');

  const userVerdict = layerDecision(fromUser);
  const projectVerdict = layerDecision(fromProject);

  let chosen = userVerdict || projectVerdict;
  if (userVerdict && projectVerdict && RANK[projectVerdict.decision] > RANK[userVerdict.decision]) {
    chosen = projectVerdict;
  }

  if (!chosen) return { decision: null, rule: null, reason: null, scope: null };
  return {
    decision: chosen.decision,
    rule: chosen.rule,
    reason: ruleReason(chosen.rule),
    scope: chosen.rule.scope,
  };
}
382
+
383
+ module.exports = {
384
+ ACTION_META,
385
+ resolvePermission,
386
+ normalizeCall,
387
+ canonicalizePath,
388
+ normalizeCommand,
389
+ normalizeRule,
390
+ normalizeRuleLayer,
391
+ loadRuleLayers,
392
+ globToRegExp,
393
+ compileMatcher,
394
+ ruleMatchesCall,
395
+ collectMatches,
396
+ layerDecision,
397
+ ruleReason,
398
+ // test seams
399
+ literalCount,
400
+ isPatternUnsafe,
401
+ };
@@ -2,6 +2,7 @@
2
2
 
3
3
  const writer = require('./ui/writer');
4
4
  const messages = require('./ui/messages');
5
+ const { resolvePermission, normalizeCall } = require('./permission-rules');
5
6
 
6
7
  const TIER_FS = ['read_file', 'write_file', 'append_file', 'delete_file', 'list_dir', 'make_dir', 'move_file', 'copy_file', 'file_stat', 'search_files', 'store_memory', 'recall_memory'];
7
8
  const TIER_EXEC = ['exec'];
@@ -9,11 +10,23 @@ const TIER_NET = ['http_get', 'download'];
9
10
  const TIER_SYS = ['system_info', 'get_env', 'set_env'];
10
11
 
11
12
  const TIER_MAP = { fs: TIER_FS, exec: TIER_EXEC, net: TIER_NET, sys: TIER_SYS };
12
- const READONLY_BLOCKED = new Set(['write_file', 'append_file', 'delete_file', 'move_file', 'copy_file']);
13
+ // Every FILE-mutating tool. --readonly governs file tools only; shell side
14
+ // effects are NOT constrained here (a read-only session must still run `ls` /
15
+ // `git status`) — shell writes are confined by the OS sandbox + deny-list,
16
+ // the right layer for that (Pre-Task 5.0c).
17
+ const READONLY_BLOCKED = new Set([
18
+ 'write_file', 'append_file', 'delete_file', 'move_file', 'copy_file', 'download',
19
+ 'edit_file', 'replace_in_file', 'make_dir', 'remove_dir', 'upload',
20
+ // Native git tools (Task 5.1). The mutating git tools (the create/delete paths
21
+ // of branch/worktree are gated inside their executors) honor --readonly too — a
22
+ // read-only session must not stage/commit/switch/create. Read-only git tools
23
+ // (git_status/git_diff/git_log, and the LIST ops) are NOT here, so they still run.
24
+ 'git_add', 'git_commit', 'git_branch', 'git_checkout', 'git_worktree',
25
+ ]);
13
26
 
14
27
  let _permissionQueueTail = Promise.resolve();
15
28
 
16
- function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {}) {
29
+ function createPermissionManager(ui, { allowedTiers = [], readonly = false, skipPermissions = false, rules = null, cwd = null, approver = null, quiet = false } = {}) {
17
30
  const { BOLD, FG_CYAN, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, interactiveSelect } = ui;
18
31
 
19
32
  const autoApprovedTags = new Set();
@@ -27,6 +40,29 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
27
40
  sessionApprovedTags: new Set(),
28
41
  };
29
42
 
43
+ // Per-pattern rule layers (Task 4.1). { user: [...], project: [...] } of
44
+ // already-normalized rules, kept SEPARATE so the project layer can be
45
+ // structurally prevented from widening (see lib/permission-rules.js).
46
+ const ruleLayers = (rules && typeof rules === 'object')
47
+ ? { user: rules.user || [], project: rules.project || [] }
48
+ : { user: [], project: [] };
49
+ const hasRules = ruleLayers.user.length > 0 || ruleLayers.project.length > 0;
50
+
51
+ // Resolve the per-pattern rule decision for a [action, ...args] call tuple.
52
+ // Returns { decision: 'allow'|'deny'|'ask'|null, rule, reason }. `null` when no
53
+ // rule matches → the caller falls back to the tier/descriptor default. Pure
54
+ // wrapper around resolvePermission; any failure fails closed to a null decision
55
+ // (the normal gate then still asks for mutating tools).
56
+ function resolveRule(call) {
57
+ if (!hasRules) return { decision: null, rule: null, reason: null };
58
+ try {
59
+ const normalized = normalizeCall(call, { cwd: cwd || process.cwd() });
60
+ return resolvePermission(normalized, ruleLayers, { readonly, tiers: allowedTiers });
61
+ } catch {
62
+ return { decision: null, rule: null, reason: null };
63
+ }
64
+ }
65
+
30
66
  let uiCallbacks = null;
31
67
 
32
68
  function setUICallbacks(callbacks) {
@@ -124,20 +160,73 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
124
160
  }
125
161
  }
126
162
 
127
- async function askPermission(actionType, description, tag) {
128
- if (state.autoApproveAll) {
129
- _emitAutoApproved(description);
163
+ async function askPermission(actionType, description, tag, ruleVerdict = null) {
164
+ // --dangerously-skip-permissions is the ONLY way to fully auto-approve any
165
+ // tool call. It does not bypass the destructive-command deny-list (enforced
166
+ // unbypassably in tools.js) — it only skips the interactive/refusal gate.
167
+ // A per-pattern `deny` rule is handled in the agent gate BEFORE this point
168
+ // (it blocks even under skip-permissions); here we see only allow/ask/null.
169
+ if (skipPermissions) {
170
+ _emitAutoApproved(`[--dangerously-skip-permissions] ${description}`);
130
171
  return true;
131
172
  }
132
173
 
133
- if (tag && (autoApprovedTags.has(tag) || state.sessionApprovedTags.has(tag))) {
134
- _emitAutoApproved(description);
135
- return true;
174
+ // Per-pattern rules (Task 4.1). An `ask` rule FORCES the interactive prompt:
175
+ // it bypasses the auto-approve shortcuts below (tier flags, /approve, and the
176
+ // per-session "always") so a user policy of "ask for this" always holds. An
177
+ // `allow` rule auto-approves even what a tier wouldn't — but still composes
178
+ // with the deny-list / secret-guard / --readonly enforced downstream.
179
+ const ruleDecision = ruleVerdict && ruleVerdict.decision;
180
+ const forceAsk = ruleDecision === 'ask';
181
+
182
+ if (!forceAsk) {
183
+ if (ruleDecision === 'allow') {
184
+ _emitAutoApproved(`[rule${ruleVerdict.reason ? `: ${ruleVerdict.reason}` : ''}] ${description}`);
185
+ return true;
186
+ }
187
+
188
+ if (state.autoApproveAll) {
189
+ _emitAutoApproved(description);
190
+ return true;
191
+ }
192
+
193
+ if (tag && (autoApprovedTags.has(tag) || state.sessionApprovedTags.has(tag))) {
194
+ _emitAutoApproved(description);
195
+ return true;
196
+ }
197
+ }
198
+
199
+ // Programmatic approver (Task 5.2, SDK). When the process is embedded (no
200
+ // TTY) a host may supply an async approver — the programmatic equivalent of
201
+ // the interactive prompt. It is consulted ONLY when we would otherwise have
202
+ // to refuse for lack of a way to ask (no tier/rule/skip auto-approved above),
203
+ // so it never widens what a tier already granted, and an approver that throws
204
+ // or returns falsy means "no" (fail closed). With NO approver the safe
205
+ // default holds — refuse — exactly as headless does.
206
+ if (typeof approver === 'function') {
207
+ try {
208
+ const ok = await approver({ actionType, description, tag, rule: ruleVerdict || null });
209
+ return !!ok;
210
+ } catch {
211
+ return false;
212
+ }
136
213
  }
137
214
 
138
215
  if (!process.stdout.isTTY || !process.stdin.isTTY) {
139
- writer.scrollback(` [non-TTY] Auto-approving: ${description}`);
140
- return true;
216
+ // Non-TTY / headless mode. WITHOUT --dangerously-skip-permissions we no
217
+ // longer silently auto-approve — that was the security hole. A tier flag
218
+ // (--allow-fs/exec/net/all) pre-approves its tag above; anything reaching
219
+ // here would otherwise require interactive confirmation we cannot show,
220
+ // so we refuse it instead of approving it. `quiet` (set by the embedding
221
+ // SDK) suppresses the scrollback line — the denial is already surfaced to
222
+ // the host in the structured run result.
223
+ if (!quiet) {
224
+ writer.scrollback(
225
+ ` [non-TTY] Refused (interactive confirmation required, and ` +
226
+ `--dangerously-skip-permissions not set): ${description}`
227
+ );
228
+ }
229
+ return false;
141
230
  }
142
231
 
143
232
  if (uiCallbacks) {
@@ -209,6 +298,7 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
209
298
  captureSelect,
210
299
  clear,
211
300
  readonlyBlock,
301
+ resolveRule,
212
302
  setUICallbacks,
213
303
  state,
214
304
  toggleAll,
package/lib/pricing.js ADDED
@@ -0,0 +1,67 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Cost estimation (Task 2.6) — a per-model price table × token usage.
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // Prices are USD per 1,000,000 tokens. Self-hosted / local models have no
8
+ // published price; an unknown price yields cost === null, which the UI renders
9
+ // as "unknown" — NEVER a fake $0. Users extend/override the built-in table via
10
+ // `config.pricing` (same shape: { "<model>": { input, output } }).
11
+
12
// Built-in price list. Each row is [model id, input price, output price];
// both prices are USD per 1,000,000 tokens. Row order is the key order of the
// resulting object.
const DEFAULT_PRICE_TABLE = Object.fromEntries(
  [
    ['gpt-4o', 2.5, 10],
    ['gpt-4o-mini', 0.15, 0.6],
    ['gpt-4.1', 2, 8],
    ['gpt-4.1-mini', 0.4, 1.6],
    ['o3-mini', 1.1, 4.4],
    ['claude-3-5-sonnet', 3, 15],
    ['claude-3-5-haiku', 0.8, 4],
  ].map(([model, input, output]) => [model, { input, output }])
);
21
+
22
/**
 * Validate a raw price entry and coerce its fields to numbers.
 *
 * Only real numbers and non-blank numeric strings are accepted. Values that
 * `Number()` would silently turn into 0 (null, '', whitespace, false, []) are
 * rejected, so a placeholder entry can never become a zero price.
 *
 * @param {*} entry - Candidate `{ input, output }` price entry.
 * @returns {{input: number, output: number}|null} The numeric entry, or null
 *   when the entry is not an object or either field is missing, non-numeric,
 *   non-finite or negative.
 */
function _normalize(entry) {
  if (!entry || typeof entry !== 'object') return null;

  // Strict coercion: anything that is not a number or a non-blank string is
  // treated as "not a price" rather than passed through Number().
  const toPrice = (value) => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') return Number(value);
    return Number.NaN;
  };
  const isUsable = (amount) => Number.isFinite(amount) && amount >= 0;

  const input = toPrice(entry.input);
  const output = toPrice(entry.output);
  if (!isUsable(input) || !isUsable(output)) return null;
  return { input, output };
}
29
+
30
/**
 * Resolve the price entry for a model name.
 *
 * The built-in table and the caller's `overrides` are merged on the
 * lower-cased key, so an override replaces the built-in entry even when its
 * key differs only in letter case (lookups are case-insensitive, so the merge
 * has to be as well). Matching order:
 *   1. exact key match (case-insensitive);
 *   2. substring match, longest key first, so "gpt-4o-mini" beats "gpt-4o".
 * Entries that fail validation are skipped.
 *
 * @param {string} model - Model identifier as reported by the API/config.
 * @param {Object<string, {input: *, output: *}>} [overrides] - User price
 *   entries (same shape as the built-in table); these win over the built-ins.
 * @returns {{input: number, output: number}|null} Per-million-token prices,
 *   or null when the model is not a non-empty string or no valid entry matches.
 */
function priceForModel(model, overrides) {
  if (typeof model !== 'string' || !model) return null;

  // Later layers overwrite earlier ones under the same lower-cased key.
  const entriesByKey = new Map();
  for (const layer of [DEFAULT_PRICE_TABLE, overrides]) {
    if (!layer || typeof layer !== 'object') continue;
    for (const [key, entry] of Object.entries(layer)) {
      entriesByKey.set(key.toLowerCase(), entry);
    }
  }

  const wanted = model.toLowerCase();

  const exact = _normalize(entriesByKey.get(wanted));
  if (exact) return exact;

  // Most specific (longest) key first; the sort is stable, so equal-length
  // keys keep their table order.
  const keysBySpecificity = [...entriesByKey.keys()].sort((a, b) => b.length - a.length);
  for (const key of keysBySpecificity) {
    if (!wanted.includes(key)) continue;
    const candidate = _normalize(entriesByKey.get(key));
    if (candidate) return candidate;
  }
  return null;
}
47
+
48
/**
 * Compute the USD cost of a usage record under a given price entry.
 *
 * @param {{prompt_tokens: *, completion_tokens: *}|null|undefined} usage -
 *   Token usage; missing, non-numeric, non-finite or negative counts count as 0
 *   so the result is never negative, NaN or Infinity.
 * @param {{input: *, output: *}|null|undefined} price - Price entry in USD per
 *   1,000,000 tokens.
 * @returns {number|null} Cost in USD, or null when the price is missing or
 *   invalid (unknown price — never reported as 0).
 */
function computeCost(usage, price) {
  const rates = _normalize(price);
  if (!rates) return null;

  // Sanitise one token count: only finite, positive numbers contribute.
  const countOf = (raw) => {
    const tokens = Number(raw);
    return Number.isFinite(tokens) && tokens > 0 ? tokens : 0;
  };

  const inTok = countOf(usage ? usage.prompt_tokens : undefined);
  const outTok = countOf(usage ? usage.completion_tokens : undefined);
  return (inTok / 1e6) * rates.input + (outTok / 1e6) * rates.output;
}
57
+
58
/**
 * Render a cost for display.
 *
 * @param {*} cost - Cost in USD as produced by computeCost, or null/undefined
 *   when the price is unknown.
 * @returns {string} "unknown" for anything that is not a finite number
 *   (null, undefined, NaN, ±Infinity, strings, ...); "$0.00" for zero; six
 *   decimals for amounts below one cent; four decimals otherwise.
 */
function formatCost(cost) {
  // Guard before calling toFixed: it only exists on numbers, and a non-finite
  // amount has no meaningful dollar rendering.
  if (typeof cost !== 'number' || !Number.isFinite(cost)) return 'unknown';
  if (cost === 0) return '$0.00';

  // Sub-cent amounts would round to zero at four decimals, so show six.
  const decimals = cost < 0.01 ? 6 : 4;
  return `$${cost.toFixed(decimals)}`;
}
66
+
67
+ module.exports = { DEFAULT_PRICE_TABLE, priceForModel, computeCost, formatCost };