@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
@@ -0,0 +1,181 @@
1
+ 'use strict';
2
+
3
+ // Task W.6 — Bound shell/exec output entering the model context.
4
+ //
5
+ // THE BUG these tests pin: the entire shell stdout+stderr was fed to the model
6
+ // VERBATIM and UNBOUNDED. `max_output_lines` (50) was applied ONLY in the UI
7
+ // renderer (lib/ui/diff.js), NOT to the model-facing message — so `seq 1 5000`
8
+ // dumped ~6k tokens into context, every cat/find/diff/test/build potentially far
9
+ // more. The fix is a DOUBLE bound at the context boundary: a head+tail LINE cap
10
+ // plus a TOKEN safety net, with a notice that teaches the redirect-to-file → grep
11
+ // pattern. These tests assert what the MODEL receives (the audit's empirical
12
+ // method), both on the pure helper AND through the real agent loop — and that the
13
+ // exit code / failure signal is NEVER hidden by truncation.
14
+
15
+ const { test, before, after } = require('node:test');
16
+ const assert = require('node:assert');
17
+
18
+ const { capShellOutput } = require('../lib/agent');
19
+ const { DEFAULT_OUTPUT_MAX_TOKENS } = require('../lib/constants');
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Part A — pure model-facing bounding (capShellOutput)
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Build a newline-separated block of `n` numbered lines: `<prefix>1` … `<prefix>n`.
 *
 * @param {number} n - How many lines to generate.
 * @param {string} [prefix='L'] - Text placed before each 1-based line number.
 * @returns {string} The lines joined with '\n' (no trailing newline); '' when n is 0.
 */
function lines(n, prefix = 'L') {
  const rows = [];
  for (let k = 1; k <= n; k += 1) {
    rows.push(`${prefix}${k}`);
  }
  return rows.join('\n');
}
28
+
29
+ test('many lines → head+tail bounded with an elision notice; BOTH ends present', () => {
30
+ const input = lines(200); // L1..L200
31
+ const r = capShellOutput(input, { maxLines: 50 });
32
+ assert.strictEqual(r.truncated, true);
33
+
34
+ // Head+tail of a 50-line budget = first 30 + last 20.
35
+ assert.match(r.text, /^L1\n/, 'first line (head) present');
36
+ assert.match(r.text, /(^|\n)L30(\n|$)/, 'end of head present');
37
+ assert.match(r.text, /(^|\n)L200$/, 'last line (tail) present — NOT head-only');
38
+ assert.match(r.text, /(^|\n)L181(\n|$)/, 'start of tail present');
39
+
40
+ // The middle is elided.
41
+ assert.doesNotMatch(r.text, /(^|\n)L100(\n|$)/, 'a middle line is elided');
42
+
43
+ // The notice states the split and the elided count (200 - 30 - 20 = 150).
44
+ assert.match(r.text, /150 line\(s\) elided/);
45
+ assert.match(r.text, /showing first 30 \+ last 20 of 200/);
46
+
47
+ // The model-facing text is far smaller than the raw output.
48
+ assert.ok(r.text.length < input.length / 2, 'bounded well below the original');
49
+ });
50
+
51
+ test('PAIRED POSITIVE: under-budget output is shown in FULL with no notice', () => {
52
+ const input = lines(10); // L1..L10, well under the 50-line budget
53
+ const r = capShellOutput(input, { maxLines: 50 });
54
+ assert.strictEqual(r.truncated, false);
55
+ assert.strictEqual(r.text, input, 'byte-for-byte unchanged — "bounded" is distinguishable from "broke"');
56
+ assert.doesNotMatch(r.text, /elided/);
57
+ assert.doesNotMatch(r.text, /token-capped/);
58
+ });
59
+
60
+ test('single ENORMOUS line → the token safety net caps it (line cap alone cannot)', () => {
61
+ // One line, so the line cap is a no-op (1 ≤ maxLines). Tokens ≈ 50k at char/4,
62
+ // well over the default 10k-token budget → the token net must fire.
63
+ const huge = 'x'.repeat(200000);
64
+ const r = capShellOutput(huge, { maxLines: 50 });
65
+ assert.strictEqual(r.truncated, true);
66
+ assert.doesNotMatch(r.text, /line\(s\) elided/, 'not a line-cap truncation — it is one line');
67
+ assert.match(r.text, /token-capped/, 'token safety net fired');
68
+ // Bounded to ~the token budget in chars (10k tokens * 4 chars ≈ 40k), far
69
+ // below the 200k original.
70
+ assert.ok(r.text.length < huge.length / 4, `token-bounded: ${r.text.length} << ${huge.length}`);
71
+ });
72
+
73
+ test('the elision notice teaches the redirect-to-file → grep pattern', () => {
74
+ const r = capShellOutput(lines(200), { maxLines: 50 });
75
+ assert.match(r.text, /redirect/i, 'notice mentions redirecting');
76
+ assert.match(r.text, /grep/i, 'notice steers toward grep (the W.5-enabled pattern)');
77
+ });
78
+
79
+ test('the token-cap notice also teaches redirect → grep', () => {
80
+ const r = capShellOutput('x'.repeat(200000), { maxLines: 50 });
81
+ assert.match(r.text, /redirect/i);
82
+ assert.match(r.text, /grep/i);
83
+ });
84
+
85
+ test('defaults are used when no budgets are passed', () => {
86
+ // Default line budget is 50; 60 lines must truncate.
87
+ const r = capShellOutput(lines(60));
88
+ assert.strictEqual(r.truncated, true);
89
+ assert.match(r.text, /elided/);
90
+ // And the default token budget is the documented constant.
91
+ assert.ok(DEFAULT_OUTPUT_MAX_TOKENS > 0);
92
+ });
93
+
94
+ test('non-string / empty input is handled without throwing', () => {
95
+ assert.deepStrictEqual(capShellOutput('', { maxLines: 50 }), { text: '', truncated: false });
96
+ assert.deepStrictEqual(capShellOutput(null, { maxLines: 50 }), { text: '', truncated: false });
97
+ });
98
+
99
+ // ---------------------------------------------------------------------------
100
+ // Part B — end-to-end through the REAL agent loop: a <shell> tag's output must
101
+ // reach the model BOUNDED (not just the UI), and the exit code must survive.
102
+ // ---------------------------------------------------------------------------
103
+
104
+ const ui = require('../lib/ui');
105
+ const { createApiClient } = require('../lib/api');
106
+ const { createToolExecutor, extractToolCalls } = require('../lib/tools');
107
+ const { createPermissionManager } = require('../lib/permissions');
108
+ const { createAgentRunner } = require('../lib/agent');
109
+ const { startMockLLM } = require('./harness/mock-llm');
110
+
111
+ let prevKey;
112
+ before(() => {
113
+ prevKey = process.env.SEMALT_API_KEY;
114
+ process.env.SEMALT_API_KEY = 'test-key';
115
+ });
116
+ after(() => {
117
+ if (prevKey === undefined) delete process.env.SEMALT_API_KEY;
118
+ else process.env.SEMALT_API_KEY = prevKey;
119
+ });
120
+
121
/**
 * Assemble an agent runner wired to the API endpoint at `base`.
 *
 * The config is an in-memory object shared through getConfig/saveConfig, with
 * the sandbox off and the output budgets set to 50 lines and
 * DEFAULT_OUTPUT_MAX_TOKENS. Permissions are skipped and every permission-UI
 * callback is a no-op, so no prompt blocks the test.
 *
 * @param {string} base - Base URL handed to the API client as `api_base`.
 * @returns {*} Whatever createAgentRunner returns; callers here use `runAgentLoop`.
 */
function buildRunner(base) {
  const settings = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    sandbox: { mode: 'off' },
    max_output_lines: 50,
    max_output_tokens: DEFAULT_OUTPUT_MAX_TOKENS,
  };
  const getConfig = () => settings;
  // Saving merges into the same object, so every consumer sees the update.
  const saveConfig = (patch) => Object.assign(settings, patch);

  const client = createApiClient({ getConfig, saveConfig, ui });

  const permissionManager = createPermissionManager(ui, { skipPermissions: true });
  permissionManager.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    // Returns a disposer-style function, itself a no-op.
    onCaptureNavigation: () => () => {},
  });

  const executor = createToolExecutor(permissionManager, ui, getConfig);

  return createAgentRunner({
    chatStream: client.chatStream,
    extractToolCalls,
    agentExecShell: executor.agentExecShell,
    agentExecFile: executor.agentExecFile,
    describePermission: executor.describePermission,
    permissionManager,
    ui,
    getConfig,
  });
}
140
+
141
/**
 * Run one scripted agent turn against a mock LLM and return what was fed back
 * to the model.
 *
 * The mock is scripted with two replies: `toolTag` first, then 'Done.'
 * (presumably served in that order — confirm against harness/mock-llm).
 * After the loop finishes, the first user-role message whose content matches
 * /Tool execution results/ is located in the shared `messages` array.
 *
 * @param {string} toolTag - Assistant reply that contains the tool invocation.
 * @returns {Promise<string>} Content of the tool-results message, or '' when none was appended.
 */
async function runShellTurn(toolTag) {
  const mock = await startMockLLM();
  try {
    // Script the replies inside the try so the mock is closed even when
    // scripting itself throws; otherwise the open server could outlive the test.
    mock.replyWith(toolTag);
    mock.replyWith('Done.');

    const runner = buildRunner(mock.base);
    const cb = {
      onToken: () => {}, onToolStart: () => {}, onToolEnd: () => {},
      onError: () => {}, onRetry: () => {}, onAssistantMessage: () => {},
    };
    // The loop mutates this array in place; the tool results are read from it below.
    const messages = [{ role: 'user', content: 'go' }];
    await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: cb });

    const fedBack = messages.find((m) => m.role === 'user' && /Tool execution results/.test(m.content));
    return fedBack ? fedBack.content : '';
  } finally {
    // close is optional on the mock handle; await it when present.
    await mock.close?.();
  }
}
159
+
160
+ // A 200-line generator that contains NO '<' (which would break XML tag parsing).
161
+ const GEN_200 = 'node -e "var a=[];for(var i=1;i!==201;i++)a.push(i);console.log(a.join(String.fromCharCode(10)))"';
162
+
163
+ test('REAL loop: large shell output reaches the model BOUNDED (head+tail), not verbatim', async () => {
164
+ const content = await runShellTurn(`<shell>${GEN_200}</shell>`);
165
+ assert.match(content, /Exit code: 0/, 'exit code surfaced');
166
+ // Head+tail, not head-only: both 1 (head) and 200 (tail) present.
167
+ assert.match(content, /(^|\n)1(\n|$)/, 'first output line present');
168
+ assert.match(content, /(^|\n)200(\n|$)/, 'last output line present (tail)');
169
+ // The middle is elided with the teaching notice.
170
+ assert.match(content, /line\(s\) elided/);
171
+ assert.match(content, /redirect/i);
172
+ assert.match(content, /grep/i);
173
+ assert.doesNotMatch(content, /(^|\n)100(\n|$)/, 'a middle line is elided from context');
174
+ });
175
+
176
+ test('REAL loop: a FAILING command with large output still surfaces the non-zero exit', async () => {
177
+ const failing = 'node -e "var a=[];for(var i=1;i!==201;i++)a.push(i);console.log(a.join(String.fromCharCode(10)));process.exit(3)"';
178
+ const content = await runShellTurn(`<shell>${failing}</shell>`);
179
+ assert.match(content, /Exit code: 3/, 'non-zero exit is NOT hidden by truncation');
180
+ assert.match(content, /line\(s\) elided/, 'output still bounded');
181
+ });
@@ -0,0 +1,110 @@
1
+ 'use strict';
2
+
3
+ // End-to-end (via the chat harness) for skills (Task 3.5). Proves the
4
+ // progressive-disclosure contract at the loop level: a skill discovered at chat
5
+ // startup contributes ONLY its metadata to the system prompt, and its body is
6
+ // loaded into context ONLY when the skill is invoked as `/<skill>` — at which
7
+ // point the rendered body is submitted to the agent as a user prompt (never
8
+ // executed as code). The harness redirects $HOME to a temp dir before any lib
9
+ // module loads, so we stage skill folders under that temp global dir.
10
+
11
+ const { test } = require('node:test');
12
+ const assert = require('node:assert');
13
+ const fs = require('node:fs');
14
+ const path = require('node:path');
15
+
16
+ const { startChat } = require('./harness/chat-harness');
17
+ const { clearSkills } = require('../lib/commands/registry');
18
+ const { getSystemPrompt } = require('../lib/prompts');
19
+
20
+ const GLOBAL_SKILLS_DIR = path.join(process.env.HOME, '.semalt-ai', 'skills');
21
+
22
/**
 * Stage a skill under the global skills directory as `<slug>/SKILL.md`.
 *
 * @param {string} slug - Folder name of the skill.
 * @param {string} content - Full SKILL.md text to write.
 */
function stage(slug, content) {
  const skillDir = path.join(GLOBAL_SKILLS_DIR, slug);
  const manifestPath = path.join(skillDir, 'SKILL.md');
  fs.mkdirSync(skillDir, { recursive: true });
  fs.writeFileSync(manifestPath, content);
}
27
/**
 * Remove every staged skill from disk and reset the skill registrations.
 * Filesystem removal is best-effort: a failure is ignored so the registry
 * reset below always runs.
 */
function clearStaged() {
  try {
    fs.rmSync(GLOBAL_SKILLS_DIR, { force: true, recursive: true });
  } catch {
    // Deliberately ignored — cleanup must not fail the test.
  }
  clearSkills();
}
31
+
32
+ test('skill is discovered at startup and announced', async () => {
33
+ clearStaged();
34
+ stage('deep-research', '---\nname: Deep Research\ndescription: Research things\n---\nSECRET_SKILL_BODY');
35
+ const c = await startChat({ config: { auth_token: 'tok' } });
36
+ try {
37
+ assert.ok(c.chatHistory.find(/Loaded 1 skill\(s\): \/deep-research/), 'startup announces the skill');
38
+ } finally {
39
+ await c.submit('exit'); await c.done; c.cleanup(); clearStaged();
40
+ }
41
+ });
42
+
43
+ test('metadata is in the prompt but the body is NOT until invocation', async () => {
44
+ clearStaged();
45
+ stage('deep-research', '---\nname: Deep Research\ndescription: Research things\n---\nSECRET_SKILL_BODY full instructions');
46
+ const c = await startChat({ config: { auth_token: 'tok' } });
47
+ try {
48
+ // System prompt (auto-loaded) carries the metadata but not the body.
49
+ const prompt = getSystemPrompt(false);
50
+ assert.ok(prompt.includes('/deep-research'), 'metadata (invocation token) is in the prompt');
51
+ assert.ok(prompt.includes('Research things'), 'description is in the prompt');
52
+ assert.ok(!prompt.includes('SECRET_SKILL_BODY'), 'body is NOT in the prompt');
53
+
54
+ // Invoke the skill → the body is loaded and submitted as a user prompt.
55
+ await c.submit('/deep-research the topic');
56
+ assert.strictEqual(c.calls.runAgentLoop.length, 1, 'agent invoked once');
57
+ const turn = c.calls.runAgentLoop[0];
58
+ const userMsgs = turn.messages.filter((m) => m.role === 'user').map((m) => m.content);
59
+ assert.strictEqual(userMsgs.length, 1);
60
+ assert.ok(userMsgs[0].includes('SECRET_SKILL_BODY'), 'body enters context ONLY on invocation');
61
+ // Assets directory is surfaced so the agent can find skill scripts.
62
+ assert.ok(/Skill assets directory:/.test(userMsgs[0]), 'skill dir surfaced to the agent');
63
+ } finally {
64
+ await c.submit('exit'); await c.done; c.cleanup(); clearStaged();
65
+ }
66
+ });
67
+
68
// A skill body containing $ARGUMENTS must have the text typed after the
// `/<skill>` token rendered into it before it is submitted to the agent.
test('skill body supports $ARGUMENTS substitution on invocation', async () => {
  clearStaged();
  stage('greet', '---\ndescription: greet\n---\nSay hi to $ARGUMENTS now');
  const c = await startChat({ config: { auth_token: 'tok' } });
  try {
    await c.submit('/greet the world');
    // Guard the lookups so a regression reports a readable assertion failure
    // instead of a TypeError on an undefined turn or message.
    assert.ok(c.calls.runAgentLoop.length >= 1, 'skill invocation reached the agent');
    const turn = c.calls.runAgentLoop[0];
    const userTurn = turn.messages.find((m) => m.role === 'user');
    assert.ok(userTurn, 'a user message was submitted to the agent');
    assert.ok(userTurn.content.startsWith('Say hi to the world now'), 'arguments rendered into the body');
  } finally {
    await c.submit('exit'); await c.done; c.cleanup(); clearStaged();
  }
});
81
+
82
+ test('a built-in is never overridden by a same-named skill', async () => {
83
+ clearStaged();
84
+ // slug "clear" collides with the built-in /clear command.
85
+ stage('clear', '---\ndescription: nope\n---\nthis should never run as a prompt');
86
+ const c = await startChat({ config: { auth_token: 'tok' } });
87
+ try {
88
+ assert.ok(c.chatHistory.find(/\/clear.*already in use/i), 'collision warning shown');
89
+ await c.submit('a message');
90
+ const before = c.calls.runAgentLoop.length;
91
+ await c.submit('/clear');
92
+ assert.ok(c.chatHistory.find(/cleared/i), 'built-in /clear executed');
93
+ assert.strictEqual(c.calls.runAgentLoop.length, before, 'skill /clear did not invoke the agent');
94
+ } finally {
95
+ await c.submit('exit'); await c.done; c.cleanup(); clearStaged();
96
+ }
97
+ });
98
+
99
+ test('invoking a skill while logged out goes through the auth gate', async () => {
100
+ clearStaged();
101
+ stage('do-thing', '---\ndescription: d\n---\nInstructions body');
102
+ const c = await startChat({ config: { auth_token: '' } });
103
+ try {
104
+ await c.submit('/do-thing');
105
+ assert.ok(c.chatHistory.find(/Not logged in/), 'rendered skill prompt goes through the auth-gated agent path');
106
+ assert.strictEqual(c.calls.runAgentLoop.length, 0, 'agent not invoked while unauthenticated');
107
+ } finally {
108
+ await c.submit('exit'); await c.done; c.cleanup(); clearStaged();
109
+ }
110
+ });
@@ -0,0 +1,295 @@
1
+ 'use strict';
2
+
3
+ // Tests for skills (Task 3.5): discovery, the load-bearing PROGRESSIVE
4
+ // DISCLOSURE behavior (metadata-only in the prompt, body loaded only on
5
+ // invocation), project-over-global precedence, repo-root-bounded discovery,
6
+ // size bounding, the byte-for-byte-unchanged prompt when absent, and registry
7
+ // registration. Filesystem state is isolated to per-test temp directories.
8
+
9
+ const { test } = require('node:test');
10
+ const assert = require('node:assert');
11
+ const fs = require('node:fs');
12
+ const os = require('node:os');
13
+ const path = require('node:path');
14
+
15
+ const {
16
+ parseSkillFrontmatter,
17
+ discoverSkills,
18
+ loadSkillBody,
19
+ loadSkills,
20
+ findProjectSkillsDir,
21
+ skillsStatusLines,
22
+ } = require('../lib/skills');
23
+ const { getSystemPrompt } = require('../lib/prompts');
24
+ const {
25
+ registerSkills,
26
+ clearSkills,
27
+ skillCommands,
28
+ resolveCommand,
29
+ completionNames,
30
+ helpText,
31
+ commandNames,
32
+ } = require('../lib/commands/registry');
33
+
34
/**
 * Create a fresh temp directory under the OS temp dir and return its real path.
 * The path is passed through realpathSync, so any symlinked component of the
 * temp dir is resolved in the returned value.
 *
 * @param {string} prefix - Name prefix handed to mkdtempSync.
 * @returns {string} Resolved absolute path of the new directory.
 */
function tmp(prefix) {
  const template = path.join(os.tmpdir(), prefix);
  const created = fs.mkdtempSync(template);
  return fs.realpathSync(created);
}
35
/**
 * Recursively delete `p`, ignoring every error (best-effort test cleanup).
 *
 * @param {string} p - File or directory to remove.
 */
function rmrf(p) {
  try {
    fs.rmSync(p, { force: true, recursive: true });
  } catch {
    // Deliberately ignored — cleanup must never fail a test.
  }
}
36
+ // Stage a skill folder <root>/<slug>/SKILL.md with given content.
37
/**
 * Create `<root>/<slug>/SKILL.md` holding `content`, creating directories as needed.
 *
 * @param {string} root - Skills root directory.
 * @param {string} slug - Folder name of the skill.
 * @param {string} content - Full SKILL.md text to write.
 * @returns {string} Path of the skill folder (`<root>/<slug>`).
 */
function writeSkill(root, slug, content) {
  const skillDir = path.join(root, slug);
  const manifestPath = path.join(skillDir, 'SKILL.md');
  fs.mkdirSync(skillDir, { recursive: true });
  fs.writeFileSync(manifestPath, content);
  return skillDir;
}
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Frontmatter parsing
46
+ // ---------------------------------------------------------------------------
47
+
48
+ test('parseSkillFrontmatter: name/description parsed; body follows', () => {
49
+ const src = '---\nname: Code Review\ndescription: Review a diff for bugs\n---\nDo the review carefully.';
50
+ const { meta, body } = parseSkillFrontmatter(src);
51
+ assert.strictEqual(meta.name, 'Code Review');
52
+ assert.strictEqual(meta.description, 'Review a diff for bugs');
53
+ assert.strictEqual(body, 'Do the review carefully.');
54
+ });
55
+
56
+ test('parseSkillFrontmatter: no frontmatter → whole text is the body', () => {
57
+ const { meta, body } = parseSkillFrontmatter('just instructions');
58
+ assert.strictEqual(meta.name, '');
59
+ assert.strictEqual(meta.description, '');
60
+ assert.strictEqual(body, 'just instructions');
61
+ });
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Discovery (global / project / precedence / repo-root bound)
65
+ // ---------------------------------------------------------------------------
66
+
67
+ test('discoverSkills: global skill from ~/.semalt-ai/skills/<name>/SKILL.md', () => {
68
+ const home = tmp('semalt-skhome-');
69
+ try {
70
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'deep-research',
71
+ '---\nname: Deep Research\ndescription: Multi-source research\n---\nSECRET_BODY_MARKER');
72
+ const skills = discoverSkills({ home, cwd: home });
73
+ assert.strictEqual(skills.length, 1);
74
+ const s = skills[0];
75
+ assert.strictEqual(s.name, '/deep-research');
76
+ assert.strictEqual(s.slug, 'deep-research');
77
+ assert.strictEqual(s.displayName, 'Deep Research');
78
+ assert.strictEqual(s.description, 'Multi-source research');
79
+ assert.strictEqual(s.source, 'global');
80
+ } finally { rmrf(home); }
81
+ });
82
+
83
+ test('discoverSkills: project skill from nearest .semalt/skills, repo-root walk', () => {
84
+ const home = tmp('semalt-skhome-');
85
+ const repo = tmp('semalt-skrepo-');
86
+ try {
87
+ fs.mkdirSync(path.join(repo, '.git'), { recursive: true });
88
+ const sub = path.join(repo, 'src', 'deep');
89
+ fs.mkdirSync(sub, { recursive: true });
90
+ writeSkill(path.join(repo, '.semalt', 'skills'), 'deploy', '---\ndescription: Deploy\n---\nbody');
91
+ const skills = discoverSkills({ home, cwd: sub });
92
+ assert.strictEqual(skills.length, 1);
93
+ assert.strictEqual(skills[0].name, '/deploy');
94
+ assert.strictEqual(skills[0].source, 'project');
95
+ } finally { rmrf(home); rmrf(repo); }
96
+ });
97
+
98
+ test('discoverSkills: project overrides global on slug collision; global-only survives', () => {
99
+ const home = tmp('semalt-skhome-');
100
+ const repo = tmp('semalt-skrepo-');
101
+ try {
102
+ fs.mkdirSync(path.join(repo, '.git'), { recursive: true });
103
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'review', '---\ndescription: GLOBAL\n---\nglobal body');
104
+ writeSkill(path.join(repo, '.semalt', 'skills'), 'review', '---\ndescription: PROJECT\n---\nproject body');
105
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'onlyglobal', '---\ndescription: g-only\n---\nbody');
106
+ const skills = discoverSkills({ home, cwd: repo });
107
+ const review = skills.find((s) => s.name === '/review');
108
+ assert.strictEqual(review.description, 'PROJECT');
109
+ assert.strictEqual(review.source, 'project');
110
+ assert.ok(skills.find((s) => s.name === '/onlyglobal'), 'global-only skill still surfaces');
111
+ } finally { rmrf(home); rmrf(repo); }
112
+ });
113
+
114
+ test('findProjectSkillsDir: bounded by repo root — does not escape above .git', () => {
115
+ const outer = tmp('semalt-skouter-');
116
+ try {
117
+ fs.mkdirSync(path.join(outer, '.semalt', 'skills'), { recursive: true });
118
+ const repo = path.join(outer, 'repo');
119
+ fs.mkdirSync(path.join(repo, '.git'), { recursive: true });
120
+ const sub = path.join(repo, 'a', 'b');
121
+ fs.mkdirSync(sub, { recursive: true });
122
+ assert.strictEqual(findProjectSkillsDir(sub), null);
123
+ } finally { rmrf(outer); }
124
+ });
125
+
126
+ test('discoverSkills: folders without SKILL.md are skipped', () => {
127
+ const home = tmp('semalt-skhome-');
128
+ try {
129
+ fs.mkdirSync(path.join(home, '.semalt-ai', 'skills', 'not-a-skill'), { recursive: true });
130
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'real', '---\ndescription: d\n---\nb');
131
+ const skills = discoverSkills({ home, cwd: home });
132
+ assert.deepStrictEqual(skills.map((s) => s.name), ['/real']);
133
+ } finally { rmrf(home); }
134
+ });
135
+
136
+ // ---------------------------------------------------------------------------
137
+ // PROGRESSIVE DISCLOSURE — the load-bearing behavior
138
+ // ---------------------------------------------------------------------------
139
+
140
+ test('discovery returns METADATA ONLY — no body field', () => {
141
+ const home = tmp('semalt-skhome-');
142
+ try {
143
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'x',
144
+ '---\nname: X\ndescription: d\n---\nSECRET_BODY_MARKER');
145
+ const [s] = discoverSkills({ home, cwd: home });
146
+ assert.ok(!('body' in s), 'spec carries no body');
147
+ assert.ok(!JSON.stringify(s).includes('SECRET_BODY_MARKER'), 'body text is absent from the spec');
148
+ } finally { rmrf(home); }
149
+ });
150
+
151
+ test('the metadata block holds name+description but NOT the body', () => {
152
+ const home = tmp('semalt-skhome-');
153
+ try {
154
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'researcher',
155
+ '---\nname: Researcher\ndescription: research things\n---\nSECRET_BODY_MARKER with full instructions');
156
+ const { block } = loadSkills({ home, cwd: home });
157
+ assert.ok(block.includes('/researcher'), 'invocation token present');
158
+ assert.ok(block.includes('research things'), 'description present');
159
+ assert.ok(block.includes('<<<SKILLS>>>') && block.includes('<<<END_SKILLS>>>'));
160
+ assert.ok(!block.includes('SECRET_BODY_MARKER'), 'BODY is NOT injected into the prompt block');
161
+ } finally { rmrf(home); }
162
+ });
163
+
164
+ test('loadSkillBody reads the body ON DEMAND (the invocation-time read)', () => {
165
+ const home = tmp('semalt-skhome-');
166
+ try {
167
+ writeSkill(path.join(home, '.semalt-ai', 'skills'), 'researcher',
168
+ '---\nname: Researcher\ndescription: research\n---\nSECRET_BODY_MARKER with full instructions');
169
+ const [s] = discoverSkills({ home, cwd: home });
170
+ const body = loadSkillBody(s);
171
+ assert.ok(body.includes('SECRET_BODY_MARKER'), 'body loaded only when explicitly requested');
172
+ assert.ok(!body.includes('description: research'), 'frontmatter stripped from the loaded body');
173
+ } finally { rmrf(home); }
174
+ });
175
+
176
+ // ---------------------------------------------------------------------------
177
+ // Absent skills → prompt byte-for-byte unchanged
178
+ // ---------------------------------------------------------------------------
179
+
180
+ test('no skills present → empty block, empty list', () => {
181
+ const home = tmp('semalt-emptyhome-');
182
+ const dir = tmp('semalt-noskills-');
183
+ try {
184
+ const r = loadSkills({ home, cwd: dir });
185
+ assert.strictEqual(r.block, '');
186
+ assert.deepStrictEqual(r.skills, []);
187
+ assert.strictEqual(r.truncated, false);
188
+ } finally { rmrf(home); rmrf(dir); }
189
+ });
190
+
191
+ test('getSystemPrompt is byte-for-byte the base prompt when skills (and memory) empty', () => {
192
+ // Both extras explicit-empty → exactly the base template.
193
+ const base = getSystemPrompt(false, '', '');
194
+ assert.ok(!base.includes('<<<SKILLS>>>'), 'no skills section in the base prompt');
195
+ // Skills are appended verbatim AFTER memory — proves append-only + empty == base.
196
+ assert.strictEqual(getSystemPrompt(false, '', '\n\nSKILLBLOCK'), base + '\n\nSKILLBLOCK');
197
+ // Native template too.
198
+ const nbase = getSystemPrompt(true, '', '');
199
+ assert.strictEqual(getSystemPrompt(true, '', '\n\nX'), nbase + '\n\nX');
200
+ });
201
+
202
+ test('getSystemPrompt order is base + memory + skills', () => {
203
+ const base = getSystemPrompt(false, '', '');
204
+ assert.strictEqual(getSystemPrompt(false, '\n\nMEM', '\n\nSKL'), base + '\n\nMEM' + '\n\nSKL');
205
+ });
206
+
207
+ // ---------------------------------------------------------------------------
208
+ // Size bounding
209
+ // ---------------------------------------------------------------------------
210
+
211
// Writes 20 skills, each with a 200-character description, then loads them
// with a 200-byte budget: the result must be flagged as truncated and the
// block itself must mention the truncation.
test('oversized skills metadata is truncated with a visible notice', () => {
  const home = tmp('semalt-skhome-');
  try {
    const skillsRoot = path.join(home, '.semalt-ai', 'skills');
    const longDescription = 'D'.repeat(200);
    for (const index of Array(20).keys()) {
      const content = `---\nname: Skill ${index}\ndescription: ${longDescription}\n---\nbody`;
      writeSkill(skillsRoot, `skill-${index}`, content);
    }

    const loaded = loadSkills({ home, cwd: home, maxBytes: 200 });
    assert.strictEqual(loaded.truncated, true);
    const mentionsTruncation = /truncated/i.test(loaded.block);
    assert.ok(mentionsTruncation, 'block carries a truncation notice');
  } finally {
    rmrf(home);
  }
});
223
+
224
+ // ---------------------------------------------------------------------------
225
+ // Status lines (/skills view)
226
+ // ---------------------------------------------------------------------------
227
+
228
// skillsStatusLines returns an array of lines; both cases are checked on the
// newline-joined text. Empty input must say that no skills were found; a
// populated input must show the skill name, its bracketed source and the
// "bodies load on invocation" hint.
test('skillsStatusLines: empty + populated', () => {
  const emptyView = skillsStatusLines({ skills: [] }).join('\n');
  assert.match(emptyView, /No skills found/);

  const reviewSkill = { name: '/review', source: 'project', description: 'do a review' };
  const populatedView = skillsStatusLines({ skills: [reviewSkill], truncated: false }).join('\n');
  for (const expectedFragment of ['/review', '[project]']) {
    assert.ok(populatedView.includes(expectedFragment));
  }
  assert.ok(/bodies load on invocation/i.test(populatedView));
});
238
+
239
+ // ---------------------------------------------------------------------------
240
+ // Registry registration
241
+ // ---------------------------------------------------------------------------
242
+
243
// Registers one ordinary skill and one whose name clashes with the built-in
// /model command, then checks registration results, command resolution,
// completion/help output and the built-in-only name list.
test('registerSkills: skill resolves and completes; built-ins win on collision', () => {
  clearSkills();
  try {
    const researchSkill = {
      name: '/research',
      slug: 'research',
      description: 'Research',
      skillPath: '/x/SKILL.md',
      dir: '/x',
      source: 'global',
    };
    // Same name as a built-in command, so it must be rejected with a warning.
    const collidingSkill = { name: '/model', slug: 'model', skillPath: '/y/SKILL.md', source: 'global' };

    const outcome = registerSkills([researchSkill, collidingSkill]);
    const { registered, warnings } = outcome;
    assert.strictEqual(registered.length, 1, 'only the non-colliding skill registers');
    assert.strictEqual(registered[0].name, '/research');
    assert.strictEqual(warnings.length, 1);
    assert.match(warnings[0], /\/model/);

    // /model still resolves to the built-in, not the skill.
    const builtinModel = resolveCommand('/model');
    assert.ok(!builtinModel.spec.skill, 'built-in /model not shadowed by a skill');

    // /research resolves to the skill spec carrying its path (NOT a body).
    const resolved = resolveCommand('/research some topic');
    assert.strictEqual(resolved.name, '/research');
    assert.strictEqual(resolved.arg, 'some topic');
    assert.ok(resolved.spec.skill, 'skill flagged on the spec');
    assert.strictEqual(resolved.spec.skillPath, '/x/SKILL.md');
    const carriesContent = 'body' in resolved.spec || 'template' in resolved.spec;
    assert.ok(!carriesContent, 'spec carries no body/template');

    // Bare invocation (no arg) also resolves (optional-arg behavior).
    const bare = resolveCommand('/research');
    assert.strictEqual(bare.name, '/research');

    // Completion + help surface the skill under its own heading.
    assert.ok(completionNames().includes('/research'));
    assert.match(helpText(), /Skills:/);
    assert.match(helpText(), /\/research Research/);

    // Parity name list stays built-ins only (skills handled inline, no handler).
    assert.ok(!commandNames().includes('/research'));
  } finally {
    // Leave the registry clean for the tests that follow.
    clearSkills();
  }
});
279
+
280
// A second registerSkills call must replace — not extend — the previously
// registered set, and skillCommands() must report only the current set.
test('registerSkills replaces the prior set; skillCommands reflects current', () => {
  clearSkills();
  try {
    registerSkills([{ name: '/one', slug: 'one', skillPath: '/a' }]);
    assert.ok(resolveCommand('/one'));
    assert.strictEqual(skillCommands().length, 1);

    registerSkills([{ name: '/two', slug: 'two', skillPath: '/b' }]);
    assert.strictEqual(resolveCommand('/one'), null, 'prior skill dropped on re-register');
    assert.ok(resolveCommand('/two'));
    // Re-check the listing after the replacement: without this, a stale
    // '/one' entry lingering in skillCommands() would go unnoticed even
    // though the test title promises that the listing reflects the current set.
    assert.strictEqual(skillCommands().length, 1, 'skillCommands lists only the current set');
  } finally {
    // Leave the registry clean for the tests that follow.
    clearSkills();
  }
});
291
+
292
// With the skill registry cleared, the help output must not contain a
// "Skills:" heading.
test('helpText has no Skills section when none registered', () => {
  clearSkills();
  const help = helpText();
  const hasSkillsHeading = /Skills:/.test(help);
  assert.ok(!hasSkillsHeading);
});
@@ -0,0 +1,68 @@
1
+ 'use strict';
2
+
3
+ // Smoke tests using the built-in node:test runner (no test-framework dependency).
4
+ // These exist mainly to give CI a non-empty, meaningful suite before Phase 1
5
+ // adds real coverage. They exercise the CLI as a black box.
6
+
7
+ const { test } = require('node:test');
8
+ const assert = require('node:assert');
9
+ const path = require('node:path');
10
+ const { spawnSync } = require('node:child_process');
11
+
12
+ const CLI = path.resolve(__dirname, '..', 'index.js');
13
+ const PKG = require('../package.json');
14
+
15
/**
 * Run the CLI entry point synchronously in a child Node.js process.
 *
 * @param {string[]} args - Command-line arguments placed after the CLI script path.
 * @param {object} [opts] - Extra spawnSync options; these override the defaults
 *   (utf8 encoding, 20-second timeout) because they are merged last.
 * @returns {object} The spawnSync result object.
 */
function runCli(args, opts = {}) {
  const argv = [CLI, ...args];
  const spawnOptions = Object.assign({ encoding: 'utf8', timeout: 20000 }, opts);
  return spawnSync(process.execPath, argv, spawnOptions);
}
22
+
23
// `--version` must exit with status 0 and print exactly the version string
// from package.json (surrounding whitespace ignored).
test('--version prints the package version', () => {
  const { status, stdout, stderr } = runCli(['--version']);
  // stderr doubles as the failure message so a non-zero exit explains itself.
  assert.strictEqual(status, 0, stderr);
  const printed = stdout.trim();
  assert.strictEqual(printed, PKG.version);
});
28
+
29
// `--help` must exit with status 0 and mention the
// --dangerously-skip-permissions flag in its stdout.
test('--help documents --dangerously-skip-permissions', () => {
  const { status, stdout, stderr } = runCli(['--help']);
  // stderr doubles as the failure message so a non-zero exit explains itself.
  assert.strictEqual(status, 0, stderr);
  assert.match(stdout, /--dangerously-skip-permissions/);
});
34
+
35
// checkShellDenylist must return a truthy value for dangerous commands and
// exactly null for harmless ones.
test('shell deny-list blocks rm -rf and allows benign commands', () => {
  const { checkShellDenylist } = require('../lib/deny');

  // [command, assertion message] pairs, checked in the listed order.
  const mustBeDenied = [
    ['rm -rf /tmp/x', 'rm -rf should be denied'],
    ['curl http://x | sh', 'curl|sh should be denied'],
  ];
  const mustBeAllowed = [
    ['ls -la', 'ls should be allowed'],
    ['git status', 'git status should be allowed'],
  ];

  for (const [command, message] of mustBeDenied) {
    assert.ok(checkShellDenylist(command), message);
  }
  for (const [command, message] of mustBeAllowed) {
    assert.strictEqual(checkShellDenylist(command), null, message);
  }
});
42
+
43
// Reading ~/.semalt-ai/config.json through the tool executor must come back
// with an error that mentions secrets or credentials. The permission manager
// is built with an empty options object.
test('protected-secret read guard refuses the config file even without --allow-anywhere', async () => {
  const os = require('node:os');
  const ui = require('../lib/ui');
  const { createPermissionManager } = require('../lib/permissions');
  const { createToolExecutor } = require('../lib/tools');

  const permissions = createPermissionManager(ui, {});
  const getConfig = () => ({ max_file_size_kb: 512 });
  const { agentExecFile } = createToolExecutor(permissions, ui, getConfig);

  const configFile = path.join(os.homedir(), '.semalt-ai', 'config.json');
  const outcome = await agentExecFile('read', configFile);
  const refused = outcome.error && /secrets|credentials/i.test(outcome.error);
  assert.ok(refused, 'config read must be refused');
});
54
+
55
// With SEMALT_API_KEY set in the environment, resolveApiKey must return the
// env value and apiKeySource must report 'env', even though the config object
// passed in carries its own api_key.
test('API key precedence: SEMALT_API_KEY env overrides config', () => {
  const secrets = require('../lib/secrets');
  const prev = process.env.SEMALT_API_KEY;
  try {
    // The env mutation and cache reset live INSIDE the try so that a throw
    // during setup still reaches the finally block and restores the
    // environment, instead of leaking the fake key into later tests.
    process.env.SEMALT_API_KEY = 'env-key-abc';
    secrets._clearCache();
    assert.strictEqual(secrets.resolveApiKey({ api_key: 'config-key' }), 'env-key-abc');
    assert.strictEqual(secrets.apiKeySource({ api_key: 'config-key' }), 'env');
  } finally {
    // Restore the environment first, then clear the cache again.
    if (prev === undefined) delete process.env.SEMALT_API_KEY;
    else process.env.SEMALT_API_KEY = prev;
    secrets._clearCache();
  }
});