@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
@@ -0,0 +1,181 @@
1
+ 'use strict';
2
+
3
+ // Integration tests for checkpoints (Task 4.3) driving the REAL executor +
4
+ // runAgentLoop (and a REAL subagent loop) against the mock-LLM harness. Covers:
5
+ // * a write through the real loop is checkpointed post-gate/pre-mutation, with
6
+ // turn linkage, and is rewindable;
7
+ // * a DENIED tool call produces NO checkpoint;
8
+ // * a SUBAGENT's mutation is checkpointed into the PARENT session and is
9
+ // rewindable (subagents reuse the parent's agentExecFile).
10
+
11
+ const { test, before, after, afterEach } = require('node:test');
12
+ const assert = require('node:assert');
13
+ const fs = require('fs');
14
+ const os = require('os');
15
+ const path = require('path');
16
+
17
+ const ui = require('../lib/ui');
18
+ const { createApiClient } = require('../lib/api');
19
+ const { createToolExecutor, extractToolCalls } = require('../lib/tools');
20
+ const { createPermissionManager } = require('../lib/permissions');
21
+ const { createAgentRunner } = require('../lib/agent');
22
+ const { createCheckpointStore } = require('../lib/checkpoints');
23
+ const toolRegistry = require('../lib/tool_registry');
24
+ const { createSubagentManager, buildSpawnAgentEntry } = require('../lib/subagents');
25
+ const { startMockLLM } = require('./harness/mock-llm');
26
+
27
// File-level fixtures.
// SEMALT_API_KEY is pinned to a dummy value while this file runs and put back
// exactly as it was afterwards (including the "was not set at all" case).
let savedApiKey;

before(() => {
  savedApiKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
});

after(() => {
  if (savedApiKey !== undefined) {
    process.env.SEMALT_API_KEY = savedApiKey;
    return;
  }
  // The variable did not exist before the run: remove it rather than leaving
  // the string "undefined" behind.
  delete process.env.SEMALT_API_KEY;
});

// buildStack may register spawn_agent as a dynamic tool; drop dynamic tools
// after every test so registrations never carry over into the next one.
afterEach(() => {
  toolRegistry.clearDynamicTools();
});
34
+
35
/**
 * Create a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * @param {string} [tag='work'] - label embedded in the directory name prefix
 *   (`semalt-cpa-<tag>-`) so leftovers can be told apart.
 * @returns {string} path of the newly created directory.
 */
function tmpdir(tag = 'work') {
  const prefix = path.join(os.tmpdir(), `semalt-cpa-${tag}-`);
  return fs.mkdtempSync(prefix);
}
36
+
37
/**
 * Assemble the full stack under test: API client, permission manager, tool
 * executor wired to a real checkpoint store, and the agent runner.
 *
 * When `withSubagents` is set, a subagent manager is created from the SAME
 * executor functions and its spawn_agent entry is registered as a dynamic
 * tool, so a child's mutations go through the same checkpoint store.
 *
 * @param {string} base - value used as `api_base` (the mock LLM's base URL).
 * @param {object} [options]
 * @param {boolean} [options.skipPermissions=false] - forwarded to createPermissionManager.
 * @param {boolean} [options.withSubagents=false] - also build + register the subagent manager.
 * @returns {{runner: object, manager: (object|null), checkpointStore: object, root: string, config: object}}
 *   `manager` is null unless `withSubagents` was set; `root` is the checkpoint
 *   store's temp root directory.
 */
function buildStack(base, { skipPermissions = false, withSubagents = false } = {}) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    checkpoints: { enabled: true },
  };
  const getConfig = () => config;
  const saveConfig = (patch) => Object.assign(config, patch);
  const api = createApiClient({ getConfig, saveConfig, ui });

  // All UI callbacks are inert: nothing in these tests renders or prompts.
  const pm = createPermissionManager(ui, { skipPermissions });
  pm.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  });

  const root = tmpdir('cproot');
  const checkpointStore = createCheckpointStore({
    getConfig,
    sessionId: 'agent-sess',
    rootDir: root,
    audit: { logCheckpoint: () => {} },
  });

  const executor = createToolExecutor(pm, ui, getConfig, { checkpointStore });

  // Dependencies handed identically to the parent runner and (optionally) the
  // subagent manager.
  const sharedDeps = {
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell: executor.agentExecShell,
    agentExecFile: executor.agentExecFile,
    describePermission: executor.describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  };

  const runner = createAgentRunner({ ...sharedDeps, checkpoints: checkpointStore });

  let manager = null;
  if (withSubagents) {
    manager = createSubagentManager({ ...sharedDeps, agentDefs: [] });
    toolRegistry.registerDynamicTool(buildSpawnAgentEntry(manager));
  }

  return { runner, manager, checkpointStore, root, config };
}
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // 1. A write through the real loop is checkpointed (post-gate) and rewindable
75
+ // ---------------------------------------------------------------------------
76
+
77
// Drives one real agent turn in which the mock LLM emits a write_file call,
// then checks that (a) the write landed, (b) exactly one checkpoint holding the
// PRIOR content and the turn linkage was recorded, and (c) rewind('last')
// restores the original file.
test('a top-level write is checkpointed with turn linkage and is rewindable', async () => {
  const dir = tmpdir();
  const file = path.join(dir, 'note.txt');
  fs.writeFileSync(file, 'ORIGINAL');

  const cwd0 = process.cwd();
  const scratchDirs = [dir];
  let mock = null;
  try {
    // isPathSafe confines writes to the CWD — point it at the work dir.
    // Done inside the try so the CWD is restored even if the harness fails to start.
    process.chdir(dir);
    mock = await startMockLLM();
    mock.replyWith(`<write_file path="${file}">REWRITTEN</write_file>`); // iter 0: the mutation
    mock.replyWith('Done.'); // iter 1: final

    const { runner, checkpointStore, root } = buildStack(mock.base, { skipPermissions: true });
    scratchDirs.push(root);
    const messages = [{ role: 'user', content: 'rewrite the note' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: { onError: () => {} } });

    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'REWRITTEN', 'the write happened');

    const items = checkpointStore.list();
    assert.strictEqual(items.length, 1, 'one checkpoint captured for the write');
    const rec = checkpointStore._loadRecord(items[0].seq);
    assert.strictEqual(rec.action, 'write');
    assert.strictEqual(rec.targets[0].path, file);
    // prior state was captured BEFORE the mutation
    assert.strictEqual(Buffer.from(rec.targets[0].priorContentB64, 'base64').toString('utf8'), 'ORIGINAL');
    // turn linkage recorded (forward-compat for conversation-rewind, Task 4.3b)
    assert.strictEqual(rec.turn.turnId, 'turn-1');
    assert.strictEqual(typeof rec.turn.messageCountAtStart, 'number');

    // and it actually rewinds
    const res = checkpointStore.rewind('last');
    assert.ok(res.ok);
    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'ORIGINAL', 'rewind restored prior content');
  } finally {
    // The CWD is process-global: restore it first and unconditionally, so a
    // failing close() cannot leak the temp CWD into later tests.
    process.chdir(cwd0);
    try {
      if (mock) await mock.close();
    } finally {
      for (const d of scratchDirs) fs.rmSync(d, { recursive: true, force: true });
    }
  }
});
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // 2. A denied tool call produces NO checkpoint
118
+ // ---------------------------------------------------------------------------
119
+
120
// Runs one agent turn whose write_file call must be refused by the permission
// gate, then checks that neither the file nor any checkpoint was created.
test('a denied write (non-TTY, no skip) is NOT checkpointed', async () => {
  const dir = tmpdir();
  const file = path.join(dir, 'blocked.txt');

  const cwd0 = process.cwd();
  const scratchDirs = [dir];
  let mock = null;
  try {
    // isPathSafe confines writes to the CWD. Run from the work dir (as the
    // sibling tests do) so the target is path-safe and the ONLY thing that can
    // stop this write is the permission gate — otherwise the test could pass
    // on a path rejection alone.
    process.chdir(dir);
    mock = await startMockLLM();
    mock.replyWith(`<write_file path="${file}">SHOULD NOT WRITE</write_file>`); // denied at the gate
    mock.replyWith('ok');

    // skipPermissions:false in a non-TTY env → the gate REFUSES the write, so
    // the executor never runs and nothing is checkpointed.
    const { runner, checkpointStore, root } = buildStack(mock.base, { skipPermissions: false });
    scratchDirs.push(root);
    const messages = [{ role: 'user', content: 'try to write' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: { onError: () => {} } });

    assert.ok(!fs.existsSync(file), 'the write was denied');
    assert.strictEqual(checkpointStore.list().length, 0, 'no checkpoint for a denied call');
  } finally {
    // Restore the process-global CWD before anything that can throw.
    process.chdir(cwd0);
    try {
      if (mock) await mock.close();
    } finally {
      for (const d of scratchDirs) fs.rmSync(d, { recursive: true, force: true });
    }
  }
});
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // 3. A subagent's mutation is checkpointed in the PARENT session, rewindable
143
+ // ---------------------------------------------------------------------------
144
+
145
// The parent agent delegates via spawn_agent; the child performs the write.
// Checks that the child's mutation is recorded in the parent's checkpoint
// store, is linked to the parent's turn, and can be rewound from the parent.
test('a subagent mutation is checkpointed in the parent session and is rewindable', async () => {
  const dir = tmpdir();
  const file = path.join(dir, 'child.txt');
  fs.writeFileSync(file, 'BEFORE');

  const cwd0 = process.cwd();
  const scratchDirs = [dir];
  let mock = null;
  try {
    // Writes are confined to the CWD, so run from the work dir. Done inside
    // the try so the CWD is restored even if the harness fails to start.
    process.chdir(dir);
    mock = await startMockLLM();
    // Parent dispatches spawn_agent; the child performs the write; child + parent
    // each conclude. The child reuses the parent's agentExecFile, so its mutation
    // flows through the SAME checkpoint store.
    mock.replyWithToolCall('spawn_agent', { prompt: 'rewrite child.txt' }); // parent iter 0
    mock.replyWith(`<write_file path="${file}">AFTER</write_file>`); // child iter 0 (the mutation)
    mock.replyWith('child done'); // child iter 1 (final)
    mock.replyWith('parent done'); // parent iter 1 (final)

    const { runner, checkpointStore, root } = buildStack(mock.base, { skipPermissions: true, withSubagents: true });
    scratchDirs.push(root);
    const messages = [{ role: 'user', content: 'delegate a write' }];
    await runner.runAgentLoop(messages, 'test-model', 6, null, { callbacks: { onError: () => {} } });

    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'AFTER', 'the child performed the write');

    const items = checkpointStore.list();
    assert.strictEqual(items.length, 1, "the child's write is checkpointed in the parent session");
    const rec = checkpointStore._loadRecord(items[0].seq);
    assert.strictEqual(rec.targets[0].path, file);
    // linked to the parent's turn (the parent never reset turn context for the child)
    assert.strictEqual(rec.turn.turnId, 'turn-1');

    const res = checkpointStore.rewind('last');
    assert.ok(res.ok);
    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'BEFORE', "a subagent's mutation is rewindable from the parent");
  } finally {
    // The CWD is process-global: restore it first and unconditionally, so a
    // failing close() cannot leak the temp CWD into later tests.
    process.chdir(cwd0);
    try {
      if (mock) await mock.close();
    } finally {
      for (const d of scratchDirs) fs.rmSync(d, { recursive: true, force: true });
    }
  }
});