skyloom 1.14.8 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.github/workflows/ci.yml +2 -2
  2. package/.github/workflows/publish.yml +51 -4
  3. package/CONVERSION_PLAN.md +191 -191
  4. package/config/default.yaml +46 -43
  5. package/config/models.yaml +928 -155
  6. package/config/providers.yaml +109 -6
  7. package/dist/agents/snow.d.ts +2 -0
  8. package/dist/agents/snow.d.ts.map +1 -1
  9. package/dist/agents/snow.js +36 -5
  10. package/dist/agents/snow.js.map +1 -1
  11. package/dist/cli/loom_chat.d.ts.map +1 -1
  12. package/dist/cli/loom_chat.js +207 -1
  13. package/dist/cli/loom_chat.js.map +1 -1
  14. package/dist/cli/main.js +190 -40
  15. package/dist/cli/main.js.map +1 -1
  16. package/dist/cli/tui.d.ts.map +1 -1
  17. package/dist/cli/tui.js +6 -31
  18. package/dist/cli/tui.js.map +1 -1
  19. package/dist/core/agent.d.ts +6 -4
  20. package/dist/core/agent.d.ts.map +1 -1
  21. package/dist/core/agent.js +61 -20
  22. package/dist/core/agent.js.map +1 -1
  23. package/dist/core/catalog.d.ts.map +1 -1
  24. package/dist/core/catalog.js +30 -9
  25. package/dist/core/catalog.js.map +1 -1
  26. package/dist/core/commands.d.ts +110 -0
  27. package/dist/core/commands.d.ts.map +1 -0
  28. package/dist/core/commands.js +633 -0
  29. package/dist/core/commands.js.map +1 -0
  30. package/dist/core/concurrency.d.ts +38 -0
  31. package/dist/core/concurrency.d.ts.map +1 -0
  32. package/dist/core/concurrency.js +65 -0
  33. package/dist/core/concurrency.js.map +1 -0
  34. package/dist/core/factory.js +16 -16
  35. package/dist/core/file_checkpoint.d.ts +9 -0
  36. package/dist/core/file_checkpoint.d.ts.map +1 -1
  37. package/dist/core/file_checkpoint.js +33 -1
  38. package/dist/core/file_checkpoint.js.map +1 -1
  39. package/dist/core/llm.d.ts.map +1 -1
  40. package/dist/core/llm.js +66 -13
  41. package/dist/core/llm.js.map +1 -1
  42. package/dist/core/memory.js +51 -51
  43. package/dist/core/schemas.d.ts +16 -0
  44. package/dist/core/schemas.d.ts.map +1 -1
  45. package/dist/core/schemas.js +32 -0
  46. package/dist/core/schemas.js.map +1 -1
  47. package/dist/core/security.d.ts.map +1 -1
  48. package/dist/core/security.js +27 -0
  49. package/dist/core/security.js.map +1 -1
  50. package/dist/core/skymd.js +14 -14
  51. package/dist/core/trace.d.ts +105 -0
  52. package/dist/core/trace.d.ts.map +1 -0
  53. package/dist/core/trace.js +213 -0
  54. package/dist/core/trace.js.map +1 -0
  55. package/dist/tools/builtin.d.ts +2 -6
  56. package/dist/tools/builtin.d.ts.map +1 -1
  57. package/dist/tools/builtin.js +18 -111
  58. package/dist/tools/builtin.js.map +1 -1
  59. package/dist/tools/extra.d.ts +13 -0
  60. package/dist/tools/extra.d.ts.map +1 -0
  61. package/dist/tools/extra.js +827 -0
  62. package/dist/tools/extra.js.map +1 -0
  63. package/dist/tools/guards.d.ts +12 -0
  64. package/dist/tools/guards.d.ts.map +1 -0
  65. package/dist/tools/guards.js +143 -0
  66. package/dist/tools/guards.js.map +1 -0
  67. package/dist/tools/model_tool.d.ts.map +1 -1
  68. package/dist/tools/model_tool.js +24 -4
  69. package/dist/tools/model_tool.js.map +1 -1
  70. package/dist/web/markdown.d.ts +32 -0
  71. package/dist/web/markdown.d.ts.map +1 -0
  72. package/dist/web/markdown.js +202 -0
  73. package/dist/web/markdown.js.map +1 -0
  74. package/dist/web/server.d.ts +4 -0
  75. package/dist/web/server.d.ts.map +1 -1
  76. package/dist/web/server.js +14 -582
  77. package/dist/web/server.js.map +1 -1
  78. package/dist/web/ui.d.ts +31 -0
  79. package/dist/web/ui.d.ts.map +1 -0
  80. package/dist/web/ui.js +1009 -0
  81. package/dist/web/ui.js.map +1 -0
  82. package/docs/AESTHETIC_DESIGN.md +152 -152
  83. package/docs/OPTIMIZATION_PLAN.md +178 -178
  84. package/package.json +1 -1
  85. package/src/agents/snow.ts +38 -5
  86. package/src/cli/commands_md.ts +112 -112
  87. package/src/cli/input_macros.ts +83 -83
  88. package/src/cli/loom.ts +1041 -1041
  89. package/src/cli/loom_chat.ts +772 -603
  90. package/src/cli/main.ts +853 -723
  91. package/src/cli/tui.ts +264 -289
  92. package/src/core/agent/guard.ts +133 -133
  93. package/src/core/agent/task.ts +100 -100
  94. package/src/core/agent.ts +1630 -1590
  95. package/src/core/agent_helpers.ts +500 -500
  96. package/src/core/bus.ts +221 -221
  97. package/src/core/cache.ts +153 -153
  98. package/src/core/catalog.ts +199 -178
  99. package/src/core/circuit_breaker.ts +119 -119
  100. package/src/core/commands.ts +704 -0
  101. package/src/core/concurrency.ts +73 -0
  102. package/src/core/config.ts +365 -365
  103. package/src/core/constants.ts +95 -95
  104. package/src/core/factory.ts +656 -656
  105. package/src/core/file_checkpoint.ts +163 -136
  106. package/src/core/hooks.ts +126 -126
  107. package/src/core/llm.ts +972 -915
  108. package/src/core/logger.ts +143 -143
  109. package/src/core/mcp.ts +1001 -1001
  110. package/src/core/memory.ts +1201 -1201
  111. package/src/core/middleware.ts +350 -350
  112. package/src/core/model_config.ts +159 -159
  113. package/src/core/pipelines.ts +424 -424
  114. package/src/core/schemas.ts +319 -282
  115. package/src/core/security.ts +27 -0
  116. package/src/core/semantic.ts +211 -211
  117. package/src/core/skill.ts +384 -384
  118. package/src/core/skymd.ts +143 -143
  119. package/src/core/theme.ts +65 -65
  120. package/src/core/tool.ts +457 -457
  121. package/src/core/trace.ts +236 -0
  122. package/src/core/verify.ts +71 -71
  123. package/src/plugins/loader.ts +91 -91
  124. package/src/skills/loader.ts +75 -75
  125. package/src/tools/builtin.ts +571 -642
  126. package/src/tools/computer.ts +279 -279
  127. package/src/tools/extra.ts +662 -0
  128. package/src/tools/guards.ts +82 -0
  129. package/src/tools/model_tool.ts +93 -74
  130. package/src/tools/todo.ts +76 -76
  131. package/src/web/markdown.ts +193 -0
  132. package/src/web/server.ts +117 -693
  133. package/src/web/ui.ts +949 -0
  134. package/tests/agent.test.ts +211 -159
  135. package/tests/agent_helpers.test.ts +48 -48
  136. package/tests/catalog.test.ts +86 -86
  137. package/tests/checkpoint_commands.test.ts +124 -124
  138. package/tests/claude_compat.test.ts +110 -110
  139. package/tests/commands.test.ts +103 -0
  140. package/tests/concurrency.test.ts +102 -0
  141. package/tests/config.test.ts +41 -41
  142. package/tests/extra_tools.test.ts +212 -0
  143. package/tests/fence_plugin.test.ts +52 -52
  144. package/tests/guard.test.ts +75 -75
  145. package/tests/loom.test.ts +337 -337
  146. package/tests/memory.test.ts +170 -170
  147. package/tests/model_config.test.ts +109 -109
  148. package/tests/skymd.test.ts +146 -146
  149. package/tests/ssrf.test.ts +38 -38
  150. package/tests/structured_retry.test.ts +87 -0
  151. package/tests/task.test.ts +60 -60
  152. package/tests/todo_toolstats.test.ts +94 -94
  153. package/tests/trace.test.ts +128 -0
  154. package/tests/tui.test.ts +67 -67
  155. package/tests/web.test.ts +169 -0
  156. package/tsconfig.json +38 -38
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Shared safety guards for tool handlers: SSRF protection for outbound fetches
3
+ * and the optional workspace fence for filesystem tools. Kept in their own
4
+ * module so both builtin.ts and extra.ts can use them without a circular import.
5
+ */
6
+
7
+ import * as os from 'os';
8
+ import * as path from 'path';
9
+ import { lookup } from 'dns/promises';
10
+
11
+ /* ── SSRF guard for outbound fetches ──────────────────────────────────────
12
+ Auto-approved/low-danger fetch tools must not be able to pivot to internal
13
+ services / cloud metadata (169.254.169.254). We block private, loopback and
14
+ link-local targets — both when the URL is an IP literal and after DNS
15
+ resolution. Operators who need internal hosts set SKYLOOM_ALLOW_PRIVATE_FETCH=1.
16
+ ────────────────────────────────────────────────────────────────────────── */
17
/**
 * Reports whether a dotted-quad IPv4 string lies in a range that outbound
 * fetches must not reach: 0.0.0.0/8, 127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12,
 * 192.168.0.0/16, 169.254.0.0/16 (link-local, incl. cloud metadata) and
 * 100.64.0.0/10 (CGNAT).
 *
 * @param ip Candidate address, e.g. `"10.1.2.3"`.
 * @returns `true` for the ranges above; `false` for anything else, including
 *          strings that do not split into four numeric octets in 0–255.
 */
export function isPrivateIPv4(ip: string): boolean {
  const p = ip.split('.').map(Number);
  if (p.length !== 4 || p.some((n) => Number.isNaN(n) || n < 0 || n > 255)) return false;
  const [a, b] = p;
  if (a === 0 || a === 127) return true;             // this-host / loopback
  if (a === 10) return true;                         // private
  if (a === 172 && b >= 16 && b <= 31) return true;  // private
  if (a === 192 && b === 168) return true;           // private
  if (a === 169 && b === 254) return true;           // link-local + cloud metadata
  if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT
  return false;
}

/**
 * Reports whether `ip` is a private, loopback or link-local address (IPv4 or
 * IPv6). The argument may also be a plain hostname — callers pass
 * `URL.hostname` straight in — in which case the answer is `false` and the
 * caller is expected to check the resolved addresses separately.
 *
 * Handled IPv6 forms: `::` / `::1`, fc00::/7 unique-local, fe80::/10
 * link-local, and IPv4-mapped addresses in both dotted (`::ffff:127.0.0.1`)
 * and hex (`::ffff:7f00:1`) notation.
 *
 * @param ip IP literal (brackets already stripped) or hostname.
 * @returns `true` when the literal is in a blocked range.
 */
export function isPrivateIp(ip: string): boolean {
  const v = ip.toLowerCase();

  // Without a colon this cannot be IPv6. Only the IPv4 check applies, so that
  // hostnames which merely start with "fc"/"fd"/"fe8".."feb" (fda.gov,
  // features.example.com, …) are not mistaken for IPv6 prefixes.
  if (!v.includes(':')) {
    return v.includes('.') ? isPrivateIPv4(v) : false;
  }

  if (v === '::1' || v === '::') return true;

  if (v.startsWith('::ffff:')) { // IPv4-mapped IPv6
    const mapped = v.slice(7);
    if (mapped.includes('.')) return isPrivateIPv4(mapped);
    // WHATWG URL serializes [::ffff:127.0.0.1] as [::ffff:7f00:1], so the
    // hex form is what actually reaches this function from a parsed URL.
    const hex = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(mapped);
    if (hex) {
      const hi = parseInt(hex[1], 16);
      const lo = parseInt(hex[2], 16);
      return isPrivateIPv4(`${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`);
    }
  }

  if (/^f[cd]/.test(v)) return true;    // fc00::/7 unique-local
  if (/^fe[89ab]/.test(v)) return true; // fe80::/10 link-local
  return false;
}
42
+
43
/**
 * Rejects URLs that an outbound fetch tool must not request.
 *
 * Checks, in order: the string parses as a URL; the scheme is http or https;
 * unless SKYLOOM_ALLOW_PRIVATE_FETCH=1, the host is not a private/loopback IP
 * literal and none of the addresses it resolves to is private.
 *
 * @param rawUrl URL string supplied to the fetch tool.
 * @throws Error when the URL is invalid, uses another scheme, or targets a
 *         private address. A failed DNS lookup does NOT throw here — the
 *         function returns so the subsequent fetch reports the DNS error.
 */
export async function assertFetchAllowed(rawUrl: string): Promise<void> {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    throw new Error(`invalid URL: ${rawUrl}`);
  }

  const scheme = parsed.protocol;
  if (!(scheme === 'http:' || scheme === 'https:')) {
    throw new Error(`blocked URL scheme '${scheme}' — only http/https are allowed`);
  }

  // Operator opt-out: skip every address check.
  if (process.env.SKYLOOM_ALLOW_PRIVATE_FETCH === '1') return;

  // URL.hostname keeps the square brackets around IPv6 literals.
  const host = parsed.hostname.replace(/^\[|\]$/g, '');
  if (isPrivateIp(host)) {
    throw new Error(`blocked request to private/loopback address ${host} (set SKYLOOM_ALLOW_PRIVATE_FETCH=1 to allow)`);
  }

  let resolved: Array<{ address: string }>;
  try {
    resolved = await lookup(host, { all: true });
  } catch {
    return; // let fetch surface DNS errors
  }

  const offender = resolved.find((entry) => isPrivateIp(entry.address));
  if (offender) {
    throw new Error(`blocked request: ${host} resolves to private address ${offender.address} (set SKYLOOM_ALLOW_PRIVATE_FETCH=1 to allow)`);
  }
}
62
+
63
+ /* ── Optional workspace fence for file tools ──────────────────────────────
64
+ Off by default (the agent is a Claude-Code-style assistant that legitimately
65
+ works across a repo). Set SKYLOOM_WORKSPACE_FENCE=1 to confine file tools to
66
+ a root directory (SKYLOOM_WORKSPACE_ROOT, or the process cwd), blocking
67
+ traversal to ~/.ssh, /etc, etc.
68
+ ────────────────────────────────────────────────────────────────────────── */
69
/**
 * Resolves the workspace fence root.
 *
 * @returns `null` when SKYLOOM_WORKSPACE_FENCE is not exactly `'1'`;
 *          otherwise the absolute form of SKYLOOM_WORKSPACE_ROOT (a leading
 *          `~` expands to the home directory), or the process cwd when that
 *          variable is unset or empty.
 */
export function fenceRoot(): string | null {
  if (process.env.SKYLOOM_WORKSPACE_FENCE !== '1') return null;
  const raw = process.env.SKYLOOM_WORKSPACE_ROOT;
  if (!raw) return process.cwd();
  // Replacer *function*, not string: a replacement string would have `$&`,
  // `$1`, `$$` … inside the home path interpreted as substitution patterns.
  return path.resolve(raw.replace(/^~(?=$|\/|\\)/, () => os.homedir()));
}

/**
 * Returns an error string if `resolvedPath` is outside the fence, else null.
 *
 * The comparison is purely lexical (`path.relative`); symlinks are not
 * followed here. NOTE(review): callers presumably pass an already-resolved
 * absolute path, as the parameter name suggests — confirm at call sites.
 *
 * @param resolvedPath Path a file tool is about to touch.
 * @returns `null` when the fence is off or the path is the root / inside it;
 *          otherwise a user-facing error message.
 */
export function fenceCheck(resolvedPath: string): string | null {
  const root = fenceRoot();
  if (!root) return null;
  const rel = path.relative(root, resolvedPath);
  // Outside means: the relative path climbs out ("..", "../x") or is absolute
  // (different drive on Windows). A bare startsWith('..') would also reject
  // legitimate in-root names such as "..cache/file".
  const outside = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
  if (!outside) return null;
  return `Error: 路径越界 — 工作区围栏已启用 (SKYLOOM_WORKSPACE_FENCE=1),'${resolvedPath}' 在根目录 '${root}' 之外。`;
}
@@ -1,74 +1,93 @@
1
- /**
2
- * Model self-service tools — let an agent inspect and switch its own LLM.
3
- *
4
- * Registered per-agent (same pattern as delegate_to), so the closure knows
5
- * which agent is asking. The runtime config object is shared by reference
6
- * with LLMClient, so a switch takes effect on the very next LLM call and is
7
- * persisted to ~/.skyloom/config.yaml.
8
- */
9
-
10
- import type { ToolDefinition } from '../core/tool';
11
- import { listProviders, modelsFor, providerLabel } from '../core/catalog';
12
- import { setAgentModel, clearAgentModel, describeAgentLLM } from '../core/model_config';
13
-
14
- export function createModelTools(agentName: string, runtimeConfig: any): ToolDefinition[] {
15
- const listModels: ToolDefinition = {
16
- name: 'list_models',
17
- description:
18
- 'List every model available in the catalog (grouped by provider) plus your current model. ' +
19
- 'Call this before set_my_model to pick a valid id.',
20
- parameters: [],
21
- handler: async () => {
22
- const me = describeAgentLLM(runtimeConfig, agentName);
23
- const lines: string[] = [
24
- `Current: ${me.model} (${me.source === 'agent' ? 'per-agent override' : 'unified default'})`,
25
- '',
26
- ];
27
- for (const p of listProviders()) {
28
- const models = modelsFor(p);
29
- if (!models.length) continue;
30
- lines.push(`${providerLabel(p)}: ${models.map(m => m.id).join(', ')}`);
31
- }
32
- return lines.join('\n');
33
- },
34
- };
35
-
36
- const setMyModel: ToolDefinition = {
37
- name: 'set_my_model',
38
- description:
39
- 'Switch the LLM model YOU run on, effective from your next reply and persisted to config. ' +
40
- 'Use when the user asks you to change/upgrade/downgrade your model. ' +
41
- "Pass model='default' to drop your override and follow the unified default again. " +
42
- 'Call list_models first if unsure of valid ids.',
43
- parameters: [
44
- {
45
- name: 'model',
46
- type: 'string',
47
- description: "Catalog model id (e.g. 'deepseek-chat'), or 'default' to clear the override",
48
- required: true,
49
- },
50
- ],
51
- handler: async (kwargs: Record<string, any>) => {
52
- const modelId = String(kwargs.model || '').trim();
53
- if (!modelId) return '✗ model is required';
54
-
55
- const before = describeAgentLLM(runtimeConfig, agentName);
56
- if (modelId === 'default' || modelId === 'unified') {
57
- clearAgentModel(runtimeConfig, agentName);
58
- const after = describeAgentLLM(runtimeConfig, agentName);
59
- return `✓ ${agentName} 已回到统一配置: ${before.model} ${after.model} (default)`;
60
- }
61
-
62
- const r = setAgentModel(runtimeConfig, agentName, modelId);
63
- if (!r.ok) {
64
- return `✗ '${modelId}' 不在模型目录中。${r.suggestions.length ? '可选: ' + r.suggestions.join(', ') : '先调 list_models 查看可用模型。'}`;
65
- }
66
- const keyNote = describeAgentLLM(runtimeConfig, agentName).keySource === 'missing'
67
- ? `\n⚠ 该 provider (${r.provider}) 尚无可用 API key — 提醒用户运行 /apikey set ${r.provider} <key>`
68
- : '';
69
- return `✓ ${agentName} 的模型已切换: ${before.model} → ${modelId}${r.provider ? ` (${r.provider})` : ''},下一次回复即生效${keyNote}`;
70
- },
71
- };
72
-
73
- return [listModels, setMyModel];
74
- }
1
+ /**
2
+ * Model self-service tools — let an agent inspect and switch its own LLM.
3
+ *
4
+ * Registered per-agent (same pattern as delegate_to), so the closure knows
5
+ * which agent is asking. The runtime config object is shared by reference
6
+ * with LLMClient, so a switch takes effect on the very next LLM call and is
7
+ * persisted to ~/.skyloom/config.yaml.
8
+ */
9
+
10
+ import type { ToolDefinition } from '../core/tool';
11
+ import { listProviders, modelsFor, providerLabel } from '../core/catalog';
12
+ import { setAgentModel, clearAgentModel, describeAgentLLM } from '../core/model_config';
13
+
14
/**
 * Builds the two model self-service tools for one agent.
 *
 * - `list_models` prints the agent's current model followed by the catalog,
 *   grouped by provider, optionally filtered by a provider substring.
 * - `set_my_model` switches the agent's model via `setAgentModel`, or clears
 *   the per-agent override via `clearAgentModel` when given
 *   `'default'` / `'unified'`.
 *
 * @param agentName     Name of the agent the tools act on (captured in the closures).
 * @param runtimeConfig Config object handed to the model_config helpers.
 * @returns `[list_models, set_my_model]`.
 */
export function createModelTools(agentName: string, runtimeConfig: any): ToolDefinition[] {
  const listModels: ToolDefinition = {
    name: 'list_models',
    description:
      'List every model available in the catalog (grouped by provider) plus your current model. ' +
      'Call this before set_my_model to pick a valid id. ' +
      'Shows model id, context window, cost per 1M tokens, and description.',
    parameters: [
      {
        name: 'provider',
        type: 'string',
        description: 'Optional: filter by provider name (e.g. "openai", "deepseek", "qwen")',
        required: false,
      },
    ],
    handler: async (kwargs: Record<string, any>) => {
      const me = describeAgentLLM(runtimeConfig, agentName);
      // `kwargs?.` tolerates a call with no argument object at all; trimming
      // keeps stray whitespace from silently filtering out every provider.
      const filter = String(kwargs?.provider || '').trim().toLowerCase();
      const lines: string[] = [
        `Current: ${me.model} (${me.source === 'agent' ? 'per-agent override' : 'unified default'})`,
        '',
      ];
      let shownProviders = 0;
      let totalModels = 0;
      for (const p of listProviders()) {
        const models = modelsFor(p);
        if (!models.length) continue;
        const label = providerLabel(p);
        if (filter && !p.toLowerCase().includes(filter) && !label.toLowerCase().includes(filter)) continue;
        shownProviders++;
        lines.push(`${label}:`);
        for (const m of models) {
          totalModels++;
          const costStr = m.costIn === 0 && m.costOut === 0 ? 'FREE' : `$${m.costIn.toFixed(2)}/$${m.costOut.toFixed(2)}`;
          const ctxStr = m.context >= 1000000 ? `${(m.context / 1000000).toFixed(0)}M` : m.context >= 1000 ? `${(m.context / 1000).toFixed(0)}K` : `${m.context}`;
          lines.push(` · ${m.id.padEnd(42)} ${ctxStr.padStart(5)} ${costStr.padStart(14)} ${m.desc}`);
        }
        lines.push('');
      }
      // Count only the providers actually listed, so the footer agrees with
      // the model count when a filter is active or a provider has no models.
      lines.push(`Total: ${shownProviders} providers · ${totalModels} models`);
      return lines.join('\n');
    },
  };

  const setMyModel: ToolDefinition = {
    name: 'set_my_model',
    description:
      'Switch the LLM model YOU run on, effective from your next reply and persisted to config. ' +
      'Use when the user asks you to change/upgrade/downgrade your model. ' +
      "Pass model='default' to drop your override and follow the unified default again. " +
      'Call list_models first if unsure of valid ids.',
    parameters: [
      {
        name: 'model',
        type: 'string',
        description: "Catalog model id (e.g. 'deepseek-chat'), or 'default' to clear the override",
        required: true,
      },
    ],
    handler: async (kwargs: Record<string, any>) => {
      const modelId = String(kwargs.model || '').trim();
      if (!modelId) return '✗ model is required';

      // Snapshot the model before any change so the reply can show old → new.
      const before = describeAgentLLM(runtimeConfig, agentName);
      if (modelId === 'default' || modelId === 'unified') {
        clearAgentModel(runtimeConfig, agentName);
        const after = describeAgentLLM(runtimeConfig, agentName);
        return `✓ ${agentName} 已回到统一配置: ${before.model} → ${after.model} (default)`;
      }

      const r = setAgentModel(runtimeConfig, agentName, modelId);
      if (!r.ok) {
        return `✗ '${modelId}' 不在模型目录中。${r.suggestions.length ? '可选: ' + r.suggestions.join(', ') : '先调 list_models 查看可用模型。'}`;
      }
      // Re-read after the switch: warn when the new provider has no usable key.
      const keyNote = describeAgentLLM(runtimeConfig, agentName).keySource === 'missing'
        ? `\n⚠ 该 provider (${r.provider}) 尚无可用 API key — 提醒用户运行 /apikey set ${r.provider} <key>`
        : '';
      return `✓ ${agentName} 的模型已切换: ${before.model} → ${modelId}${r.provider ? ` (${r.provider})` : ''},下一次回复即生效${keyNote}`;
    },
  };

  return [listModels, setMyModel];
}
package/src/tools/todo.ts CHANGED
@@ -1,76 +1,76 @@
1
- /**
2
- * todo_write — agents externalize multi-step task state (Claude Code 式).
3
- *
4
- * The agent maintains a checklist in working memory: plan it up front, mark
5
- * items active/done as it works. The list survives compaction (working
6
- * memory, not chat history), the CLI renders it live, and the tool's return
7
- * value keeps the current state visible to the model itself.
8
- *
9
- * Whole-list replace semantics: every call passes the complete list. That
10
- * keeps the tool idempotent and trivially recoverable after a bad call.
11
- */
12
-
13
- import type { ToolDefinition } from '../core/tool';
14
-
15
- export type TodoStatus = 'pending' | 'active' | 'done';
16
- export interface TodoItem {
17
- text: string;
18
- status: TodoStatus;
19
- }
20
-
21
- const MAX_ITEMS = 20;
22
- const STATUSES = new Set<string>(['pending', 'active', 'done']);
23
-
24
- export const TODO_WORKING_KEY = 'todos';
25
-
26
- /** Parse + validate the items argument (JSON array). */
27
- export function parseTodoItems(raw: any): { items: TodoItem[] | null; error: string } {
28
- let parsed: any = raw;
29
- if (typeof raw === 'string') {
30
- try { parsed = JSON.parse(raw); } catch { return { items: null, error: 'items 必须是合法 JSON 数组' }; }
31
- }
32
- if (!Array.isArray(parsed)) return { items: null, error: 'items 必须是数组' };
33
- if (parsed.length > MAX_ITEMS) return { items: null, error: `最多 ${MAX_ITEMS} 项 — 合并粒度` };
34
- const items: TodoItem[] = [];
35
- for (const it of parsed) {
36
- const text = typeof it === 'string' ? it : String(it?.text ?? '').trim();
37
- const status = typeof it === 'object' && it !== null && STATUSES.has(String(it.status)) ? String(it.status) : 'pending';
38
- if (!text) return { items: null, error: '存在空的任务项' };
39
- items.push({ text: text.slice(0, 120), status: status as TodoStatus });
40
- }
41
- return { items, error: '' };
42
- }
43
-
44
- export function renderTodoList(items: TodoItem[]): string {
45
- const done = items.filter(i => i.status === 'done').length;
46
- const lines = items.map(i => {
47
- const mark = i.status === 'done' ? '✓' : i.status === 'active' ? '◐' : '·';
48
- return `${mark} ${i.text}`;
49
- });
50
- return `任务清单 ${done}/${items.length}\n${lines.join('\n')}`;
51
- }
52
-
53
- export function createTodoTool(agent: { memory: { setWorking(k: string, v: any): void } }): ToolDefinition {
54
- return {
55
- name: 'todo_write',
56
- description:
57
- 'Maintain your task checklist for multi-step work. Call it FIRST to plan (all pending), ' +
58
- 'then again whenever an item starts (active) or finishes (done) — pass the COMPLETE list each time. ' +
59
- 'items is a JSON array: [{"text":"...","status":"pending|active|done"}, ...]. ' +
60
- 'Use for any task with 3+ steps; skip for trivial one-shot answers.',
61
- parameters: [
62
- {
63
- name: 'items',
64
- type: 'string',
65
- description: 'The complete checklist as a JSON array of {text, status} (status: pending/active/done)',
66
- required: true,
67
- },
68
- ],
69
- handler: async (kwargs: Record<string, any>) => {
70
- const { items, error } = parseTodoItems(kwargs.items);
71
- if (!items) return `✗ ${error}`;
72
- agent.memory.setWorking(TODO_WORKING_KEY, items);
73
- return `✓ ${renderTodoList(items)}`;
74
- },
75
- };
76
- }
1
+ /**
2
+ * todo_write — agents externalize multi-step task state (Claude Code 式).
3
+ *
4
+ * The agent maintains a checklist in working memory: plan it up front, mark
5
+ * items active/done as it works. The list survives compaction (working
6
+ * memory, not chat history), the CLI renders it live, and the tool's return
7
+ * value keeps the current state visible to the model itself.
8
+ *
9
+ * Whole-list replace semantics: every call passes the complete list. That
10
+ * keeps the tool idempotent and trivially recoverable after a bad call.
11
+ */
12
+
13
+ import type { ToolDefinition } from '../core/tool';
14
+
15
/** Lifecycle state of a single checklist entry. */
export type TodoStatus = 'pending' | 'active' | 'done';

/** One checklist entry. */
export interface TodoItem {
  text: string;
  status: TodoStatus;
}

/** Longest checklist parseTodoItems accepts. */
const MAX_ITEMS = 20;
/** Status strings accepted verbatim; anything else becomes 'pending'. */
const STATUSES = new Set<string>(['pending', 'active', 'done']);
/** Longest item text kept; longer text is cut to this many UTF-16 units. */
const MAX_TEXT_LENGTH = 120;

export const TODO_WORKING_KEY = 'todos';

/**
 * Parse + validate the items argument (JSON array).
 *
 * Accepts either an array or a JSON string encoding one. Each element may be
 * a plain string (status defaults to 'pending') or an object with `text` and
 * an optional `status`. Text is trimmed in BOTH forms, so whitespace-only
 * entries are rejected as empty regardless of how they were supplied.
 *
 * @param raw Array, or JSON text of an array.
 * @returns `{ items, error: '' }` on success, `{ items: null, error }` with a
 *          user-facing message when the input is not valid JSON, not an
 *          array, longer than MAX_ITEMS, or contains an empty item.
 */
export function parseTodoItems(raw: any): { items: TodoItem[] | null; error: string } {
  let parsed: any = raw;
  if (typeof raw === 'string') {
    try {
      parsed = JSON.parse(raw);
    } catch {
      return { items: null, error: 'items 必须是合法 JSON 数组' };
    }
  }
  if (!Array.isArray(parsed)) return { items: null, error: 'items 必须是数组' };
  if (parsed.length > MAX_ITEMS) return { items: null, error: `最多 ${MAX_ITEMS} 项 — 合并粒度` };

  const items: TodoItem[] = [];
  for (const it of parsed) {
    // Trim both the string form and the object form.
    const text = (typeof it === 'string' ? it : String(it?.text ?? '')).trim();
    const status = typeof it === 'object' && it !== null && STATUSES.has(String(it.status))
      ? String(it.status)
      : 'pending';
    if (!text) return { items: null, error: '存在空的任务项' };
    items.push({ text: text.slice(0, MAX_TEXT_LENGTH), status: status as TodoStatus });
  }
  return { items, error: '' };
}
43
+
44
/**
 * Renders the checklist as plain text: a header line with the done/total
 * count, then one line per item prefixed by a status glyph
 * (✓ done, ◐ active, · anything else).
 *
 * @param items Checklist entries in display order.
 * @returns The multi-line summary string.
 */
export function renderTodoList(items: TodoItem[]): string {
  let finished = 0;
  const rows: string[] = [];
  for (const item of items) {
    let glyph = '·';
    if (item.status === 'done') {
      glyph = '✓';
      finished += 1;
    } else if (item.status === 'active') {
      glyph = '◐';
    }
    rows.push(`${glyph} ${item.text}`);
  }
  return `任务清单 ${finished}/${items.length}\n${rows.join('\n')}`;
}
52
+
53
/**
 * Builds the `todo_write` tool bound to one agent's memory.
 *
 * The handler validates `kwargs.items` with parseTodoItems, stores the
 * resulting list via `agent.memory.setWorking(TODO_WORKING_KEY, …)` and
 * returns the rendered list prefixed with ✓, or the validation error
 * prefixed with ✗ (in which case nothing is stored).
 *
 * @param agent Object exposing `memory.setWorking`.
 * @returns The tool definition.
 */
export function createTodoTool(agent: { memory: { setWorking(k: string, v: any): void } }): ToolDefinition {
  const description = [
    'Maintain your task checklist for multi-step work. Call it FIRST to plan (all pending), ',
    'then again whenever an item starts (active) or finishes (done) — pass the COMPLETE list each time. ',
    'items is a JSON array: [{"text":"...","status":"pending|active|done"}, ...]. ',
    'Use for any task with 3+ steps; skip for trivial one-shot answers.',
  ].join('');

  const handler = async (kwargs: Record<string, any>) => {
    const outcome = parseTodoItems(kwargs.items);
    if (!outcome.items) return `✗ ${outcome.error}`;
    agent.memory.setWorking(TODO_WORKING_KEY, outcome.items);
    return `✓ ${renderTodoList(outcome.items)}`;
  };

  return {
    name: 'todo_write',
    description,
    parameters: [
      {
        name: 'items',
        type: 'string',
        description: 'The complete checklist as a JSON array of {text, status} (status: pending/active/done)',
        required: true,
      },
    ],
    handler,
  };
}
@@ -0,0 +1,193 @@
1
+ /**
2
+ * 水墨气象台 · Markdown 渲染器 — dependency-free, isomorphic.
3
+ *
4
+ * These functions run in BOTH worlds:
5
+ * - in Node, imported normally (unit-testable);
6
+ * - in the browser, injected into the page via `fn.toString()` (see ui.ts).
7
+ *
8
+ * Constraints that follow from the injection trick:
9
+ * - every function must be a top-level `function` declaration;
10
+ * - they may only call each other by bare name (no imports, no module-scope
11
+ * state) — tsc's CommonJS emit keeps such cross-calls as plain identifiers,
12
+ * so the stringified source stays valid in a browser.
13
+ */
14
+
15
/**
 * HTML-escape text content: `&`, `<`, `>` and `"` become entities in a single
 * pass. The input is coerced with `String()` first. Single quotes are left
 * as-is.
 */
export function escapeHtml(s: string): string {
  return String(s).replace(/[&<>"]/g, function (ch) {
    if (ch === '&') return '&amp;';
    if (ch === '<') return '&lt;';
    if (ch === '>') return '&gt;';
    return '&quot;';
  });
}
23
+
24
/**
 * Minimal one-pass syntax highlighter. Tokenizes comments, strings, keywords
 * and numbers with a single alternation so already-emitted HTML is never
 * re-matched; everything unrecognized is just escaped.
 *
 * Comment syntax is chosen from the language tag:
 *   - hash languages (py, sh, rb, yaml, …): `# …`
 *   - dash languages (sql dialects, lua, haskell, …): `-- …` and block comments
 *   - everything else, including an empty tag: `// …` and block comments
 * `--` is deliberately NOT a comment outside the dash languages: in
 * JS/TS/C/Go/Rust it is the decrement operator, and treating it as a comment
 * start would swallow the rest of a line such as `i--; next();`.
 *
 * @param code Raw source text of the code block.
 * @param lang Language tag from the fence (may be empty).
 * @returns HTML with `<span class="tk-c|tk-s|tk-k|tk-n">` around tokens; the
 *          whole input is merely escaped when longer than 30000 characters.
 */
export function highlightCode(code: string, lang: string): string {
  if (code.length > 30000) return escapeHtml(code); // don't jank on huge blocks
  const l = (lang || '').toLowerCase();
  const hashComments = /^(py|python|sh|bash|zsh|shell|rb|ruby|yaml|yml|toml|make|makefile|r)$/.test(l);
  const dashComments = /^(sql|mysql|pgsql|postgres|postgresql|plsql|sqlite|lua|hs|haskell|elm|ada)$/.test(l);
  const kw = '\\b(?:function|return|if|else|elif|for|while|do|const|let|var|class|import|export|from|async|await|new|try|catch|finally|throw|switch|case|default|break|continue|typeof|instanceof|in|of|def|lambda|pass|yield|with|as|is|not|and|or|None|True|False|self|this|fn|pub|impl|struct|enum|match|use|mod|trait|interface|type|extends|implements|public|private|protected|static|void|null|undefined|true|false|SELECT|FROM|WHERE|INSERT|UPDATE|DELETE|JOIN|GROUP|ORDER|BY|LIMIT)\\b';
  let comment = '\\/\\/[^\\n]*|\\/\\*[\\s\\S]*?\\*\\/';
  if (hashComments) comment = '#[^\\n]*';
  else if (dashComments) comment = '--[^\\n]*|\\/\\*[\\s\\S]*?\\*\\/';
  // Capture groups: 1 = comment, 2 = string, 3 = keyword, 4 = number.
  const re = new RegExp(
    '(' + comment + ')' +
    '|("(?:[^"\\\\\\n]|\\\\.)*"|\'(?:[^\'\\\\\\n]|\\\\.)*\'|`(?:[^`\\\\]|\\\\.)*`)' +
    '|(' + kw + ')' +
    '|(\\b\\d+(?:\\.\\d+)?\\b)',
    'g'
  );
  let out = '';
  let last = 0;
  let m: RegExpExecArray | null;
  while ((m = re.exec(code))) {
    // Text between tokens is escaped verbatim.
    out += escapeHtml(code.slice(last, m.index));
    if (m[1]) out += '<span class="tk-c">' + escapeHtml(m[1]) + '</span>';
    else if (m[2]) out += '<span class="tk-s">' + escapeHtml(m[2]) + '</span>';
    else if (m[3]) out += '<span class="tk-k">' + escapeHtml(m[3]) + '</span>';
    else out += '<span class="tk-n">' + escapeHtml(m[4]) + '</span>';
    last = m.index + m[0].length;
  }
  return out + escapeHtml(code.slice(last));
}
58
+
59
/**
 * Inline markdown → HTML for one run of text: backtick code spans, `**bold**`,
 * `*italic*`, `~~strikethrough~~` and `[text](http…)` links.
 *
 * The text is first split on backtick spans; span contents are escaped and
 * wrapped in `<code>` with no further processing. Every other segment is
 * HTML-escaped BEFORE the markdown patterns run, so the only tags in the
 * output are the ones inserted here.
 */
export function mdInline(s: string): string {
  return String(s)
    .split(/(`[^`\n]*`)/)
    .map(function (seg) {
      const isCodeSpan =
        seg.length > 1 && seg.charAt(0) === '`' && seg.charAt(seg.length - 1) === '`';
      if (isCodeSpan) {
        return '<code>' + escapeHtml(seg.slice(1, -1)) + '</code>';
      }
      return escapeHtml(seg)
        .replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>')
        .replace(/(^|[^*\w])\*([^*\n]+)\*(?!\*)/g, '$1<em>$2</em>')
        .replace(/~~([^~\n]+)~~/g, '<del>$1</del>')
        // Only http(s) links; the URL is already escaped, so quotes can't break out.
        .replace(/\[([^\]\n]+)\]\((https?:\/\/[^)\s]+)\)/g,
          '<a href="$2" target="_blank" rel="noopener noreferrer">$1</a>');
    })
    .join('');
}
80
+
81
/**
 * Block-level markdown → HTML.
 *
 * Lines are consumed top to bottom and matched against block kinds in this
 * fixed order: fenced code (an unclosed fence runs to end of input, which is
 * what a partially streamed reply looks like), blank line, #–#### heading
 * (rendered one level down, as h2–h5), horizontal rule, blockquote, table
 * (header row followed by a separator row), unordered list, ordered list.
 * Any other line accumulates into the current paragraph, whose lines are
 * joined with `<br>`. Inline formatting is delegated to mdInline.
 */
export function mdToHtml(src: string): string {
  const rows = String(src).replace(/\r\n?/g, '\n').split('\n');
  const total = rows.length;

  const FENCE_OPEN = /^\s*```\s*([\w+#-]*)\s*$/;
  const FENCE_CLOSE = /^\s*```\s*$/;
  const BLANK = /^\s*$/;
  const HEADING = /^(#{1,4})\s+(.*)$/;
  const RULE = /^\s*(?:-{3,}|\*{3,})\s*$/;
  const QUOTE = /^\s*>\s?/;
  const TABLE_SEP = /^\s*\|?[\s:|-]+\|?\s*$/;
  const BULLET = /^\s*[-*+]\s+/;
  const NUMBERED = /^\s*\d+[.)]\s+/;

  let out = '';
  let pos = 0;
  let pending: string[] = [];

  // Emit the accumulated paragraph lines, if any, and reset the buffer.
  function closeParagraph() {
    if (pending.length === 0) return;
    out += '<p>' + pending.map(mdInline).join('<br>') + '</p>';
    pending = [];
  }

  // Split a table row into trimmed cells, ignoring one leading/trailing pipe.
  function cellsOf(row: string): string[] {
    const inner = row.replace(/^\s*\|/, '').replace(/\|\s*$/, '');
    return inner.split('|').map(function (cell) { return cell.trim(); });
  }

  // Consume consecutive lines that start with `marker` and return their <li>s.
  function listItems(marker: RegExp): string {
    let items = '';
    while (pos < total && marker.test(rows[pos])) {
      items += '<li>' + mdInline(rows[pos].replace(marker, '')) + '</li>';
      pos++;
    }
    return items;
  }

  while (pos < total) {
    const current = rows[pos];

    // fenced code — tolerate a missing closing fence (streaming)
    const opened = current.match(FENCE_OPEN);
    if (opened) {
      closeParagraph();
      const lang = opened[1] || '';
      const body: string[] = [];
      for (pos++; pos < total && !FENCE_CLOSE.test(rows[pos]); pos++) {
        body.push(rows[pos]);
      }
      pos++; // step past the closing fence (or past the end mid-stream)
      out += '<div class="codeblock"><div class="cb-head"><span class="cb-lang">' +
        escapeHtml(lang || 'text') +
        '</span><button class="cb-copy" type="button">复制</button></div>' +
        '<pre><code>' + highlightCode(body.join('\n'), lang) + '</code></pre></div>';
      continue;
    }

    // blank line → paragraph break
    if (BLANK.test(current)) {
      closeParagraph();
      pos++;
      continue;
    }

    // heading
    const heading = current.match(HEADING);
    if (heading) {
      closeParagraph();
      const depth = heading[1].length;
      const tag = 'h' + (depth + 1);
      out += '<' + tag + ' class="md-h md-h' + depth + '">' + mdInline(heading[2]) + '</' + tag + '>';
      pos++;
      continue;
    }

    // horizontal rule
    if (RULE.test(current)) {
      closeParagraph();
      out += '<hr>';
      pos++;
      continue;
    }

    // blockquote
    if (QUOTE.test(current)) {
      closeParagraph();
      const quoted: string[] = [];
      while (pos < total && QUOTE.test(rows[pos])) {
        quoted.push(rows[pos].replace(QUOTE, ''));
        pos++;
      }
      out += '<blockquote>' + quoted.map(mdInline).join('<br>') + '</blockquote>';
      continue;
    }

    // table: header row | separator row | body rows
    const hasNext = pos + 1 < total;
    if (current.indexOf('|') >= 0 && hasNext &&
        TABLE_SEP.test(rows[pos + 1]) && rows[pos + 1].indexOf('-') >= 0) {
      closeParagraph();
      const headCells = cellsOf(current);
      pos += 2;
      let bodyHtml = '';
      while (pos < total && rows[pos].indexOf('|') >= 0 && !BLANK.test(rows[pos])) {
        const tds = cellsOf(rows[pos]).map(function (cell) { return '<td>' + mdInline(cell) + '</td>'; });
        bodyHtml += '<tr>' + tds.join('') + '</tr>';
        pos++;
      }
      const ths = headCells.map(function (cell) { return '<th>' + mdInline(cell) + '</th>'; });
      out += '<div class="md-table"><table><thead><tr>' +
        ths.join('') +
        '</tr></thead><tbody>' + bodyHtml + '</tbody></table></div>';
      continue;
    }

    // unordered list
    if (BULLET.test(current)) {
      closeParagraph();
      out += '<ul>' + listItems(BULLET) + '</ul>';
      continue;
    }

    // ordered list
    if (NUMBERED.test(current)) {
      closeParagraph();
      out += '<ol>' + listItems(NUMBERED) + '</ol>';
      continue;
    }

    pending.push(current);
    pos++;
  }

  closeParagraph();
  return out;
}