skyloom 1.13.6 → 1.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/.github/workflows/ci.yml +36 -36
  2. package/README.md +220 -159
  3. package/config/providers.yaml +39 -39
  4. package/config/skills/api_integrator/SKILL.md +15 -15
  5. package/config/skills/arch_designer/SKILL.md +13 -13
  6. package/config/skills/ci_cd_manager/SKILL.md +14 -14
  7. package/config/skills/code_analysis/SKILL.md +13 -13
  8. package/config/skills/code_generator/SKILL.md +12 -12
  9. package/config/skills/code_reviewer/SKILL.md +13 -13
  10. package/config/skills/content_writer/SKILL.md +14 -14
  11. package/config/skills/data_transformer/SKILL.md +15 -15
  12. package/config/skills/document_analysis/SKILL.md +13 -13
  13. package/config/skills/emotional_companion/SKILL.md +15 -15
  14. package/config/skills/performance_checker/SKILL.md +14 -14
  15. package/config/skills/security_auditor/SKILL.md +14 -14
  16. package/config/skills/self_evolve/SKILL.md +13 -13
  17. package/config/skills/sys_operator/SKILL.md +15 -15
  18. package/config/skills/task_planner/SKILL.md +14 -14
  19. package/config/skills/web_research/SKILL.md +14 -14
  20. package/config/skills/workflow_designer/SKILL.md +13 -13
  21. package/dist/agents/dew.js +52 -52
  22. package/dist/agents/fair.js +84 -84
  23. package/dist/agents/fog.js +30 -30
  24. package/dist/agents/frost.js +32 -32
  25. package/dist/agents/rain.js +32 -32
  26. package/dist/agents/snow.js +68 -68
  27. package/dist/cli/commands_md.d.ts +41 -0
  28. package/dist/cli/commands_md.d.ts.map +1 -0
  29. package/dist/cli/commands_md.js +140 -0
  30. package/dist/cli/commands_md.js.map +1 -0
  31. package/dist/cli/input_macros.d.ts +28 -0
  32. package/dist/cli/input_macros.d.ts.map +1 -0
  33. package/dist/cli/input_macros.js +120 -0
  34. package/dist/cli/input_macros.js.map +1 -0
  35. package/dist/cli/loom.d.ts +220 -0
  36. package/dist/cli/loom.d.ts.map +1 -0
  37. package/dist/cli/loom.js +1094 -0
  38. package/dist/cli/loom.js.map +1 -0
  39. package/dist/cli/loom_chat.d.ts +20 -0
  40. package/dist/cli/loom_chat.d.ts.map +1 -0
  41. package/dist/cli/loom_chat.js +685 -0
  42. package/dist/cli/loom_chat.js.map +1 -0
  43. package/dist/cli/main.js +310 -14
  44. package/dist/cli/main.js.map +1 -1
  45. package/dist/cli/tui.d.ts.map +1 -1
  46. package/dist/cli/tui.js +7 -1
  47. package/dist/cli/tui.js.map +1 -1
  48. package/dist/core/agent.d.ts +17 -0
  49. package/dist/core/agent.d.ts.map +1 -1
  50. package/dist/core/agent.js +173 -7
  51. package/dist/core/agent.js.map +1 -1
  52. package/dist/core/factory.d.ts.map +1 -1
  53. package/dist/core/factory.js +34 -2
  54. package/dist/core/factory.js.map +1 -1
  55. package/dist/core/file_checkpoint.d.ts +57 -0
  56. package/dist/core/file_checkpoint.d.ts.map +1 -0
  57. package/dist/core/file_checkpoint.js +162 -0
  58. package/dist/core/file_checkpoint.js.map +1 -0
  59. package/dist/core/hooks.d.ts +43 -0
  60. package/dist/core/hooks.d.ts.map +1 -0
  61. package/dist/core/hooks.js +110 -0
  62. package/dist/core/hooks.js.map +1 -0
  63. package/dist/core/llm.d.ts.map +1 -1
  64. package/dist/core/llm.js +15 -9
  65. package/dist/core/llm.js.map +1 -1
  66. package/dist/core/longdoc.js +5 -5
  67. package/dist/core/mcp.d.ts +16 -0
  68. package/dist/core/mcp.d.ts.map +1 -1
  69. package/dist/core/mcp.js +55 -0
  70. package/dist/core/mcp.js.map +1 -1
  71. package/dist/core/model_config.d.ts +40 -0
  72. package/dist/core/model_config.d.ts.map +1 -0
  73. package/dist/core/model_config.js +191 -0
  74. package/dist/core/model_config.js.map +1 -0
  75. package/dist/core/skill.d.ts +7 -0
  76. package/dist/core/skill.d.ts.map +1 -1
  77. package/dist/core/skill.js +47 -0
  78. package/dist/core/skill.js.map +1 -1
  79. package/dist/core/skymd.d.ts +39 -0
  80. package/dist/core/skymd.d.ts.map +1 -0
  81. package/dist/core/skymd.js +177 -0
  82. package/dist/core/skymd.js.map +1 -0
  83. package/dist/core/tool.d.ts +12 -0
  84. package/dist/core/tool.d.ts.map +1 -1
  85. package/dist/core/tool.js +30 -0
  86. package/dist/core/tool.js.map +1 -1
  87. package/dist/core/verify.d.ts +27 -0
  88. package/dist/core/verify.d.ts.map +1 -0
  89. package/dist/core/verify.js +62 -0
  90. package/dist/core/verify.js.map +1 -0
  91. package/dist/skills/loader.d.ts +22 -2
  92. package/dist/skills/loader.d.ts.map +1 -1
  93. package/dist/skills/loader.js +45 -15
  94. package/dist/skills/loader.js.map +1 -1
  95. package/dist/tools/builtin.d.ts.map +1 -1
  96. package/dist/tools/builtin.js +13 -3
  97. package/dist/tools/builtin.js.map +1 -1
  98. package/dist/tools/model_tool.d.ts +11 -0
  99. package/dist/tools/model_tool.d.ts.map +1 -0
  100. package/dist/tools/model_tool.js +71 -0
  101. package/dist/tools/model_tool.js.map +1 -0
  102. package/dist/tools/todo.d.ts +30 -0
  103. package/dist/tools/todo.d.ts.map +1 -0
  104. package/dist/tools/todo.js +78 -0
  105. package/dist/tools/todo.js.map +1 -0
  106. package/docs/AESTHETIC_DESIGN.md +152 -144
  107. package/docs/OPTIMIZATION_PLAN.md +178 -178
  108. package/package.json +68 -68
  109. package/scripts/install.js +48 -48
  110. package/scripts/link.js +10 -10
  111. package/setup.bat +79 -79
  112. package/skill-test-ty2fOA/test.md +10 -10
  113. package/src/agents/dew.ts +70 -70
  114. package/src/agents/fair.ts +102 -102
  115. package/src/agents/fog.ts +48 -48
  116. package/src/agents/frost.ts +50 -50
  117. package/src/agents/rain.ts +50 -50
  118. package/src/agents/snow.ts +239 -239
  119. package/src/cli/commands_md.ts +112 -0
  120. package/src/cli/input_macros.ts +83 -0
  121. package/src/cli/loom.ts +982 -0
  122. package/src/cli/loom_chat.ts +598 -0
  123. package/src/cli/main.ts +255 -9
  124. package/src/cli/mode.ts +58 -58
  125. package/src/cli/tui.ts +228 -222
  126. package/src/core/agent/guard.ts +134 -134
  127. package/src/core/agent/task.ts +100 -100
  128. package/src/core/agent.ts +169 -7
  129. package/src/core/arbitrate.ts +162 -162
  130. package/src/core/catalog.ts +178 -178
  131. package/src/core/checkpoint.ts +94 -94
  132. package/src/core/estimate.ts +104 -104
  133. package/src/core/evolve.ts +191 -191
  134. package/src/core/factory.ts +31 -2
  135. package/src/core/file_checkpoint.ts +136 -0
  136. package/src/core/filter.ts +103 -103
  137. package/src/core/graph.ts +156 -156
  138. package/src/core/hooks.ts +126 -0
  139. package/src/core/icons.ts +53 -53
  140. package/src/core/index.ts +37 -37
  141. package/src/core/learn.ts +146 -146
  142. package/src/core/llm.ts +15 -9
  143. package/src/core/longdoc.ts +155 -155
  144. package/src/core/mcp.ts +48 -0
  145. package/src/core/mcp_server.ts +176 -176
  146. package/src/core/model_config.ts +157 -0
  147. package/src/core/profile.ts +255 -255
  148. package/src/core/router.ts +124 -124
  149. package/src/core/sandbox.ts +142 -142
  150. package/src/core/security.ts +243 -243
  151. package/src/core/skill.ts +42 -0
  152. package/src/core/skymd.ts +143 -0
  153. package/src/core/theme.ts +65 -65
  154. package/src/core/tool.ts +30 -0
  155. package/src/core/tool_router.ts +193 -193
  156. package/src/core/vector.ts +152 -152
  157. package/src/core/verify.ts +71 -0
  158. package/src/core/workspace.ts +150 -150
  159. package/src/plugins/loader.ts +66 -66
  160. package/src/skills/loader.ts +45 -16
  161. package/src/sql.js.d.ts +29 -29
  162. package/src/tools/builtin.ts +13 -3
  163. package/src/tools/computer.ts +269 -269
  164. package/src/tools/delegate.ts +49 -49
  165. package/src/tools/model_tool.ts +74 -0
  166. package/src/tools/todo.ts +76 -0
  167. package/src/web/tts.ts +93 -93
  168. package/tests/agent.test.ts +159 -159
  169. package/tests/agent_helpers.test.ts +48 -48
  170. package/tests/bus.test.ts +121 -121
  171. package/tests/catalog.test.ts +86 -86
  172. package/tests/checkpoint_commands.test.ts +124 -0
  173. package/tests/claude_compat.test.ts +110 -0
  174. package/tests/config.test.ts +41 -41
  175. package/tests/guard.test.ts +75 -75
  176. package/tests/icons.test.ts +45 -45
  177. package/tests/loom.test.ts +248 -0
  178. package/tests/memory.test.ts +170 -170
  179. package/tests/model_config.test.ts +109 -0
  180. package/tests/router.test.ts +86 -86
  181. package/tests/schemas.test.ts +51 -51
  182. package/tests/semantic.test.ts +83 -83
  183. package/tests/setup.ts +10 -10
  184. package/tests/skill.test.ts +172 -172
  185. package/tests/skymd.test.ts +146 -0
  186. package/tests/task.test.ts +60 -60
  187. package/tests/todo_toolstats.test.ts +94 -0
  188. package/tests/tool.test.ts +108 -108
  189. package/tests/tool_router.test.ts +71 -71
  190. package/tests/tui.test.ts +67 -67
  191. package/vitest.config.ts +17 -17
  192. package/=12 +0 -0
  193. package/=8 +0 -0
package/src/core/agent.ts CHANGED
@@ -28,6 +28,27 @@ import { LoopGuard } from './agent/guard';
28
28
 
29
29
  const log = getLogger('agent');
30
30
 
31
+ /** Tool-name pattern (write_/edit_/delete_/create_ prefixes, plus run_bash and git_commit). A successful call to a matching tool marks the turn as having written files, which is the trigger for the verify loop. */
32
+ const WRITE_TOOL_RE = /^(write_|edit_|delete_|create_)|^run_bash$|^git_commit$/;
33
+
34
+ /** Tool-name pattern for tools with side effects. While plan mode is on, matching tools are filtered out of the tool list offered to the model. */
35
+ const SIDE_EFFECT_TOOL_RE = /^(write_|edit_|delete_|create_|kill_|launch_|service_|browser_)|^run_bash$|^git_commit$|^open_path$|^delegate_to$/;
36
+
37
/** Default context budget per recorded tool result (chars; ~3k tokens). */
const TOOL_RESULT_LIMIT = 12000;

/**
 * Clamp an oversized tool result before it enters the context window.
 *
 * Results no longer than the budget are returned untouched. Longer results keep
 * the first ~72% and the last ~18% of the budget, joined by a notice that tells
 * the model how many characters were dropped and how to fetch them precisely.
 *
 * @param s     Raw tool result text.
 * @param limit Character budget. A non-finite or non-positive value falls back
 *              to TOOL_RESULT_LIMIT instead of producing a degenerate clamp.
 * @returns The original string, or head + notice + tail when it exceeds the budget.
 */
export function clampToolResult(s: string, limit: number = TOOL_RESULT_LIMIT): string {
  // NaN / 0 / negative budgets cannot describe a sensible window; use the default.
  const budget = Number.isFinite(limit) && limit > 0 ? limit : TOOL_RESULT_LIMIT;
  if (s.length <= budget) return s;

  const headLen = Math.floor(budget * 0.72);
  const tailLen = Math.floor(budget * 0.18);
  const head = s.slice(0, headLen);
  // `s.slice(-0)` is `s.slice(0)` — the whole string — so a zero-length tail
  // (tiny budgets) must be spelled out rather than sliced.
  const tail = tailLen > 0 ? s.slice(s.length - tailLen) : '';
  const cut = s.length - head.length - tail.length;
  return `${head}\n…[工具结果过长,中间省略 ${cut} 字符 — 需要该部分时用更精确的参数重新调用(read_file 的 offset/limit、grep 定位、缩小查询范围)]\n${tail}`;
}
51
+
31
52
  // Domain model lives in ./agent/task — re-exported here so importers of
32
53
  // '../core/agent' are unaffected by the Phase 3 split.
33
54
  import { AgentState, TaskState, Task, TaskResult } from './agent/task';
@@ -65,6 +86,11 @@ export class BaseAgent {
65
86
  protected _pendingRequests: Map<string, { resolve: (value: string) => void; reject: (err: Error) => void }> = new Map();
66
87
  protected _bgTasks: Set<Promise<void>> = new Set();
67
88
  approvalCallback: ((toolName: string, args: Record<string, any>) => Promise<boolean>) | null = null;
89
+ /** Plan mode: read-only tool set + plan-first instructions on each turn. */
90
+ planMode: boolean = false;
91
+ /** Set when this turn executed a tool that mutates the filesystem (verify trigger). */
92
+ protected _turnWroteFiles: boolean = false;
93
+ private _hooks: import('./hooks').Hooks | null = null;
68
94
  protected _turnLock: Promise<void> = Promise.resolve();
69
95
  private _turnLockCounter: number = 0;
70
96
  private _turnLockResolve: (() => void) | null = null;
@@ -145,10 +171,10 @@ export class BaseAgent {
145
171
  const lang = (this.config as any).llm?.language || 'zh';
146
172
  if (lang === 'en') {
147
173
  return prompt +
148
- `\n\n## Thinking Protocol\nBefore acting, briefly weigh: (1) **What** is the actual need? (2) **How** sure am I? If <80%, flag with [uncertain] and ask.\nIf stuck, admit it — propose a partial answer or ask the user. Never fabricate.\n\n## Behavior\n- Act, don't narrate. No "I will..." before tool calls.\n- Stay in scope. Do what's asked, then stop.\n- Batch independent tool calls in one response.\n- Verify writes: read back, report verified state.\n- Call list_skills when the task needs specialized capabilities.`;
174
+ `\n\n## Thinking Protocol\nBefore acting, briefly weigh: (1) **What** is the actual need? (2) **How** sure am I? If <80%, flag with [uncertain] and ask.\nIf stuck, admit it — propose a partial answer or ask the user. Never fabricate.\n\n## Behavior\n- Act, don't narrate. No "I will..." before tool calls.\n- Stay in scope. Do what's asked, then stop.\n- Batch independent tool calls in one response.\n- For tasks with 3+ steps, plan with todo_write first and update item status as you go.\n- Verify writes: read back, report verified state.\n- Call list_skills when the task needs specialized capabilities.`;
149
175
  }
150
176
  return prompt +
151
- `\n\n## 思考协议\n行动前快速判断:(1) 用户真实需求是什么?(2) 我有多大把握?低于80%标注 [不确定] 并主动询问。\n卡住时承认,给出部分答案或请求用户指导。绝不编造。\n\n## 行为守则\n- 直接行动,不预告。不说「我将要...」,直接调用工具\n- 不擅自扩大范围。用户要什么做什么,核心完成即止\n- 独立的工具调用一次发出,并行执行\n- 写入后回读验证,汇报已验证状态而非仅尝试\n- 任务涉及专业能力时(PPT/Excel/PDF/网页设计/代码审查等),先调 list_skills 查看可用技能,再用 use_skill 激活`;
177
+ `\n\n## 思考协议\n行动前快速判断:(1) 用户真实需求是什么?(2) 我有多大把握?低于80%标注 [不确定] 并主动询问。\n卡住时承认,给出部分答案或请求用户指导。绝不编造。\n\n## 行为守则\n- 直接行动,不预告。不说「我将要...」,直接调用工具\n- 不擅自扩大范围。用户要什么做什么,核心完成即止\n- 独立的工具调用一次发出,并行执行\n- 3 步以上的任务先用 todo_write 列任务清单,开工/完成时逐项更新状态\n- 写入后回读验证,汇报已验证状态而非仅尝试\n- 任务涉及专业能力时(PPT/Excel/PDF/网页设计/代码审查等),先调 list_skills 查看可用技能,再用 use_skill 激活`;
152
178
  }
153
179
 
154
180
  protected injectProgrammingWisdom(prompt: string): string {
@@ -159,16 +185,34 @@ export class BaseAgent {
159
185
  return prompt + `\n\n## 工程能力\n顶级工程师:类型安全、真实的错误处理、按根因调试、按安全与性能审查。你可以阅读和修改 Skyloom 自身源码。`;
160
186
  }
161
187
 
188
/**
 * Append the layered SKY.md / CLAUDE.md / AGENTS.md project memory
 * (see core/skymd) to the system prompt.
 *
 * The section header follows the configured `llm.language` the same way the
 * behavior rules do: English when it is 'en', Chinese otherwise. Loading is
 * best-effort — if the memory module cannot be loaded or throws, the prompt is
 * returned unchanged and the failure is logged rather than silently dropped.
 *
 * @param prompt System prompt assembled so far.
 * @returns The prompt with the project-memory section appended, or the input
 *          prompt when there is no memory text or loading failed.
 */
protected injectProjectMemory(prompt: string): string {
  try {
    const { loadProjectMemory } = require('./skymd');
    const mem = loadProjectMemory();
    if (!mem.text) return prompt;
    const lang = (this.config as any)?.llm?.language || 'zh';
    const header = lang === 'en'
      ? '## Project Memory (SKY.md)\nConventions maintained by the user and the project; they take precedence over your general habits:'
      : '## 项目记忆 (SKY.md)\n用户与项目维护的约定,优先级高于你的通用习惯:';
    return prompt + `\n\n${header}\n\n${mem.text}`;
  } catch (e) {
    // Project memory is optional, but a broken loader should be visible in logs.
    log.warn('project_memory_load_failed', { error: String(e) });
    return prompt;
  }
}
199
+
162
200
  /**
   * Rebuild the base system prompt from scratch: clear it, resolve the prompt
   * again, re-apply each inject step in order (workspace info, behavior rules,
   * programming wisdom, project memory), append the current time tag, then
   * call rebuildSystemPrompt().
   */
  reinitLanguage(): void {
163
201
  this._baseSystemPrompt = '';
164
202
  this._baseSystemPrompt = this.resolveSystemPrompt();
165
203
  this._baseSystemPrompt = this.injectWorkspaceInfo(this._baseSystemPrompt);
166
204
  this._baseSystemPrompt = this.injectBehaviorRules(this._baseSystemPrompt);
167
205
  this._baseSystemPrompt = this.injectProgrammingWisdom(this._baseSystemPrompt);
206
+ this._baseSystemPrompt = this.injectProjectMemory(this._baseSystemPrompt);
168
207
  this._baseSystemPrompt += '\n\n' + this.currentTimeTag();
169
208
  this.rebuildSystemPrompt();
170
209
  }
171
210
 
211
+ /** Re-read the SKY.md layers into the system prompt (e.g. after a `#` quick-memory entry or an edit). Delegates to reinitLanguage(), so the whole base prompt is rebuilt, not only the memory section. */
212
+ reloadProjectMemory(): void {
213
+ this.reinitLanguage();
214
+ }
215
+
172
216
  async init(): Promise<void> {
173
217
  if (this._baseSystemPrompt) return;
174
218
  await this.memory.initDb();
@@ -186,6 +230,7 @@ export class BaseAgent {
186
230
  this._baseSystemPrompt = this.injectWorkspaceInfo(this._baseSystemPrompt);
187
231
  this._baseSystemPrompt = this.injectBehaviorRules(this._baseSystemPrompt);
188
232
  this._baseSystemPrompt = this.injectProgrammingWisdom(this._baseSystemPrompt);
233
+ this._baseSystemPrompt = this.injectProjectMemory(this._baseSystemPrompt);
189
234
  this._baseSystemPrompt += '\n\n' + this.currentTimeTag();
190
235
  this.rebuildSystemPrompt();
191
236
  this._tools = this.toolRegistry.getTools();
@@ -212,6 +257,13 @@ export class BaseAgent {
212
257
  description: 'List all available skills with their names and descriptions. Use this first to discover what skills you can activate.',
213
258
  parameters: [],
214
259
  handler: async () => {
260
+ // live change detection: re-scan user/project skill folders so a
261
+ // SKILL.md edit or drop-in applies without restarting the session
262
+ try {
263
+ const { registerDynamicSkills } = require('../skills/loader');
264
+ registerDynamicSkills(self.skillRegistry);
265
+ self.loadSkills();
266
+ } catch { /* live reload is best-effort */ }
215
267
  const skills = self.getAvailableSkills();
216
268
  if (!skills.length) return 'No skills available.';
217
269
  const maxName = Math.max(...skills.map(s => s.name.length), 1);
@@ -553,9 +605,37 @@ export class BaseAgent {
553
605
  if (onStatus) onStatus(p.label);
554
606
  await this.setState(AgentState.ACTING);
555
607
 
608
+ // File checkpoint: snapshot the target before any mutating file tool
609
+ // runs, so /rewind can restore the pre-turn state.
610
+ try {
611
+ const { getFileCheckpoints } = require('./file_checkpoint');
612
+ const cp = getFileCheckpoints();
613
+ const snapPath = cp.pathToSnapshot(p.toolName, p.toolArgs || {});
614
+ if (snapPath) cp.snapshot(snapPath);
615
+ } catch { /* checkpointing must never block execution */ }
616
+
617
+ // pre_tool hooks are enforced policy: a non-zero exit blocks the call.
618
+ const hooks = this.getHooks();
619
+ if (hooks.preTool.length > 0) {
620
+ try {
621
+ const { runPreToolHooks } = require('./hooks');
622
+ const pre = runPreToolHooks(hooks, p.toolName, p.toolArgs || {}, this.name);
623
+ if (!pre.allowed) {
624
+ return { idx, result: { tc: p.tc, result: `[blocked by pre_tool hook] ${pre.reason}`, success: false, toolName: p.toolName } };
625
+ }
626
+ } catch { /* hook machinery must never break tool execution */ }
627
+ }
628
+
556
629
  try {
557
630
  const toolResult = await this.toolRegistry.execute(p.toolName, p.toolArgs || {});
558
631
  const resultStr = toolResult.result || toolResult.error || '(no output)';
632
+ if (toolResult.success && WRITE_TOOL_RE.test(p.toolName)) this._turnWroteFiles = true;
633
+ if (hooks.postTool.length > 0) {
634
+ try {
635
+ const { runPostToolHooks } = require('./hooks');
636
+ runPostToolHooks(hooks, p.toolName, p.toolArgs || {}, this.name);
637
+ } catch { /* best-effort */ }
638
+ }
559
639
  return { idx, result: { tc: p.tc, result: resultStr, success: toolResult.success, toolName: p.toolName } };
560
640
  } catch (e) {
561
641
  return { idx, result: { tc: p.tc, result: `Tool '${p.toolName}' execution failed: ${e}`, success: false, toolName: p.toolName } };
@@ -578,7 +658,9 @@ export class BaseAgent {
578
658
  }
579
659
  }
580
660
 
581
- // Phase D: Record results to memory
661
+ // Phase D: Record results to memory (clamped — one runaway read_file or
662
+ // http_get must not flood the context window)
663
+ const resultLimit = Number((this.config as any)?.llm?.tool_result_limit) || undefined;
582
664
  for (const r of results) {
583
665
  if (!r) continue;
584
666
 
@@ -586,7 +668,7 @@ export class BaseAgent {
586
668
  if (suppressed) suppressed.add(r.toolName);
587
669
  }
588
670
 
589
- this.memory.addMessage('tool', r.result, {
671
+ this.memory.addMessage('tool', clampToolResult(r.result, resultLimit), {
590
672
  name: r.toolName,
591
673
  toolCallId: r.tc.id,
592
674
  ephemeral,
@@ -726,7 +808,15 @@ export class BaseAgent {
726
808
  signal?: AbortSignal
727
809
  ): AsyncGenerator<Record<string, any>> {
728
810
  await this.setState(AgentState.THINKING);
729
- this.memory.addMessage('user', message);
811
+ // Plan mode: the tag travels with the message so the model plans instead
812
+ // of acting, and the read-only tool filter below removes the temptation.
813
+ const userMessage = this.planMode
814
+ ? `[计划模式] 只读调研,不要执行任何修改。请输出一份编号的执行计划(涉及哪些文件、每步做什么、风险点),等待用户批准后再实施。\n\n${message}`
815
+ : message;
816
+ this.memory.addMessage('user', userMessage);
817
+ try {
818
+ require('./file_checkpoint').getFileCheckpoints().beginTurn(message);
819
+ } catch { /* optional */ }
730
820
  let assistantStored = false;
731
821
 
732
822
  if (this.shouldAutoCompact()) {
@@ -750,9 +840,16 @@ export class BaseAgent {
750
840
  let cacheKey: string | null = null;
751
841
 
752
842
  const resolveToolNames = (): string[] => {
753
- const key = JSON.stringify([[...suppressedTools].sort(), [...this._activeSkills].sort()]);
843
+ const key = JSON.stringify([[...suppressedTools].sort(), [...this._activeSkills].sort(), this.planMode]);
754
844
  if (toolNamesCache !== null && cacheKey === key) return toolNamesCache;
755
845
  let candidates = this.activeToolNames().filter(t => !suppressedTools.has(t));
846
+ if (this.planMode) {
847
+ candidates = candidates.filter(n => {
848
+ if (SIDE_EFFECT_TOOL_RE.test(n)) return false;
849
+ const t = this.toolRegistry.get(n);
850
+ return !(t as any)?.dangerous;
851
+ });
852
+ }
756
853
  const must = new Set<string>();
757
854
  for (const s of this._skills) {
758
855
  if (this._activeSkills.has(s.name)) {
@@ -1029,6 +1126,25 @@ export class BaseAgent {
1029
1126
  };
1030
1127
  }
1031
1128
 
1129
/**
 * Token breakdown for the /context command.
 *
 * Walks short-term memory and, per message role, accumulates an estimated
 * token count (characters of content plus serialized tool calls, divided by
 * four and rounded up) together with the number of messages.
 *
 * @returns contextUsage() fields plus `byRole`, `systemPromptTokens`,
 *          `toolCount` and `activeSkills`.
 */
contextDetail(): Record<string, any> {
  const byRole: Record<string, { tokens: number; count: number }> = {};

  for (const msg of this.memory.shortTerm) {
    // Tool calls count toward the message size via their JSON length.
    let chars = (msg.content || '').length;
    if ((msg as any).toolCalls) {
      chars += JSON.stringify((msg as any).toolCalls).length;
    }

    let bucket = byRole[msg.role];
    if (!bucket) {
      bucket = { tokens: 0, count: 0 };
      byRole[msg.role] = bucket;
    }
    bucket.tokens += Math.ceil(chars / 4);
    bucket.count += 1;
  }

  const usage = this.contextUsage();
  const systemPromptTokens = Math.ceil(this._baseSystemPrompt.length / 4);
  const toolCount = this.activeToolNames().length;
  const activeSkills = [...this._activeSkills];

  return { ...usage, byRole, systemPromptTokens, toolCount, activeSkills };
}
1147
+
1032
1148
  protected shouldAutoCompact(): boolean {
1033
1149
  const usage = this.memory.getContextWindowUsage();
1034
1150
  // Compact before hitting the real window — leave ~20% headroom for the reply.
@@ -1259,9 +1375,43 @@ export class BaseAgent {
1259
1375
 
1260
1376
  this.memory.addMessage('user', prompt);
1261
1377
  const preLen = this.memory.shortTerm.length;
1378
+ this._turnWroteFiles = false;
1379
+ try {
1380
+ require('./file_checkpoint').getFileCheckpoints().beginTurn(`[task] ${task.description}`);
1381
+ } catch { /* optional */ }
1262
1382
 
1263
1383
  try {
1264
- const response = await this.llmLoop({ onStatus, ephemeral: true });
1384
+ let response = await this.llmLoop({ onStatus, ephemeral: true });
1385
+
1386
+ // ── 验证闭环: if this task touched the filesystem and verify commands
1387
+ // are configured (config.verify or SKY.md "## Verify"), run them and
1388
+ // feed failures back for a bounded number of fix rounds. ──
1389
+ try {
1390
+ const { resolveVerifyConfig, runVerify } = require('./verify');
1391
+ const vc = resolveVerifyConfig(this.config);
1392
+ if (vc.commands.length > 0 && this._turnWroteFiles) {
1393
+ for (let round = 0; round <= vc.maxFixRounds; round++) {
1394
+ if (onStatus) onStatus(`verify: ${vc.commands.length} 条命令`);
1395
+ const vr = runVerify(vc);
1396
+ if (vr.ok) {
1397
+ response.content += `\n\n[verify ✓ 全部通过]\n${vr.report}`;
1398
+ break;
1399
+ }
1400
+ if (round === vc.maxFixRounds) {
1401
+ response.content += `\n\n[verify ✗ 经 ${vc.maxFixRounds} 轮修复仍未通过]\n${vr.report.slice(0, 1500)}`;
1402
+ break;
1403
+ }
1404
+ if (onStatus) onStatus(`verify 失败 — 修复第 ${round + 1}/${vc.maxFixRounds} 轮`);
1405
+ log.warn('verify_failed_fixing', { agent: this.name, round: round + 1 });
1406
+ this.memory.addMessage('user',
1407
+ `[自动验证失败] 以下验证命令未通过。请定位根因并修复,确保它们全部通过:\n\n${vr.report}`);
1408
+ response = await this.llmLoop({ onStatus, ephemeral: true });
1409
+ }
1410
+ }
1411
+ } catch (e) {
1412
+ log.warn('verify_loop_error', { error: String(e) });
1413
+ }
1414
+
1265
1415
  const filePaths = extractFilePathsFromMessages(this.memory.shortTerm.slice(preLen));
1266
1416
  const enriched = enrichResponseWithArtifacts(response.content, filePaths);
1267
1417
  this.memory.addMessage('assistant', enriched, { toolCalls: response.toolCalls, reasoningContent: response.reasoningContent });
@@ -1285,6 +1435,18 @@ export class BaseAgent {
1285
1435
  private _security: any = null;
1286
1436
  get security(): any { if (!this._security) { try { const { getSecurity } = require('./security'); this._security = getSecurity(); } catch { this._security = {}; } } return this._security; }
1287
1437
 
1438
/**
 * Lazily load the hook configuration and cache it on the instance.
 *
 * If the hooks module cannot be loaded or loadHooks throws, an empty hook set
 * is cached instead, so callers can always read the three hook lists.
 */
protected getHooks(): import('./hooks').Hooks {
  if (this._hooks) return this._hooks;

  let resolved: import('./hooks').Hooks;
  try {
    const { loadHooks } = require('./hooks');
    resolved = loadHooks(this.config);
  } catch {
    resolved = { sessionStart: [], preTool: [], postTool: [] };
  }

  this._hooks = resolved;
  return resolved;
}
1449
+
1288
1450
  protected async checkToolApproval(toolName: string, toolArgs: Record<string, any>): Promise<boolean> {
1289
1451
  try {
1290
1452
  const sec = this.security;
@@ -1,162 +1,162 @@
1
- /**
2
- * 多Agent冲突仲裁 — majority voting, quality scoring, tie-breaking.
3
- *
4
- * When multiple agents produce conflicting outputs on the same task
5
- * (or when a reviewer disagrees with an executor), this module
6
- * provides structured conflict resolution.
7
- */
8
-
9
- import type { TaskExecutionResult } from "./factory";
10
-
11
- /* ═══════════════════════════════════════
12
- Conflict detection
13
- ═══════════════════════════════════════ */
14
- export interface Conflict {
15
- taskId: string;
16
- results: TaskExecutionResult[];
17
- description: string;
18
- severity: "low" | "medium" | "high";
19
- }
20
-
21
- /** Detect if two results conflict based on success status and content overlap. */
22
- export function detectConflicts(results: TaskExecutionResult[]): Conflict[] {
23
- const byTask = new Map<string, TaskExecutionResult[]>();
24
- for (const r of results) { const id = r.id; if (!byTask.has(id)) byTask.set(id, []); byTask.get(id)!.push(r); }
25
-
26
- const conflicts: Conflict[] = [];
27
- for (const [id, items] of byTask) {
28
- if (items.length < 2) continue;
29
-
30
- const successes = items.filter(r => r.success);
31
- const failures = items.filter(r => !r.success);
32
-
33
- // All succeeded — check content divergence
34
- if (successes.length >= 2) {
35
- const contents = successes.map(r => (r.content || "").toLowerCase());
36
- const similarity = pairwiseSimilarity(contents);
37
- if (similarity < 0.3) {
38
- conflicts.push({ taskId: id, results: successes, description: "Multiple agents produced divergent successful outputs", severity: "medium" });
39
- }
40
- }
41
-
42
- // Mix of success and failure
43
- if (successes.length > 0 && failures.length > 0) {
44
- conflicts.push({ taskId: id, results: items, description: `${successes.length} succeeded, ${failures.length} failed — need tiebreaker`, severity: "medium" });
45
- }
46
-
47
- // All failed
48
- if (failures.length >= 2 && successes.length === 0) {
49
- conflicts.push({ taskId: id, results: failures, description: "All agents failed on this task", severity: "high" });
50
- }
51
- }
52
-
53
- return conflicts;
54
- }
55
-
56
- /* ═══════════════════════════════════════
57
- Content similarity (n-gram Jaccard)
58
- ═══════════════════════════════════════ */
59
- function pairwiseSimilarity(texts: string[]): number {
60
- if (texts.length < 2) return 1.0;
61
- let total = 0;
62
- let count = 0;
63
- for (let i = 0; i < texts.length; i++) {
64
- for (let j = i + 1; j < texts.length; j++) {
65
- total += ngramJaccard(texts[i], texts[j], 3);
66
- count++;
67
- }
68
- }
69
- return count === 0 ? 0 : total / count;
70
- }
71
-
72
- function ngramJaccard(a: string, b: string, n: number): number {
73
- const as = ngrams(a, n), bs = ngrams(b, n);
74
- if (as.size === 0 && bs.size === 0) return 1;
75
- let intersection = 0;
76
- for (const g of as) { if (bs.has(g)) intersection++; }
77
- const union = as.size + bs.size - intersection;
78
- return union === 0 ? 0 : intersection / union;
79
- }
80
-
81
- function ngrams(s: string, n: number): Set<string> {
82
- const out = new Set<string>();
83
- for (let i = 0; i <= s.length - n; i++) out.add(s.slice(i, i + n));
84
- return out;
85
- }
86
-
87
- /* ═══════════════════════════════════════
88
- Majority voting / arbitration
89
- ═══════════════════════════════════════ */
90
- export interface ArbitrationResult {
91
- winner: TaskExecutionResult;
92
- method: "unanimous" | "majority" | "tiebreaker" | "single";
93
- confidence: number; // 0-1
94
- reasoning: string;
95
- }
96
-
97
- /** Pick the best result from conflicting ones via majority vote. */
98
- export function arbitrate(results: TaskExecutionResult[]): ArbitrationResult {
99
- if (results.length === 0) throw new Error("No results to arbitrate");
100
- if (results.length === 1) return { winner: results[0], method: "single", confidence: 0.8, reasoning: "Only one result available" };
101
-
102
- const success = results.filter(r => r.success);
103
- const fail = results.filter(r => !r.success);
104
-
105
- // All agree (success)
106
- if (success.length === results.length) {
107
- const longest = success.reduce((a, b) => (b.content || "").length > (a.content || "").length ? b : a);
108
- return { winner: longest, method: "unanimous", confidence: 0.95, reasoning: `${results.length}/${results.length} agents agreed` };
109
- }
110
-
111
- // Majority success
112
- if (success.length > fail.length) {
113
- // Pick the longest successful content (most detailed)
114
- const best = success.reduce((a, b) => (b.content || "").length > (a.content || "").length ? b : a);
115
- return { winner: best, method: "majority", confidence: success.length / results.length, reasoning: `${success.length}/${results.length} succeeded, selected most detailed` };
116
- }
117
-
118
- // Majority failure — pick the "closest to success" (longest content)
119
- if (fail.length > success.length) {
120
- const best = fail.reduce((a, b) => (b.content || "").length > (a.content || "").length ? b : a);
121
- return { winner: best, method: "majority", confidence: 0.3, reasoning: `Majority failed (${fail.length}/${results.length}), best-effort from partial output` };
122
- }
123
-
124
- // Tie — prefer success, or longest content
125
- const tie = success.length > 0 ? success[0] : fail[0];
126
- return { winner: tie, method: "tiebreaker", confidence: 0.5, reasoning: `Tie — selected ${tie.success ? "success" : "longest"} result` };
127
- }
128
-
129
- /* ═══════════════════════════════════════
130
- Quality scoring for individual results
131
- ═══════════════════════════════════════ */
132
- export interface QualityScore {
133
- score: number; // 0-100
134
- completeness: number; // how much of the task was addressed
135
- richness: number; // detail level of the output
136
- correctness: number; // did it match expectations (requires ground truth)
137
- }
138
-
139
- export function scoreQuality(result: TaskExecutionResult): QualityScore {
140
- const content = result.content || "";
141
-
142
- // Completeness: length is a weak proxy but useful
143
- const completeness = content.length > 500 ? 80 : content.length > 100 ? 50 : content.length > 0 ? 20 : 0;
144
-
145
- // Richness: code blocks, structured output, bullet points
146
- let richness = 50;
147
- if (/```/.test(content)) richness += 20;
148
- if (/\|.*\|.*\|/.test(content)) richness += 15; // tables
149
- if (/^[-*] /.test(content)) richness += 10; // bullets
150
- if (/\d+\./.test(content)) richness += 10; // numbered lists
151
- richness = Math.min(100, richness);
152
-
153
- // Correctness: basic sanity checks
154
- let correctness = 70;
155
- if (content.includes("Error") || content.includes("error")) correctness -= 20;
156
- if (content.includes("[REDACTED]")) correctness -= 10;
157
- if (content.includes("truncated")) correctness -= 15;
158
- correctness = Math.max(0, correctness);
159
-
160
- const score = Math.round((completeness * 0.3 + richness * 0.3 + correctness * 0.4));
161
- return { score, completeness, richness, correctness };
162
- }
1
+ /**
2
+ * 多Agent冲突仲裁 — majority voting, quality scoring, tie-breaking.
3
+ *
4
+ * When multiple agents produce conflicting outputs on the same task
5
+ * (or when a reviewer disagrees with an executor), this module
6
+ * provides structured conflict resolution.
7
+ */
8
+
9
+ import type { TaskExecutionResult } from "./factory";
10
+
11
+ /* ═══════════════════════════════════════
12
+ Conflict detection
13
+ ═══════════════════════════════════════ */
14
/** A disagreement between several results that share the same `id`. */
export interface Conflict {
  /** The `id` shared by every result in this conflict. */
  taskId: string;
  /** The results involved in the disagreement. */
  results: Array<TaskExecutionResult>;
  /** Human-readable summary of what the agents disagreed on. */
  description: string;
  /** How serious the disagreement is. */
  severity: "low" | "medium" | "high";
}
20
+
21
/**
 * Group results by `id` and report each group in which agents disagree.
 *
 * A group of two or more results can yield up to two entries:
 * - "medium" when at least two successes have a mean pairwise
 *   similarity below 0.3 (compared case-insensitively);
 * - "medium" when successes and failures are mixed;
 * - "high" when every result in the group failed.
 *
 * @param results Results from any number of tasks and agents.
 * @returns Conflicts in first-seen `id` order; empty when nothing disagrees.
 */
export function detectConflicts(results: TaskExecutionResult[]): Conflict[] {
  // Bucket the results per id; a Map keeps first-seen order.
  const grouped = new Map<string, TaskExecutionResult[]>();
  results.forEach(result => {
    const bucket = grouped.get(result.id);
    if (bucket === undefined) {
      grouped.set(result.id, [result]);
    } else {
      bucket.push(result);
    }
  });

  const found: Conflict[] = [];
  grouped.forEach((group, taskId) => {
    // A lone result has nothing to conflict with.
    if (group.length < 2) return;

    const passed = group.filter(r => r.success);
    const failed = group.filter(r => !r.success);

    // Two or more successes — check content divergence
    if (passed.length >= 2) {
      const lowered = passed.map(r => (r.content || "").toLowerCase());
      if (pairwiseSimilarity(lowered) < 0.3) {
        found.push({
          taskId,
          results: passed,
          description: "Multiple agents produced divergent successful outputs",
          severity: "medium",
        });
      }
    }

    // Mix of success and failure
    if (passed.length > 0 && failed.length > 0) {
      found.push({
        taskId,
        results: group,
        description: `${passed.length} succeeded, ${failed.length} failed — need tiebreaker`,
        severity: "medium",
      });
    }

    // All failed
    if (passed.length === 0 && failed.length >= 2) {
      found.push({
        taskId,
        results: failed,
        description: "All agents failed on this task",
        severity: "high",
      });
    }
  });

  return found;
}
55
+
56
+ /* ═══════════════════════════════════════
57
+ Content similarity (n-gram Jaccard)
58
+ ═══════════════════════════════════════ */
59
/**
 * Mean 3-gram Jaccard similarity over every unordered pair of texts.
 *
 * @param texts Strings to compare with each other.
 * @returns 1.0 when there are fewer than two texts, otherwise the average
 *          of `ngramJaccard` across all pairs.
 */
function pairwiseSimilarity(texts: string[]): number {
  if (texts.length < 2) return 1.0;

  let sum = 0;
  let pairs = 0;
  texts.forEach((left, index) => {
    // Pair each text only with the ones after it, so every pair is visited once.
    for (const right of texts.slice(index + 1)) {
      sum += ngramJaccard(left, right, 3);
      pairs += 1;
    }
  });

  return pairs === 0 ? 0 : sum / pairs;
}
71
+
72
/**
 * Jaccard index (|A ∩ B| / |A ∪ B|) of the character n-gram sets of two strings.
 *
 * @param a First string.
 * @param b Second string.
 * @param n Gram length passed through to `ngrams`.
 * @returns A value in [0, 1]; 1 when neither string produces any gram.
 */
function ngramJaccard(a: string, b: string, n: number): number {
  const left = ngrams(a, n);
  const right = ngrams(b, n);

  // Two gram-less inputs are treated as identical.
  if (left.size + right.size === 0) return 1;

  const shared = Array.from(left).filter(gram => right.has(gram)).length;
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
80
+
81
/**
 * Collect the distinct character n-grams of a string.
 *
 * A non-empty string shorter than `n` contributes itself as a single gram.
 * Without that, every short string would map to the empty set and two
 * different short outputs (e.g. "ok" and "no") would compare as identical.
 *
 * @param s Text to split.
 * @param n Gram length in UTF-16 code units.
 * @returns The set of grams; empty only when `s` is empty.
 */
function ngrams(s: string, n: number): Set<string> {
  const out = new Set<string>();
  if (s.length > 0 && s.length < n) {
    out.add(s);
    return out;
  }
  for (let i = 0; i <= s.length - n; i++) out.add(s.slice(i, i + n));
  return out;
}
86
+
87
+ /* ═══════════════════════════════════════
88
+ Majority voting / arbitration
89
+ ═══════════════════════════════════════ */
90
/** Outcome of arbitrating between several results. */
export interface ArbitrationResult {
  /** The result chosen as the answer. */
  winner: TaskExecutionResult;
  /** Which rule produced the winner. */
  method: "unanimous" | "majority" | "tiebreaker" | "single";
  /** Confidence in the choice, 0-1. */
  confidence: number;
  /** Human-readable explanation of the choice. */
  reasoning: string;
}
96
+
97
/**
 * Pick the best result from conflicting ones via majority vote.
 *
 * Rules, in order:
 * - one result: returned as-is ("single", confidence 0.8);
 * - all succeeded: longest successful content ("unanimous", 0.95);
 * - more successes than failures: longest successful content
 *   ("majority", confidence = share of successes);
 * - more failures than successes: longest failed content as a
 *   best-effort answer ("majority", 0.3);
 * - equal counts: longest successful content ("tiebreaker", 0.5).
 *
 * When several candidates have the same content length, the earliest wins.
 *
 * @param results Candidate results; must not be empty.
 * @returns The chosen result together with the rule, confidence and reasoning.
 * @throws Error when `results` is empty.
 */
export function arbitrate(results: TaskExecutionResult[]): ArbitrationResult {
  if (results.length === 0) throw new Error("No results to arbitrate");
  if (results.length === 1) {
    return { winner: results[0], method: "single", confidence: 0.8, reasoning: "Only one result available" };
  }

  // Longest content is used as the "most detailed" proxy; the strict `>`
  // keeps the earliest candidate when lengths are equal.
  const longest = (items: TaskExecutionResult[]): TaskExecutionResult =>
    items.reduce((a, b) => ((b.content || "").length > (a.content || "").length ? b : a));

  const success = results.filter(r => r.success);
  const fail = results.filter(r => !r.success);

  // All agree (success)
  if (success.length === results.length) {
    return {
      winner: longest(success),
      method: "unanimous",
      confidence: 0.95,
      reasoning: `${results.length}/${results.length} agents agreed`,
    };
  }

  // Majority success — pick the most detailed successful content
  if (success.length > fail.length) {
    return {
      winner: longest(success),
      method: "majority",
      confidence: success.length / results.length,
      reasoning: `${success.length}/${results.length} succeeded, selected most detailed`,
    };
  }

  // Majority failure — pick the "closest to success" (longest content)
  if (fail.length > success.length) {
    return {
      winner: longest(fail),
      method: "majority",
      confidence: 0.3,
      reasoning: `Majority failed (${fail.length}/${results.length}), best-effort from partial output`,
    };
  }

  // Tie — counts are equal and there are at least two results, so at least
  // one success exists. Prefer success, and among successes the most
  // detailed one, consistent with the branches above.
  return {
    winner: longest(success),
    method: "tiebreaker",
    confidence: 0.5,
    reasoning: "Tie — selected success result",
  };
}
128
+
129
+ /* ═══════════════════════════════════════
130
+ Quality scoring for individual results
131
+ ═══════════════════════════════════════ */
132
/**
 * Heuristic quality rating of a single task result.
 *
 * `score` is the weighted blend of the three component ratings as
 * computed by `scoreQuality`.
 */
export interface QualityScore {
  /** Overall rating, 0-100. */
  score: number;
  /** How much of the task was addressed. */
  completeness: number;
  /** Detail level of the output. */
  richness: number;
  /** Did it match expectations (a true measure requires ground truth). */
  correctness: number;
}
138
+
139
/**
 * Rate a single result with cheap text heuristics.
 *
 * - completeness: stepped by content length (0 / 20 / 50 / 80).
 * - richness: starts at 50 and earns bonuses for code fences, table rows,
 *   bullet lists and numbered lists, capped at 100.
 * - correctness: starts at 70 and is penalised when the content mentions
 *   an error, a redaction marker or truncation, floored at 0.
 *
 * @param result Result whose `content` is inspected; missing content is treated as "".
 * @returns The three component ratings plus their weighted blend
 *          (30% completeness, 30% richness, 40% correctness), rounded.
 */
export function scoreQuality(result: TaskExecutionResult): QualityScore {
  const content = result.content || "";
  const length = content.length;

  // Completeness: length is a weak proxy but useful
  const completeness = length > 500 ? 80 : length > 100 ? 50 : length > 0 ? 20 : 0;

  // Richness: code blocks, structured output, bullet points
  let richness = 50;
  if (content.includes("```")) richness += 20;
  if (/\|.*\|.*\|/.test(content)) richness += 15; // tables
  // The `m` flag lets `^` match at every line start, so a list that follows
  // an introductory sentence is still recognised.
  if (/^[ \t]*[-*] /m.test(content)) richness += 10; // bullets
  // Anchored to line start and followed by a space so decimals and version
  // numbers ("3.14", "v1.2.") are not mistaken for list items.
  if (/^[ \t]*\d+\. /m.test(content)) richness += 10; // numbered lists
  richness = Math.min(100, richness);

  // Correctness: basic sanity checks
  let correctness = 70;
  if (content.includes("Error") || content.includes("error")) correctness -= 20;
  if (content.includes("[REDACTED]")) correctness -= 10;
  if (content.includes("truncated")) correctness -= 15;
  correctness = Math.max(0, correctness);

  const score = Math.round(completeness * 0.3 + richness * 0.3 + correctness * 0.4);
  return { score, completeness, richness, correctness };
}