universal-dev-standards 5.1.0-beta.6 → 5.1.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/bin/uds.js +12 -0
  2. package/bundled/ai/standards/agent-communication-protocol.ai.yaml +34 -0
  3. package/bundled/ai/standards/anti-sycophancy-prompting.ai.yaml +111 -0
  4. package/bundled/ai/standards/capability-declaration.ai.yaml +113 -0
  5. package/bundled/ai/standards/circuit-breaker.ai.yaml +93 -0
  6. package/bundled/ai/standards/developer-memory.ai.yaml +13 -0
  7. package/bundled/ai/standards/dual-phase-output.ai.yaml +108 -0
  8. package/bundled/ai/standards/failure-source-taxonomy.ai.yaml +115 -0
  9. package/bundled/ai/standards/frontend-design-standards.ai.yaml +305 -0
  10. package/bundled/ai/standards/health-check-standards.ai.yaml +140 -0
  11. package/bundled/ai/standards/immutability-first.ai.yaml +112 -0
  12. package/bundled/ai/standards/model-selection.ai.yaml +111 -3
  13. package/bundled/ai/standards/packaging-standards.ai.yaml +142 -0
  14. package/bundled/ai/standards/recovery-recipe-registry.ai.yaml +200 -0
  15. package/bundled/ai/standards/retry-standards.ai.yaml +134 -0
  16. package/bundled/ai/standards/security-decision.ai.yaml +87 -0
  17. package/bundled/ai/standards/skill-standard-alignment-check.ai.yaml +119 -0
  18. package/bundled/ai/standards/standard-admission-criteria.ai.yaml +107 -0
  19. package/bundled/ai/standards/standard-lifecycle-management.ai.yaml +144 -0
  20. package/bundled/ai/standards/timeout-standards.ai.yaml +104 -0
  21. package/bundled/ai/standards/token-budget.ai.yaml +108 -0
  22. package/bundled/core/anti-sycophancy-prompting.md +184 -0
  23. package/bundled/core/capability-declaration.md +59 -0
  24. package/bundled/core/circuit-breaker.md +58 -0
  25. package/bundled/core/developer-memory.md +29 -1
  26. package/bundled/core/dual-phase-output.md +56 -0
  27. package/bundled/core/failure-source-taxonomy.md +72 -0
  28. package/bundled/core/frontend-design-standards.md +474 -0
  29. package/bundled/core/health-check-standards.md +72 -0
  30. package/bundled/core/immutability-first.md +105 -0
  31. package/bundled/core/model-selection.md +80 -0
  32. package/bundled/core/packaging-standards.md +216 -0
  33. package/bundled/core/recovery-recipe-registry.md +69 -0
  34. package/bundled/core/retry-standards.md +62 -0
  35. package/bundled/core/security-decision.md +65 -0
  36. package/bundled/core/skill-standard-alignment-check.md +79 -0
  37. package/bundled/core/standard-admission-criteria.md +84 -0
  38. package/bundled/core/standard-lifecycle-management.md +94 -0
  39. package/bundled/core/timeout-standards.md +63 -0
  40. package/bundled/core/token-budget.md +58 -0
  41. package/bundled/locales/zh-CN/CHANGELOG.md +22 -3
  42. package/bundled/locales/zh-CN/README.md +1 -1
  43. package/bundled/locales/zh-TW/CHANGELOG.md +22 -3
  44. package/bundled/locales/zh-TW/README.md +1 -1
  45. package/bundled/locales/zh-TW/core/anti-sycophancy-prompting.md +184 -0
  46. package/bundled/locales/zh-TW/core/packaging-standards.md +224 -0
  47. package/bundled/skills/e2e-assistant/SKILL.md +19 -5
  48. package/bundled/skills/testing-guide/SKILL.md +5 -0
  49. package/bundled/skills/testing-guide/test-skeleton-templates.md +316 -0
  50. package/package.json +1 -1
  51. package/src/commands/config.js +9 -0
  52. package/src/commands/init.js +91 -46
  53. package/src/commands/mcp.js +26 -0
  54. package/src/commands/run-intent.js +66 -0
  55. package/src/commands/update.js +35 -4
  56. package/src/core/command-router.js +85 -0
  57. package/src/core/project-config.js +91 -0
  58. package/src/flows/init-flow.js +6 -1
  59. package/src/i18n/messages.js +6 -6
  60. package/src/mcp/__tests__/server.test.js +251 -0
  61. package/src/mcp/server.js +352 -0
  62. package/src/prompts/init.js +157 -1
  63. package/src/reconciler/actual-state-scanner.js +24 -0
  64. package/src/uninstallers/hook-uninstaller.js +32 -1
  65. package/src/utils/e2e-analyzer.js +88 -5
  66. package/src/utils/e2e-detector.js +73 -1
  67. package/src/utils/integration-generator.js +22 -3
  68. package/standards-registry.json +193 -5
package/bin/uds.js CHANGED
@@ -26,6 +26,8 @@ import { releaseCommand } from '../src/commands/release.js';
26
26
  import { compileStandards } from '../src/commands/compile.js';
27
27
  import { flowCreateCommand, flowListCommand, flowValidateCommand, flowDiffCommand, flowExportCommand, flowImportCommand } from '../src/commands/flow.js';
28
28
  import { generateReport } from '../src/commands/report.js';
29
+ import { mcpCommand } from '../src/commands/mcp.js';
30
+ import { runIntentCommand } from '../src/commands/run-intent.js';
29
31
  import { setLanguage, setLanguageExplicit, detectLanguage, t } from '../src/i18n/messages.js';
30
32
  import { maybeCheckForUpdates, formatUpdateNotice, shouldCheckUpdateForCommand } from '../src/utils/update-checker.js';
31
33
  import { config } from '../src/utils/config-manager.js';
@@ -539,4 +541,14 @@ missionCommand
539
541
  .option('-s, --state <state>', 'Filter by state')
540
542
  .action(missionListCommand);
541
543
 
544
+ // MCP command for AI tool integration
545
+ mcpCommand(program);
546
+
547
+ // uds run <intent> — language-agnostic command proxy (XSPEC-029)
548
+ program
549
+ .command('run <intent>')
550
+ .description('Run a project command by intent (test/lint/build/security) via uds.project.yaml')
551
+ .option('--dry-run', 'Show resolved command without executing')
552
+ .action(runIntentCommand);
553
+
542
554
  program.parse();
package/bundled/ai/standards/agent-communication-protocol.ai.yaml CHANGED
@@ -108,6 +108,36 @@ standard:
108
108
  - field: constraints
109
109
  description: "約束條件 [string]"
110
110
 
111
+ hook_exit_codes:
112
+ description: >
113
+ Hook 退出碼三分類(借鑑 lintsinghua/claude-code-book Ch.7 Hooks 協議)。
114
+ 適用於所有 DevAP/VibeOps 生成的 Claude Code hook shell 腳本。
115
+ codes:
116
+ - code: 0
117
+ name: pass
118
+ description: "Hook 通過,工具/Agent 執行繼續"
119
+ blocking: false
120
+ stdout_handling: "預設忽略(Stop hook 可選擇輸出 JSON,見下方 rules 的 MAY 條款)"
121
+ stderr_handling: "忽略"
122
+ - code: 2
123
+ name: block
124
+ description: "阻止執行;stderr 輸出作為回饋文字直接注入 AI 上下文"
125
+ blocking: true
126
+ stdout_handling: "忽略"
127
+ stderr_handling: "作為 AI feedback 注入(支援 JSON 格式 decision:block/reason)"
128
+ note: "PreToolUse exit 2 → 阻止工具呼叫;Stop exit 2 → 阻止 AI 停止(stderr 回饋後繼續執行),而非中止 session"
129
+ - code: other
130
+ name: warn
131
+ description: "非阻擋性警告;記錄日誌但執行繼續(exit 1 為常用警告代碼)"
132
+ blocking: false
133
+ stdout_handling: "記錄到系統日誌"
134
+ stderr_handling: "顯示為 warning,不注入 AI 上下文"
135
+ rules:
136
+ - "MUST: 阻擋行為只能用 exit 2,不得用 exit 1 或其他非零代碼"
137
+ - "MUST: exit 2 時 stderr 必須是人類可讀的原因(推薦 JSON: {reason: string})"
138
+ - "SHOULD: 軟性警告(deprecated pattern、style 違反)使用 exit 1"
139
+ - "MAY: exit 0 時可輸出 JSON 到 stdout(Stop hook 用 decision:block 要求 AI 繼續)"
140
+
111
141
  rules:
112
142
  - id: ACP-001
113
143
  trigger: "訊息缺少必要欄位"
@@ -129,6 +159,10 @@ standard:
129
159
  trigger: "建立 Handoff"
130
160
  action: "引用 artifact_id,不嵌入完整內容"
131
161
  priority: medium
162
+ - id: ACP-006
163
+ trigger: "生成 hook 腳本"
164
+ action: "依照 hook_exit_codes 三分類:0=pass, 2=block+feedback, 其他=warn"
165
+ priority: high
132
166
 
133
167
  physical_spec:
134
168
  type: checklist
package/bundled/ai/standards/anti-sycophancy-prompting.ai.yaml ADDED
@@ -0,0 +1,111 @@
1
+ # Anti-Sycophancy Prompting Standards - AI Optimized
2
+ # Source: core/anti-sycophancy-prompting.md
3
+
4
+ id: anti-sycophancy-prompting
5
+ meta:
6
+ version: "1.0.0"
7
+ updated: "2026-04-15"
8
+ source: core/anti-sycophancy-prompting.md
9
+ description: Techniques and rules for designing prompts that elicit genuine, critical LLM responses rather than sycophantic agreement
10
+
11
+ principles:
12
+ socratic_critique:
13
+ rule: Reframe evaluation tasks as critique tasks to eliminate sycophancy incentives
14
+ do:
15
+ - Ask for fatal objections rather than approval
16
+ - Require technically grounded, non-trivial objections
17
+ - Prohibit positive opening phrases
18
+ do_not:
19
+ - Ask "is this a good idea?" without specifying critique mode
20
+ - Accept "Great idea, but..." as valid critique framing
21
+
22
+ anchor_prevention:
23
+ rule: Obtain independent LLM judgment before revealing user position
24
+ steps:
25
+ - Ask for neutral comparison without revealing preference
26
+ - Receive independent judgment
27
+ - Reveal user position
28
+ - Require explicit technical justification for any stance change
29
+
30
+ symmetric_output:
31
+ rule: Use format constraints to force balanced opposing viewpoints
32
+ format: "| Arguments FOR | Arguments AGAINST | — Net Recommendation: [clear stance]"
33
+ constraints:
34
+ - Both columns must have similar length (< 20% difference)
35
+ - Net recommendation must be explicit and may be negative
36
+
37
+ confidence_labeling:
38
+ rule: Require confidence scores on all recommendations
39
+ format: "Confidence: [1-5] — [reason for uncertainty]"
40
+ scale:
41
+ 5: Validated at similar scale, high certainty
42
+ 4: Industry standard with sufficient documentation
43
+ 3: Reasonable inference, PoC recommended
44
+ 2: Uncertain, Spike strongly recommended
45
+ 1: Highly uncertain, not recommended for direct adoption
46
+ constraints:
47
+ - Confidence < 3 must include "More information needed"
48
+ - All major claims require confidence labeling
49
+
50
+ sycophancy_detection:
51
+ rule: Detect sycophantic response patterns for automated re-evaluation
52
+ signals:
53
+ - id: positive-opener
54
+ pattern: Response starts with agreeable phrase within first 50 tokens
55
+ examples: ["great", "interesting", "certainly", "of course", "absolutely"]
56
+ - id: position-flip
57
+ pattern: Model reverses stance after user reveals preference without new evidence
58
+ - id: risk-minimization
59
+ pattern: "While there are some minor issues, overall..."
60
+ - id: missing-quantification
61
+ pattern: Major recommendation lacks confidence score or specific metrics
62
+ trigger: If 2+ signals detected, invoke re-evaluation with Red Team framing
63
+
64
+ prohibited_behaviors:
65
+ - id: positive-opener
66
+ description: Do NOT open critique with positive affirmation
67
+ correct_action: Start directly with analysis
68
+
69
+ - id: unsupported-flip
70
+ description: Do NOT reverse stance after user reveals preference without new technical evidence
71
+ correct_action: Maintain position or cite specific new information
72
+
73
+ - id: unquantified-risk
74
+ description: Do NOT describe risks as "minor" without evidence
75
+ correct_action: Quantify risk or explain why it is bounded
76
+
77
+ - id: missing-confidence
78
+ description: Do NOT provide major recommendations without confidence level
79
+ correct_action: Always include confidence (1-5) and uncertainty statement
80
+
81
+ agent_application:
82
+ code_review:
83
+ apply: [socratic_critique, symmetric_output, sycophancy_detection]
84
+ architecture_advisor:
85
+ apply: [anchor_prevention, confidence_labeling, sycophancy_detection]
86
+ bug_analysis:
87
+ apply: [socratic_critique, confidence_labeling]
88
+ general_consultation:
89
+ apply: [symmetric_output, confidence_labeling]
90
+
91
+ complete_template: |
92
+ You are a domain expert with no emotional investment in my satisfaction.
93
+ Your role is to identify flaws in my thinking, not to make me feel good.
94
+
95
+ Rules:
96
+ - Do NOT open with positive phrases (good, interesting, nice, certainly)
97
+ - Every recommendation must include a confidence level (1-5) and what you are uncertain about
98
+ - If my direction is wrong, say so directly
99
+
100
+ My question: [question]
101
+
102
+ First, list the incorrect assumptions I may be holding about this problem.
103
+ Then give your honest recommendation.
104
+
105
+ checklist:
106
+ - Prompt does not invite agreement
107
+ - Positive opening phrases explicitly prohibited
108
+ - Independent stance obtained before revealing user preference (if applicable)
109
+ - Dual-column format enforced for evaluation tasks
110
+ - Confidence levels required on major recommendations
111
+ - Sycophancy detection applied to output
package/bundled/ai/standards/capability-declaration.ai.yaml ADDED
@@ -0,0 +1,113 @@
1
+ # Capability Declaration Standard - AI Optimized
2
+ # Source: XSPEC-037 (claude-code-book Ch.3 Fail-Closed buildTool factory)
3
+
4
+ standard:
5
+ id: capability-declaration
6
+ name: Capability Declaration Standard
7
+ description: Fail-Closed 能力聲明 — 工具/Adapter/Agent 必須顯式聲明安全性,缺省為最保守預設
8
+
9
+ meta:
10
+ version: "1.0.0"
11
+ updated: "2026-04-15"
12
+ source: XSPEC-037
13
+ description: >
14
+ 所有工具、Adapter 和 Agent 能力預設為「不安全、需授權」。
15
+ 開發者必須顯式聲明 isConcurrencySafe: true 才能享受並行優化。
16
+ 「忘記設權限」的結果是保守行為而非危險行為。
17
+ scope: universal
18
+ borrowed_from: "claude-code-book Ch.3 buildTool factory, isConcurrencySafe/isReadOnly default false"
19
+
20
+ guidelines:
21
+ - "所有工具、Adapter、Agent 必須實作 CapabilityDeclaration(即使使用預設值)"
22
+ - "isConcurrencySafe 和 isReadOnly 預設為 false — 必須顯式聲明才能解鎖優化路徑"
23
+ - "框架必須在缺少聲明時使用 FAIL_CLOSED_DEFAULTS,並記錄警告"
24
+ - "聲明必須反映實際能力,虛假聲明(如謊稱 isReadOnly)視為安全漏洞"
25
+ - "trustLevel 影響沙箱隔離強度;隔離強度不可降低至低於 userSettings 允許的等級"
26
+
27
+ interface:
28
+ CapabilityDeclaration:
29
+ fields:
30
+ isConcurrencySafe:
31
+ type: boolean
32
+ default: false
33
+ description: "是否對並行執行安全(無競態、無共享可變狀態)。預設 false。"
34
+ unlock: "設為 true 後可加入並行批次執行"
35
+ isReadOnly:
36
+ type: boolean
37
+ default: false
38
+ description: "是否為純讀取操作(不修改任何持久化狀態)。預設 false。"
39
+ unlock: "設為 true 後可跳過寫入相關的 Safety Hook 階段"
40
+ requiresUserConfirmation:
41
+ type: boolean
42
+ default: true
43
+ description: "執行前是否需要使用者明確確認。預設 true。"
44
+ unlock: "設為 false 後進入自動執行模式(需 userSettings 允許)"
45
+ trustLevel:
46
+ type: enum
47
+ values: [trusted, sandboxed, untrusted]
48
+ default: untrusted
49
+ description: "工具的信任等級,影響沙箱隔離強度"
50
+ mapping:
51
+ trusted: "內建工具或已審核插件,無沙箱限制"
52
+ sandboxed: "第三方工具,在受限環境中執行"
53
+ untrusted: "未知來源,最嚴格限制(預設)"
54
+
55
+ fail_closed_defaults:
56
+ isConcurrencySafe: false
57
+ isReadOnly: false
58
+ requiresUserConfirmation: true
59
+ trustLevel: untrusted
60
+ log_on_use:
61
+ level: warn
62
+ message: "[WARN] Capability not declared, using Fail-Closed defaults for: {component_name}"
63
+
64
+ well_known_declarations:
65
+ note: "常見工具的建議聲明(供參考,各專案可調整)"
66
+ examples:
67
+ GrepTool:
68
+ isConcurrencySafe: true
69
+ isReadOnly: true
70
+ requiresUserConfirmation: false
71
+ trustLevel: trusted
72
+ GlobTool:
73
+ isConcurrencySafe: true
74
+ isReadOnly: true
75
+ requiresUserConfirmation: false
76
+ trustLevel: trusted
77
+ FileReadTool:
78
+ isConcurrencySafe: true
79
+ isReadOnly: true
80
+ requiresUserConfirmation: false
81
+ trustLevel: trusted
82
+ FileEditTool:
83
+ isConcurrencySafe: false
84
+ isReadOnly: false
85
+ requiresUserConfirmation: true
86
+ trustLevel: trusted
87
+ BashTool:
88
+ isConcurrencySafe: false
89
+ isReadOnly: false
90
+ requiresUserConfirmation: true
91
+ trustLevel: sandboxed
92
+
93
+ enforcement:
94
+ on_missing_declaration:
95
+ action: "使用 FAIL_CLOSED_DEFAULTS"
96
+ log: true
97
+ on_false_claim:
98
+ description: "聲明 isReadOnly: true 但實際執行了寫入"
99
+ detection: "runtime 監控 + 審計日誌"
100
+ consequence: "記錄 CAPABILITY_MISMATCH 事件,降級至 FAIL_CLOSED_DEFAULTS"
101
+
102
+ applicable_components:
103
+ - "DevAP AgentAdapter(ClaudeAdapter / OpenCodeAdapter / CliAdapter)"
104
+ - "DevAP Tool 呼叫系統"
105
+ - "VibeOps ToolExecutor"
106
+ - "VibeOps Agent(planner / builder / evaluator 等)"
107
+ - "所有 MCP 工具插件"
108
+
109
+ error_codes:
110
+ CAP-001: "CAPABILITY_NOT_DECLARED — 使用 Fail-Closed 預設"
111
+ CAP-002: "CAPABILITY_MISMATCH — 實際行為與聲明不符"
112
+ CAP-003: "TRUST_LEVEL_INSUFFICIENT — trustLevel 低於場景要求"
113
+ CAP-004: "CONCURRENT_UNSAFE — 嘗試並行執行 isConcurrencySafe: false 的組件"
package/bundled/ai/standards/circuit-breaker.ai.yaml ADDED
@@ -0,0 +1,93 @@
1
+ # Circuit Breaker Standard - AI Optimized
2
+ # Source: XSPEC-036 (claude-code-book Ch.2 MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES)
3
+
4
+ standard:
5
+ id: circuit-breaker
6
+ name: Circuit Breaker Standard
7
+ description: 通用斷路器模式 — 連續失敗後開路,防止 API 呼叫雪崩
8
+
9
+ meta:
10
+ version: "1.0.0"
11
+ updated: "2026-04-15"
12
+ source: XSPEC-036
13
+ description: >
14
+ 任何依賴外部 API 或重試機制的 Agent 組件都應使用斷路器保護。
15
+ 書中實測:引入斷路器前每日浪費 ~250K API 呼叫(1279 個 session 各超過 50 次連續失敗)。
16
+ scope: universal
17
+ borrowed_from: "claude-code-book Ch.2 circuit breaker, MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3"
18
+
19
+ guidelines:
20
+ - "任何重試機制必須使用斷路器包裝,不得直接無限重試"
21
+ - "斷路器狀態必須透過遙測可觀測(circuit_breaker_state_change 事件)"
22
+ - "OPEN 狀態下的請求必須立即失敗(fail fast),不等待 timeout"
23
+ - "failureThreshold 預設值為 3,與 claude-code-book 及 DevAP Fix Loop 一致"
24
+ - "斷路器必須按照「功能單元」建立,不得全域共享單一斷路器"
25
+
26
+ states:
27
+ CLOSED:
28
+ description: "正常運作,請求正常轉發"
29
+ transition_to_OPEN: "連續失敗次數 >= failureThreshold"
30
+ reset_condition: "任一成功呼叫重設失敗計數器"
31
+ OPEN:
32
+ description: "開路,立即拒絕所有請求"
33
+ action: "回傳 CircuitOpenError,不實際執行"
34
+ transition_to_HALF_OPEN: "等待 cooldownMs 後自動進入"
35
+ HALF_OPEN:
36
+ description: "半開,允許一次探針呼叫"
37
+ on_probe_success: "→ CLOSED,重設計數器"
38
+ on_probe_failure: "→ OPEN,重設 cooldown"
39
+
40
+ interface:
41
+ CircuitBreaker:
42
+ fields:
43
+ name: string
44
+ state: "CLOSED | HALF_OPEN | OPEN"
45
+ methods:
46
+ execute: "async <T>(fn: () => Promise<T>) => Promise<T>"
47
+ getState: "() => CircuitBreakerState"
48
+ reset: "() => void # 手動重設(管理員用)"
49
+
50
+ CircuitBreakerConfig:
51
+ fields:
52
+ failureThreshold:
53
+ type: number
54
+ default: 3
55
+ description: "連續失敗 N 次後開路"
56
+ cooldownMs:
57
+ type: number
58
+ default: 30000
59
+ description: "OPEN → HALF_OPEN 等待時間(毫秒)"
60
+ successThreshold:
61
+ type: number
62
+ default: 1
63
+ description: "HALF_OPEN → CLOSED 需要的連續成功次數"
64
+
65
+ CircuitOpenError:
66
+ fields:
67
+ code: "CIRCUIT_OPEN"
68
+ breakerName: string
69
+ state: "OPEN"
70
+ cooldownRemainingMs: number
71
+
72
+ telemetry_events:
73
+ circuit_breaker_state_change:
74
+ fields:
75
+ breaker_name: string
76
+ from_state: "CLOSED | HALF_OPEN | OPEN"
77
+ to_state: "CLOSED | HALF_OPEN | OPEN"
78
+ failure_count: number
79
+ timestamp: string
80
+ when: "每次狀態轉換時上傳"
81
+
82
+ applicable_scenarios:
83
+ - "DevAP Fix Loop Agent 呼叫重試"
84
+ - "DevAP Judge / Quality Gate 重試"
85
+ - "DevAP API 呼叫(LLM API 不穩定保護)"
86
+ - "VibeOps Feedback Loop 重試"
87
+ - "VibeOps FLARE 主動檢索重試"
88
+ - "VibeOps AutoCompact(原始靈感來源)"
89
+
90
+ error_codes:
91
+ CB-001: "CIRCUIT_OPEN — 斷路器開路,請求被拒絕"
92
+ CB-002: "PROBE_FAILED — HALF_OPEN 探針失敗,重新開路"
93
+ CB-003: "INVALID_CONFIG — failureThreshold 必須 >= 1"
package/bundled/ai/standards/developer-memory.ai.yaml CHANGED
@@ -154,6 +154,19 @@ standard:
154
154
  Upgrade path: pitfalls → pattern → mental-model.
155
155
  priority: recommended
156
156
 
157
+ - id: memory-as-hint-not-conclusion
158
+ trigger: "before acting on any recalled memory (file paths, function names, API flags, repo state)"
159
+ instruction: >
160
+ 記憶是線索,不是結論(借鑑 lintsinghua/claude-code-book 記憶驗證原則)。
161
+ 使用記憶前必須驗證其仍然成立:
162
+ - 記憶提到檔案路徑 → 先確認檔案存在(Glob/Read)
163
+ - 記憶提到函式或 flag → 先確認仍存在(Grep)
164
+ - 記憶描述 repo 架構快照 → 優先信任 git log / 原始碼
165
+ - 若記憶內容與現況衝突 → 信任現況,標記記憶為 needs-revision
166
+ 禁止:直接引用記憶中的具體 API/路徑/函式名稱作為事實,
167
+ 未獨立驗證即向使用者推薦操作。
168
+ priority: required
169
+
157
170
  - id: noise-control
158
171
  trigger: surfacing memories
159
172
  instruction: >
package/bundled/ai/standards/dual-phase-output.ai.yaml ADDED
@@ -0,0 +1,108 @@
1
+ # Dual-Phase LLM Output Standard - AI Optimized
2
+ # Source: XSPEC-035 (claude-code-book Ch.7 AutoCompact dual-phase design)
3
+
4
+ standard:
5
+ id: dual-phase-output
6
+ name: Dual-Phase LLM Output Standard
7
+ description: 雙階段 LLM 輸出模式 — <analysis> 思考丟棄,<summary> 結構化保留
8
+
9
+ meta:
10
+ version: "1.0.0"
11
+ updated: "2026-04-15"
12
+ source: XSPEC-035
13
+ description: >
14
+ 讓 LLM 充分推理的同時,避免思考過程累積在上下文中消耗 token 預算。
15
+ 適用於所有需要 LLM 審查的場景:Judge、Evaluator、Guardian、AutoCompact。
16
+ scope: universal
17
+ borrowed_from: "claude-code-book Ch.7 formatCompactSummary dual-phase output"
18
+
19
+ guidelines:
20
+ - "所有 LLM 審查 Agent 必須要求雙階段輸出格式(<analysis> + <summary>)"
21
+ - "<analysis> 在後處理時必須丟棄,不得寫入持久化上下文或對話歷史"
22
+ - "<summary> 必須包含結構化結論欄位(decision、confidence、findings、next_action)"
23
+ - "若回應缺少雙階段格式,後處理器必須降級相容(完整回應視為 summary)並記錄警告"
24
+ - "summary 欄位命名需遵循本標準,各應用場景可擴充但不可刪減核心欄位"
25
+
26
+ format:
27
+ xml_tags:
28
+ analysis:
29
+ purpose: "LLM 思考草稿 — 後處理時丟棄"
30
+ required: true
31
+ content_guidance:
32
+ - "逐條審查邏輯"
33
+ - "邊界情境考量"
34
+ - "替代方案比較"
35
+ summary:
36
+ purpose: "結構化結論 — 後處理時保留"
37
+ required: true
38
+ core_fields:
39
+ decision:
40
+ type: enum
41
+ values: [approved, rejected, needs_revision]
42
+ required: true
43
+ confidence:
44
+ type: enum
45
+ values: [high, medium, low]
46
+ required: true
47
+ findings:
48
+ type: array
49
+ item_format: "[type] description"
50
+ required: true
51
+ next_action:
52
+ type: string
53
+ required: true
54
+ extension_fields:
55
+ note: "各應用場景可新增欄位,不可刪減上述核心欄位"
56
+ examples:
57
+ security: "severity: critical | high | medium | low, cwe_ids: [CWE-NNN]"
58
+ quality: "test_coverage: number, tech_debt_score: number"
59
+
60
+ prompt_template: |
61
+ You MUST respond using EXACTLY this two-phase XML structure:
62
+
63
+ <analysis>
64
+ [Your reasoning process — will be DISCARDED after processing]
65
+ - Step-by-step evaluation
66
+ - Edge case considerations
67
+ - Alternative comparisons
68
+ </analysis>
69
+
70
+ <summary>
71
+ decision: approved | rejected | needs_revision
72
+ confidence: high | medium | low
73
+ findings:
74
+ - [type] description
75
+ next_action: [recommended follow-up action]
76
+ </summary>
77
+
78
+ IMPORTANT: The <analysis> block is your scratchpad. Only <summary> persists.
79
+
80
+ post_processing:
81
+ extract_summary:
82
+ pattern: "<summary>([\\s\\S]*?)</summary>"
83
+ on_missing: fallback_to_full_response
84
+ discard_analysis:
85
+ pattern: "<analysis>([\\s\\S]*?)</analysis>"
86
+ action: discard
87
+ fallback:
88
+ trigger: "summary tag not found in response"
89
+ action: "use full response as summary content"
90
+ log_level: warn
91
+ message: "[WARN] dual-phase format missing, fallback to full response"
92
+
93
+ token_impact:
94
+ analysis_ratio: "50-70% of typical review response"
95
+ savings_per_review: "1000–3500 tokens"
96
+ savings_in_fix_loop_3x: "3000–10500 tokens"
97
+ note: "Savings accumulate in repeated review scenarios (Fix Loop, Feedback Loop)"
98
+
99
+ applicable_agents:
100
+ - DevAP Judge Agent
101
+ - VibeOps Evaluator Agent
102
+ - VibeOps Guardian Agent
103
+ - Any LLM-driven AutoCompact / summarization component
104
+
105
+ error_codes:
106
+ DPO-001: "summary tag missing — fallback activated"
107
+ DPO-002: "analysis tag missing — full response processed as-is"
108
+ DPO-003: "required summary field absent — parsing error"
package/bundled/ai/standards/failure-source-taxonomy.ai.yaml ADDED
@@ -0,0 +1,115 @@
1
+ # Failure Source Taxonomy Standard - AI Optimized
2
+ # Source: XSPEC-045 (claw-code ROADMAP Phase 2 Failure Taxonomy, DEC-035)
3
+
4
+ standard:
5
+ id: failure-source-taxonomy
6
+ name: Failure Source Taxonomy Standard
7
+ description: 失敗來源分類法 — 在 TaskStatus(what)之上新增 failureSource(why)維度,8 類結構化失敗來源
8
+
9
+ meta:
10
+ version: "1.0.0"
11
+ updated: "2026-04-16"
12
+ source: XSPEC-045
13
+ description: >
14
+ 現有的 TaskStatus 只回答「發生了什麼」(what),failureSource 補充「為什麼失敗」(why)。
15
+ 結構化的失敗來源使下游恢復機制(Recovery Recipe Registry, XSPEC-046)能精準匹配策略,
16
+ 避免用同一套重試邏輯處理本質不同的失敗類型。
17
+ scope: universal
18
+ borrowed_from: "ultraworkers/claw-code ROADMAP Phase 2 Failure Taxonomy (adapted for LLM Agent context)"
19
+
20
+ guidelines:
21
+ - "所有失敗結果應攜帶 failureSource,使恢復策略可精準匹配"
22
+ - "failureSource 為 optional 欄位,不得破壞現有不含此欄位的程式碼"
23
+ - "在同一失敗事件中,選擇最根本的來源作為 failureSource(例如 branch_divergence 比 compilation 更根本)"
24
+ - "failureSource 應由偵測到失敗的元件設定(QualityGate / Adapter / SafetyHook / BranchDriftChecker)"
25
+ - "跨專案(DevAP / VibeOps)各自獨立定義 FailureSource type,語義保持一致"
26
+
27
+ failure_sources:
28
+ prompt_delivery:
29
+ description: "Prompt 未正確傳遞給 LLM(API 4xx、空回應、格式解析失敗)"
30
+ detection_hint: "API 回傳 4xx / 空回應 / JSON 解析失敗"
31
+ recommended_recovery: "重試或 model_switch"
32
+ severity_range: [critical, high]
33
+
34
+ model_degradation:
35
+ description: "LLM 降智或回應品質明顯下降(重複輸出、無關回應、品質驟降)"
36
+ detection_hint: "輸出品質評分低於基準線 / 連續重複輸出 / 評估分數 < 30"
37
+ recommended_recovery: "model_switch"
38
+ severity_range: [critical, high, medium]
39
+
40
+ branch_divergence:
41
+ description: "工作分支落後基底分支,可能導致合併衝突或假回歸"
42
+ detection_hint: "git rev-list --count HEAD..origin/{baseBranch} > 0"
43
+ recommended_recovery: "rebase_and_retry"
44
+ severity_range: [critical, high, medium]
45
+ note: "severity 由落後 commit 數決定:1-5 為 medium,6+ 為 high/critical"
46
+
47
+ compilation:
48
+ description: "編譯或型別檢查錯誤(TypeScript tsc、Go build、Rust cargo 等)"
49
+ detection_hint: "build / tsc / compile 指令 exit code != 0"
50
+ recommended_recovery: "fix_loop"
51
+ severity_range: [high, medium, low]
52
+
53
+ test_failure:
54
+ description: "測試失敗(unit / integration / system / e2e 任一層級)"
55
+ detection_hint: "test 指令 exit code != 0"
56
+ recommended_recovery: "fix_loop"
57
+ severity_range: [high, medium, low]
58
+
59
+ tool_failure:
60
+ description: "工具層失敗(MCP server 無回應、Plugin 載入失敗、CLI 工具不存在)"
61
+ detection_hint: "MCP / Plugin / shell 工具執行失敗或 timeout"
62
+ recommended_recovery: "circuit_breaker 保護後重試,或降級模式繼續"
63
+ severity_range: [critical, high, medium]
64
+
65
+ policy_violation:
66
+ description: "安全或治理策略攔截(Guardian deny、SafetyHook 阻擋、Fail-Closed 觸發)"
67
+ detection_hint: "SecurityDecision 為 deny / Guardian verdict 為 blocking: true"
68
+ recommended_recovery: "human_checkpoint(不自動重試,需人工審查)"
69
+ severity_range: [critical, high]
70
+
71
+ resource_exhaustion:
72
+ description: "資源耗盡(token 預算超限、時間 timeout、美元預算耗盡)"
73
+ detection_hint: "error_max_turns / error_max_budget_usd / token zone BLOCKING"
74
+ recommended_recovery: "degraded_mode 或 human_checkpoint"
75
+ severity_range: [critical, high]
76
+
77
+ types:
78
+ FailureSource:
79
+ description: "8 類失敗來源的 union type"
80
+ values:
81
+ - prompt_delivery
82
+ - model_degradation
83
+ - branch_divergence
84
+ - compilation
85
+ - test_failure
86
+ - tool_failure
87
+ - policy_violation
88
+ - resource_exhaustion
89
+
90
+ FailureDetail:
91
+ description: "結構化失敗細節"
92
+ fields:
93
+ source: FailureSource
94
+ raw_error: string
95
+ detected_by: "string # 偵測元件名稱(quality-gate / claude-adapter / safety-hook / branch-drift)"
96
+ timestamp: "string # ISO 8601"
97
+
98
+ priority_rules:
99
+ description: "當多個失敗來源並存時的優先級規則"
100
+ rules:
101
+ - "branch_divergence > compilation(分支漂移通常是 compilation 失敗的根因)"
102
+ - "policy_violation > 其他(安全優先,不嘗試繞過)"
103
+ - "resource_exhaustion > 其他(資源耗盡時無意義重試)"
104
+ - "其他情況取最先偵測到的來源"
105
+
106
+ integration_points:
107
+ devap:
108
+ files:
109
+ - "packages/core/src/types.ts — TaskResult.failureSource / FailureSource type"
110
+ - "packages/core/src/quality-gate.ts — QualityGateResult.failureSource 推斷"
111
+ - "packages/adapter-claude/src/claude-adapter.ts — resource_exhaustion 映射"
112
+ vibeops:
113
+ files:
114
+ - "src/types/index.ts — IterationRecord.failureSource(獨立定義,AGPL 隔離)"
115
+ - "src/runner/pipeline-runner.ts — agent:error 事件 payload"