universal-dev-standards 5.1.0-beta.6 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +6 -0
  2. package/bin/uds.js +14 -0
  3. package/bundled/ai/standards/agent-communication-protocol.ai.yaml +34 -0
  4. package/bundled/ai/standards/anti-sycophancy-prompting.ai.yaml +111 -0
  5. package/bundled/ai/standards/capability-declaration.ai.yaml +113 -0
  6. package/bundled/ai/standards/circuit-breaker.ai.yaml +93 -0
  7. package/bundled/ai/standards/developer-memory.ai.yaml +13 -0
  8. package/bundled/ai/standards/dual-phase-output.ai.yaml +108 -0
  9. package/bundled/ai/standards/failure-source-taxonomy.ai.yaml +115 -0
  10. package/bundled/ai/standards/frontend-design-standards.ai.yaml +305 -0
  11. package/bundled/ai/standards/health-check-standards.ai.yaml +140 -0
  12. package/bundled/ai/standards/immutability-first.ai.yaml +112 -0
  13. package/bundled/ai/standards/model-selection.ai.yaml +111 -3
  14. package/bundled/ai/standards/packaging-standards.ai.yaml +142 -0
  15. package/bundled/ai/standards/recovery-recipe-registry.ai.yaml +200 -0
  16. package/bundled/ai/standards/retry-standards.ai.yaml +134 -0
  17. package/bundled/ai/standards/security-decision.ai.yaml +87 -0
  18. package/bundled/ai/standards/skill-standard-alignment-check.ai.yaml +119 -0
  19. package/bundled/ai/standards/standard-admission-criteria.ai.yaml +107 -0
  20. package/bundled/ai/standards/standard-lifecycle-management.ai.yaml +144 -0
  21. package/bundled/ai/standards/timeout-standards.ai.yaml +104 -0
  22. package/bundled/ai/standards/token-budget.ai.yaml +108 -0
  23. package/bundled/ai/standards/translation-lifecycle-standards.ai.yaml +145 -0
  24. package/bundled/core/anti-sycophancy-prompting.md +184 -0
  25. package/bundled/core/capability-declaration.md +59 -0
  26. package/bundled/core/circuit-breaker.md +58 -0
  27. package/bundled/core/developer-memory.md +29 -1
  28. package/bundled/core/dual-phase-output.md +56 -0
  29. package/bundled/core/failure-source-taxonomy.md +72 -0
  30. package/bundled/core/frontend-design-standards.md +474 -0
  31. package/bundled/core/health-check-standards.md +72 -0
  32. package/bundled/core/immutability-first.md +105 -0
  33. package/bundled/core/model-selection.md +80 -0
  34. package/bundled/core/packaging-standards.md +216 -0
  35. package/bundled/core/recovery-recipe-registry.md +69 -0
  36. package/bundled/core/retry-standards.md +62 -0
  37. package/bundled/core/security-decision.md +65 -0
  38. package/bundled/core/skill-standard-alignment-check.md +79 -0
  39. package/bundled/core/standard-admission-criteria.md +84 -0
  40. package/bundled/core/standard-lifecycle-management.md +94 -0
  41. package/bundled/core/timeout-standards.md +63 -0
  42. package/bundled/core/token-budget.md +58 -0
  43. package/bundled/core/translation-lifecycle-standards.md +162 -0
  44. package/bundled/locales/zh-CN/CHANGELOG.md +51 -3
  45. package/bundled/locales/zh-CN/README.md +1 -1
  46. package/bundled/locales/zh-CN/core/anti-hallucination.md +22 -3
  47. package/bundled/locales/zh-CN/core/anti-sycophancy-prompting.md +192 -0
  48. package/bundled/locales/zh-CN/core/capability-declaration.md +123 -0
  49. package/bundled/locales/zh-CN/core/circuit-breaker.md +106 -0
  50. package/bundled/locales/zh-CN/core/dual-phase-output.md +103 -0
  51. package/bundled/locales/zh-CN/core/failure-source-taxonomy.md +99 -0
  52. package/bundled/locales/zh-CN/core/frontend-design-standards.md +289 -0
  53. package/bundled/locales/zh-CN/core/health-check-standards.md +144 -0
  54. package/bundled/locales/zh-CN/core/immutability-first.md +96 -0
  55. package/bundled/locales/zh-CN/core/packaging-standards.md +224 -0
  56. package/bundled/locales/zh-CN/core/recovery-recipe-registry.md +146 -0
  57. package/bundled/locales/zh-CN/core/retry-standards.md +131 -0
  58. package/bundled/locales/zh-CN/core/security-decision.md +104 -0
  59. package/bundled/locales/zh-CN/core/skill-standard-alignment-check.md +112 -0
  60. package/bundled/locales/zh-CN/core/standard-admission-criteria.md +104 -0
  61. package/bundled/locales/zh-CN/core/standard-lifecycle-management.md +116 -0
  62. package/bundled/locales/zh-CN/core/timeout-standards.md +117 -0
  63. package/bundled/locales/zh-CN/core/token-budget.md +108 -0
  64. package/bundled/locales/zh-CN/core/translation-lifecycle-standards.md +159 -0
  65. package/bundled/locales/zh-TW/CHANGELOG.md +51 -3
  66. package/bundled/locales/zh-TW/README.md +1 -1
  67. package/bundled/locales/zh-TW/core/anti-sycophancy-prompting.md +192 -0
  68. package/bundled/locales/zh-TW/core/capability-declaration.md +111 -0
  69. package/bundled/locales/zh-TW/core/circuit-breaker.md +111 -0
  70. package/bundled/locales/zh-TW/core/dual-phase-output.md +132 -0
  71. package/bundled/locales/zh-TW/core/failure-source-taxonomy.md +146 -0
  72. package/bundled/locales/zh-TW/core/frontend-design-standards.md +460 -0
  73. package/bundled/locales/zh-TW/core/health-check-standards.md +144 -0
  74. package/bundled/locales/zh-TW/core/immutability-first.md +159 -0
  75. package/bundled/locales/zh-TW/core/packaging-standards.md +224 -0
  76. package/bundled/locales/zh-TW/core/recovery-recipe-registry.md +146 -0
  77. package/bundled/locales/zh-TW/core/retry-standards.md +140 -0
  78. package/bundled/locales/zh-TW/core/security-decision.md +120 -0
  79. package/bundled/locales/zh-TW/core/skill-standard-alignment-check.md +112 -0
  80. package/bundled/locales/zh-TW/core/standard-admission-criteria.md +104 -0
  81. package/bundled/locales/zh-TW/core/standard-lifecycle-management.md +116 -0
  82. package/bundled/locales/zh-TW/core/timeout-standards.md +117 -0
  83. package/bundled/locales/zh-TW/core/token-budget.md +143 -0
  84. package/bundled/locales/zh-TW/core/translation-lifecycle-standards.md +159 -0
  85. package/bundled/skills/e2e-assistant/SKILL.md +19 -5
  86. package/bundled/skills/testing-guide/SKILL.md +5 -0
  87. package/bundled/skills/testing-guide/test-skeleton-templates.md +316 -0
  88. package/package.json +2 -1
  89. package/src/commands/check.js +6 -0
  90. package/src/commands/config.js +9 -0
  91. package/src/commands/init.js +97 -46
  92. package/src/commands/mcp.js +26 -0
  93. package/src/commands/run-intent.js +66 -0
  94. package/src/commands/update.js +41 -4
  95. package/src/core/command-router.js +85 -0
  96. package/src/core/project-config.js +91 -0
  97. package/src/flows/init-flow.js +6 -1
  98. package/src/i18n/messages.js +6 -6
  99. package/src/mcp/__tests__/server.test.js +251 -0
  100. package/src/mcp/server.js +352 -0
  101. package/src/prompts/init.js +157 -1
  102. package/src/reconciler/actual-state-scanner.js +24 -0
  103. package/src/uninstallers/hook-uninstaller.js +32 -1
  104. package/src/utils/detect-self-adoption.js +173 -0
  105. package/src/utils/e2e-analyzer.js +88 -5
  106. package/src/utils/e2e-detector.js +73 -1
  107. package/src/utils/integration-generator.js +22 -3
  108. package/standards-registry.json +203 -4
@@ -0,0 +1,184 @@
1
+ # Anti-Sycophancy Prompting Standards
2
+
3
+ > **Language**: English | [繁體中文](../locales/zh-TW/core/anti-sycophancy-prompting.md)
4
+
5
+ **Version**: 1.0.0
6
+ **Last Updated**: 2026-04-15
7
+ **Applicability**: All AI agent implementations and LLM prompt design
8
+ **Scope**: universal
9
+ **Industry Standards**: None (UDS original, informed by RLHF sycophancy research)
10
+
11
+ ---
12
+
13
+ ## Purpose
14
+
15
+ This standard defines techniques and rules for designing prompts that elicit genuine, critical responses from LLMs rather than sycophantic agreement with the user's implied preferences.
16
+
17
+ Sycophancy in LLMs originates from RLHF training objectives where human raters prefer agreeable responses, causing models to optimize for user satisfaction over accuracy.
18
+
19
+ ---
20
+
21
+ ## Core Techniques
22
+
23
+ ### 1. Socratic Critique Framework (REQ-1)
24
+
25
+ Reframe the task from "evaluate my idea" to "attack my idea" to eliminate the incentive for sycophancy.
26
+
27
+ | DO | DO NOT |
28
+ |----|--------|
29
+ | ✅ Ask for the 3 most fatal objections to the idea | ❌ Ask "is this a good idea?" |
30
+ | ✅ Require each objection to be technically grounded | ❌ Allow vague positive framing |
31
+ | ✅ Prohibit positive opening phrases | ❌ Accept "Great idea, but..." patterns |
32
+
33
+ **Prompt Template**:
34
+ ```
35
+ Do not evaluate whether this is good or bad.
36
+ List the 3 most fatal objections to: [idea]
37
+ Each objection must be technically grounded and non-trivial to dismiss.
38
+ ```
39
+
40
+ ---
41
+
42
+ ### 2. Anchor Prevention Protocol (REQ-2)
43
+
44
+ Obtain the LLM's independent judgment before revealing the user's position, preventing anchoring bias.
45
+
46
+ | Step | Action |
47
+ |------|--------|
48
+ | 1 | Ask for neutral comparison without revealing preference |
49
+ | 2 | Receive independent judgment |
50
+ | 3 | Reveal user's position |
51
+ | 4 | Require explicit technical justification if model changes stance |
52
+
53
+ **Workflow**:
54
+ ```
55
+ Round 1: "Compare [A] vs [B] for [context]. Which is better?"
56
+ → Wait for independent judgment
57
+
58
+ Round 2: "I prefer [A]. Does this change your assessment? Why?"
59
+ → Model must justify any position change with technical facts
60
+ ```
61
+
62
+ ---
63
+
64
+ ### 3. Symmetric Dual-Column Output (REQ-3)
65
+
66
+ Use format constraints to force balanced presentation of opposing viewpoints.
67
+
68
+ **Required Format**:
69
+ ```
70
+ | Arguments FOR the decision | Arguments AGAINST the decision |
71
+ |---------------------------|-------------------------------|
72
+ | [Equal weight content] | [Equal weight content] |
73
+
74
+ Net Recommendation: [Must take a clear stance, may recommend against]
75
+ ```
76
+
77
+ **Rules**:
78
+ - Both columns must have similar length (< 20% difference)
79
+ - Net recommendation must be explicit and may be negative
80
+ - Model cannot escape the format by padding one side
81
+
82
+ ---
83
+
84
+ ### 4. Confidence and Uncertainty Labeling (REQ-4)
85
+
86
+ Require confidence scores on all recommendations to surface uncertainty.
87
+
88
+ **Format**:
89
+ ```
90
+ Recommendation: [specific action]
91
+ Confidence: [1-5] — [reason for uncertainty]
92
+ Unknown: [what information would change this assessment]
93
+ ```
94
+
95
+ **Confidence Scale**:
96
+
97
+ | Level | Meaning |
98
+ |-------|---------|
99
+ | 5 | Validated at similar scale, high certainty |
100
+ | 4 | Industry standard with sufficient documentation |
101
+ | 3 | Reasonable inference, PoC recommended |
102
+ | 2 | Uncertain, Spike strongly recommended |
103
+ | 1 | Highly uncertain, not recommended for direct adoption |
104
+
105
+ **Rules**:
106
+ - Confidence < 3 must include "More information needed before confirming"
107
+ - All major claims require confidence labeling
108
+ - Uncertainty must be actionable (specify what information resolves it)
109
+
110
+ ---
111
+
112
+ ### 5. Sycophancy Detection Heuristics (REQ-5)
113
+
114
+ Heuristics for identifying sycophantic responses, usable in automated post-processing.
115
+
116
+ | Signal Type | Detection Rule |
117
+ |-------------|---------------|
118
+ | Positive opener | Response starts with agreeable phrase within first 50 tokens (e.g., "great", "interesting", "certainly", "of course") |
119
+ | Position flip | Model reverses stance after user reveals preference without new technical evidence |
120
+ | Risk minimization | Pattern: "While there are some minor issues, overall..." without specifying the issues |
121
+ | Missing quantification | Major recommendation lacks confidence score or specific metrics |
122
+
123
+ **Trigger**: If 2+ signals detected → invoke re-evaluation with explicit Red Team framing.
124
+
125
+ ---
126
+
127
+ ## Prohibited Behaviors
128
+
129
+ | Prohibited | Correct Action |
130
+ |-----------|----------------|
131
+ | Opening critique with positive affirmation | Start directly with the analysis |
132
+ | Reversing stance without new technical evidence | Maintain position or cite specific new information |
133
+ | Describing risks as "minor" without evidence | Quantify risk or explain why it is bounded |
134
+ | Providing major recommendations without confidence | Always include confidence (1-5) and uncertainty statement |
135
+
136
+ ---
137
+
138
+ ## Integration with Agent Prompts
139
+
140
+ When applying to AI agents:
141
+
142
+ | Agent Type | Apply Rules |
143
+ |------------|-------------|
144
+ | Code Review Agent | REQ-1 (Socratic) + REQ-3 (Dual-column) + REQ-5 (Detection) |
145
+ | Architecture Advisor Agent | REQ-2 (Anchor Prevention) + REQ-4 (Confidence) + REQ-5 (Detection) |
146
+ | Bug Analysis Agent | REQ-1 (Socratic) + REQ-4 (Confidence) |
147
+ | General Consultation Agent | REQ-3 (Dual-column) + REQ-4 (Confidence) |
148
+
149
+ ---
150
+
151
+ ## Complete Anti-Sycophancy Prompt Template
152
+
153
+ ```
154
+ You are a domain expert with no emotional investment in my satisfaction.
155
+ Your role is to identify flaws in my thinking, not to make me feel good.
156
+
157
+ Rules:
158
+ - Do NOT open with positive phrases (good, interesting, nice, certainly)
159
+ - Every recommendation must include a confidence level (1-5) and what you are uncertain about
160
+ - If my direction is wrong, say so directly
161
+
162
+ My question: [question]
163
+
164
+ First, list the incorrect assumptions I may be holding about this problem.
165
+ Then give your honest recommendation.
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Checklist
171
+
172
+ - [ ] Prompt does not invite agreement ("is this good?")
173
+ - [ ] Positive opening phrases explicitly prohibited
174
+ - [ ] Model's independent stance obtained before revealing user preference (if applicable)
175
+ - [ ] Dual-column format enforced for evaluation tasks
176
+ - [ ] Confidence levels required on major recommendations
177
+ - [ ] Sycophancy detection applied to output before presenting to user
178
+
179
+ ---
180
+
181
+ ## Related Standards
182
+
183
+ - [anti-hallucination.md](anti-hallucination.md) — Prevents fabrication; complements anti-sycophancy
184
+ - [agent-epistemic-calibration.md](agent-epistemic-calibration.md) — Epistemic humility in agent design (where applicable)
@@ -0,0 +1,59 @@
1
+ # Capability Declaration Standard
2
+
3
+ > **Source**: XSPEC-037 | **Borrowed from**: claude-code-book Ch.3
4
+
5
+ ## Overview
6
+
7
+ The Capability Declaration Standard mandates that all tools, adapters, and agents explicitly declare their safety properties. **All properties default to the most conservative (Fail-Closed) values** — a developer who forgets to declare capabilities gets safe behavior, not dangerous behavior.
8
+
9
+ This design is borrowed from claude-code-book's `buildTool` factory, where `isConcurrencySafe()` and `isReadOnly()` default to `false` and performance optimizations require explicit opt-in.
10
+
11
+ ## Fail-Closed Defaults
12
+
13
+ ```typescript
14
+ const FAIL_CLOSED_DEFAULTS: CapabilityDeclaration = {
15
+ isConcurrencySafe: false, // Cannot run in parallel
16
+ isReadOnly: false, // Assumed to have side effects
17
+ requiresUserConfirmation: true, // Must confirm before execution
18
+ trustLevel: "untrusted", // Maximum sandbox restrictions
19
+ };
20
+ ```
21
+
22
+ ## CapabilityDeclaration Interface
23
+
24
+ | Field | Type | Default | Description |
25
+ |-------|------|---------|-------------|
26
+ | `isConcurrencySafe` | boolean | **false** | Safe to run in parallel with other operations |
27
+ | `isReadOnly` | boolean | **false** | Makes no persistent state changes |
28
+ | `requiresUserConfirmation` | boolean | **true** | Requires explicit user approval before execution |
29
+ | `trustLevel` | enum | **untrusted** | Sandbox isolation level |
30
+
31
+ ## Trust Levels
32
+
33
+ | Level | Description | Sandbox |
34
+ |-------|-------------|---------|
35
+ | `trusted` | Built-in or audited plugin | No restrictions |
36
+ | `sandboxed` | Third-party tool | Restricted execution environment |
37
+ | `untrusted` | Unknown source | Maximum restrictions (default) |
38
+
39
+ ## Well-Known Declarations
40
+
41
+ | Tool | isConcurrencySafe | isReadOnly | requiresUserConfirmation | trustLevel |
42
+ |------|-------------------|------------|---------------------|------------|
43
+ | GrepTool | ✅ true | ✅ true | ❌ false | trusted |
44
+ | GlobTool | ✅ true | ✅ true | ❌ false | trusted |
45
+ | FileReadTool | ✅ true | ✅ true | ❌ false | trusted |
46
+ | FileEditTool | ❌ false | ❌ false | ✅ true | trusted |
47
+ | BashTool | ❌ false | ❌ false | ✅ true | sandboxed |
48
+
49
+ ## Enforcement
50
+
51
+ - **Missing declaration**: Use `FAIL_CLOSED_DEFAULTS` + log `[WARN] Capability not declared for: {name}`
52
+ - **False claim detection**: If declared `isReadOnly: true` but performs writes → log `CAPABILITY_MISMATCH` event, revert to Fail-Closed
53
+ - **Concurrency**: Only components with `isConcurrencySafe: true` may be batched into parallel execution
54
+
55
+ ## References
56
+
57
+ - AI-optimized: [ai/standards/capability-declaration.ai.yaml](../ai/standards/capability-declaration.ai.yaml)
58
+ - XSPEC-037: Cross-project specification
59
+ - Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.3 `buildTool` Fail-Closed factory
@@ -0,0 +1,58 @@
1
+ # Circuit Breaker Standard
2
+
3
+ > **Source**: XSPEC-036 | **Borrowed from**: claude-code-book Ch.2
4
+
5
+ ## Overview
6
+
7
+ The Circuit Breaker pattern protects Agent systems from API stampedes caused by repeated failures. After `failureThreshold` consecutive failures, the breaker opens and immediately rejects all requests — no waiting for timeout. After a cooldown period, it allows one probe call to test recovery.
8
+
9
+ Real-world data: Before introducing circuit breakers, claude-code-book measured ~250K wasted API calls per day across 1,279 sessions with >50 consecutive failures each (max: 3,272 consecutive failures).
10
+
11
+ ## States
12
+
13
+ ```
14
+ CLOSED ──(N consecutive failures)──→ OPEN
15
+ OPEN ──(cooldownMs elapsed)──→ HALF_OPEN
16
+ HALF_OPEN ──(probe success)──→ CLOSED
17
+ HALF_OPEN ──(probe failure)──→ OPEN
18
+ ```
19
+
20
+ | State | Behavior |
21
+ |-------|----------|
22
+ | **CLOSED** | Normal operation, requests forwarded |
23
+ | **OPEN** | All requests rejected immediately with `CircuitOpenError` |
24
+ | **HALF_OPEN** | One probe request allowed; success → CLOSED, failure → OPEN |
25
+
26
+ ## Configuration
27
+
28
+ | Parameter | Default | Description |
29
+ |-----------|---------|-------------|
30
+ | `failureThreshold` | 3 | Consecutive failures before opening |
31
+ | `cooldownMs` | 30000 | OPEN → HALF_OPEN wait time (ms) |
32
+ | `successThreshold` | 1 | Probe successes needed to close |
33
+
34
+ ## Interface
35
+
36
+ ```typescript
37
+ interface CircuitBreaker {
38
+ readonly name: string;
39
+ readonly state: "CLOSED" | "HALF_OPEN" | "OPEN";
40
+ execute<T>(fn: () => Promise<T>): Promise<T>; // throws CircuitOpenError when OPEN
41
+ getState(): CircuitBreakerState;
42
+ reset(): void; // admin manual reset
43
+ }
44
+ ```
45
+
46
+ ## Applicable Scenarios
47
+
48
+ - DevAP Fix Loop retries
49
+ - DevAP LLM API call protection
50
+ - VibeOps Feedback Loop retries
51
+ - VibeOps FLARE retrieval retries
52
+ - Any component using retry with external dependencies
53
+
54
+ ## References
55
+
56
+ - AI-optimized: [ai/standards/circuit-breaker.ai.yaml](../ai/standards/circuit-breaker.ai.yaml)
57
+ - XSPEC-036: Cross-project specification
58
+ - Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.2 `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES`
@@ -329,7 +329,35 @@ Multiple pitfalls → Pattern → Mental Model
329
329
 
330
330
  ---
331
331
 
332
- ## 5. Noise Control
332
+ ## 5. Memory Verification Principle(記憶是線索,非結論)
333
+
334
+ > 借鑑 lintsinghua/claude-code-book 記憶驗證原則。
335
+
336
+ 記憶提供方向,但**不能直接作為事實使用**。使用記憶前須獨立驗證:
337
+
338
+ | 記憶內容 | 驗證方法 | 衝突時處理 |
339
+ |---------|---------|-----------|
340
+ | 檔案路徑 | 確認檔案仍存在(Glob/Read) | 標記記憶為 `needs-revision` |
341
+ | 函式名稱/API flag | 確認仍存在(Grep/文件) | 標記記憶為 `needs-revision` |
342
+ | 架構快照/Repo 狀態 | 優先信任 `git log`/原始碼 | 更新記憶為當前狀態 |
343
+ | 套件版本/相依 | 確認 package.json/lockfile | 以實際版本為準 |
344
+
345
+ ### 禁止行為
346
+
347
+ - 直接引用記憶中的具體 API/路徑/函式名稱推薦給使用者,未先驗證
348
+ - 宣稱「根據記憶,X 存在」而未執行獨立確認
349
+ - 因記憶內容與現況衝突時,選擇信任記憶而非當前觀察
350
+
351
+ ### 記憶用途場景
352
+
353
+ 記憶適合提供:
354
+ - **搜尋方向**:「這類問題上次在 X 模組找到答案」
355
+ - **模式線索**:「這個錯誤模式對應已知 pitfall MEM-2026-0042」
356
+ - **決策背景**:「此設計決策的歷史背景是...」
357
+
358
+ ---
359
+
360
+ ## 6. Noise Control
333
361
 
334
362
  ### Push Levels
335
363
 
@@ -0,0 +1,56 @@
1
+ # Dual-Phase LLM Output Standard
2
+
3
+ > **Source**: XSPEC-035 | **Borrowed from**: claude-code-book Ch.7 AutoCompact
4
+
5
+ ## Overview
6
+
7
+ The Dual-Phase LLM Output pattern requires LLM review agents to produce two XML blocks in a single response: an `<analysis>` thinking scratchpad (discarded after processing) and a `<summary>` structured conclusion (retained). This lets the model reason thoroughly while preventing thinking processes from accumulating in the conversation context.
8
+
9
+ ## Problem
10
+
11
+ Review agents (Judge, Evaluator, Guardian) typically generate 2000–5000 token responses, with 50–70% being reasoning that accumulates in conversation history. In repeated review scenarios (Fix Loop 3× retries), this wastes 3000–10500 tokens per task.
12
+
13
+ ## Format
14
+
15
+ ```xml
16
+ <analysis>
17
+ [Reasoning scratchpad — DISCARDED after processing]
18
+ - Step-by-step evaluation
19
+ - Edge case considerations
20
+ - Alternative comparisons
21
+ </analysis>
22
+
23
+ <summary>
24
+ decision: approved | rejected | needs_revision
25
+ confidence: high | medium | low
26
+ findings:
27
+ - [type] description
28
+ next_action: [recommended follow-up action]
29
+ </summary>
30
+ ```
31
+
32
+ ## Post-Processing Rules
33
+
34
+ 1. Extract `<summary>` content → persist to context
35
+ 2. Discard `<analysis>` content → never write to conversation history
36
+ 3. If `<summary>` tag missing → fallback: treat full response as summary, log `[WARN] dual-phase format missing`
37
+
38
+ ## Extension Fields
39
+
40
+ Applications may add fields inside `<summary>` but must not remove core fields:
41
+ - **Security (Guardian)**: `severity: critical | high | medium | low`, `cwe_ids: [CWE-NNN]`
42
+ - **Quality (Evaluator)**: `test_coverage: number`, `tech_debt_score: number`
43
+
44
+ ## Token Impact
45
+
46
+ | Scenario | Savings |
47
+ |----------|---------|
48
+ | Single review | 1000–3500 tokens |
49
+ | Fix Loop (3× retries) | 3000–10500 tokens |
50
+ | VibeOps pipeline (evaluator + guardian) | 2000–7000 tokens per run |
51
+
52
+ ## References
53
+
54
+ - AI-optimized: [ai/standards/dual-phase-output.ai.yaml](../ai/standards/dual-phase-output.ai.yaml)
55
+ - XSPEC-035: Cross-project specification
56
+ - Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.7 `formatCompactSummary`
@@ -0,0 +1,72 @@
1
+ # Failure Source Taxonomy Standard
2
+
3
+ > **Source**: XSPEC-045 | **Borrowed from**: ultraworkers/claw-code ROADMAP Phase 2 Failure Taxonomy
4
+
5
+ ## Overview
6
+
7
+ The Failure Source Taxonomy adds a `failureSource` (why) dimension on top of the existing `TaskStatus` (what). Structured failure sources allow the downstream recovery mechanism (Recovery Recipe Registry, XSPEC-046) to precisely match strategies, avoiding the application of the same retry logic to fundamentally different failure types.
8
+
9
+ ## 8 Failure Sources
10
+
11
+ | Source | Description | Recommended Recovery |
12
+ |--------|-------------|---------------------|
13
+ | `prompt_delivery` | Prompt not delivered to LLM (API 4xx, empty response, parse error) | retry or model_switch |
14
+ | `model_degradation` | LLM quality degrades (repetitive output, irrelevant response) | model_switch |
15
+ | `branch_divergence` | Working branch falls behind base branch | rebase_and_retry |
16
+ | `compilation` | Compile or type-check errors (tsc, cargo, go build) | fix_loop |
17
+ | `test_failure` | Test failures (unit / integration / system / e2e) | fix_loop |
18
+ | `tool_failure` | Tool layer failure (MCP server unresponsive, plugin load failure) | circuit_breaker then retry |
19
+ | `policy_violation` | Safety/governance policy block (Guardian deny, SafetyHook) | human_checkpoint |
20
+ | `resource_exhaustion` | Resource exhausted (token budget exceeded, timeout, USD budget) | degraded_mode or human_checkpoint |
21
+
22
+ ## Priority Rules
23
+
24
+ When multiple failure sources coexist, apply:
25
+
26
+ 1. `branch_divergence` > `compilation` — divergence is usually the root cause of compilation failures
27
+ 2. `policy_violation` > others — security takes precedence, do not attempt bypass
28
+ 3. `resource_exhaustion` > others — retrying when resources are exhausted is meaningless
29
+ 4. Otherwise: use the first detected source
30
+
31
+ ## Types
32
+
33
+ ```typescript
34
+ type FailureSource =
35
+ | "prompt_delivery"
36
+ | "model_degradation"
37
+ | "branch_divergence"
38
+ | "compilation"
39
+ | "test_failure"
40
+ | "tool_failure"
41
+ | "policy_violation"
42
+ | "resource_exhaustion";
43
+
44
+ interface FailureDetail {
45
+ source: FailureSource;
46
+ raw_error: string;
47
+ detected_by: string; // quality-gate / claude-adapter / safety-hook / branch-drift
48
+ timestamp: string; // ISO 8601
49
+ }
50
+ ```
51
+
52
+ ## Guidelines
53
+
54
+ - All failure results should carry `failureSource` to enable precise recovery strategy matching
55
+ - `failureSource` is an **optional** field — existing code that does not set or read it must continue to work unchanged
56
+ - Select the most fundamental source as `failureSource` in a single failure event
57
+ - `failureSource` should be set by the component that detects the failure
58
+ - DevAP and VibeOps each define the `FailureSource` type independently (AGPL isolation)
59
+
60
+ ## Applicable Scenarios
61
+
62
+ - DevAP QualityGate failure result enrichment
63
+ - VibeOps PipelineRunner `agent:error` event payload
64
+ - Recovery Recipe Registry (XSPEC-046) match key
65
+ - Telemetry failure analytics dimension
66
+
67
+ ## References
68
+
69
+ - AI-optimized: [ai/standards/failure-source-taxonomy.ai.yaml](../ai/standards/failure-source-taxonomy.ai.yaml)
70
+ - XSPEC-045: Cross-project specification
71
+ - Depends on: Recovery Recipe Registry (XSPEC-046)
72
+ - Borrowed from: [ultraworkers/claw-code](https://github.com/ultraworkers/claw-code) ROADMAP Phase 2 Failure Taxonomy