universal-dev-standards 5.1.0-beta.6 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/bin/uds.js +14 -0
- package/bundled/ai/standards/agent-communication-protocol.ai.yaml +34 -0
- package/bundled/ai/standards/anti-sycophancy-prompting.ai.yaml +111 -0
- package/bundled/ai/standards/capability-declaration.ai.yaml +113 -0
- package/bundled/ai/standards/circuit-breaker.ai.yaml +93 -0
- package/bundled/ai/standards/developer-memory.ai.yaml +13 -0
- package/bundled/ai/standards/dual-phase-output.ai.yaml +108 -0
- package/bundled/ai/standards/failure-source-taxonomy.ai.yaml +115 -0
- package/bundled/ai/standards/frontend-design-standards.ai.yaml +305 -0
- package/bundled/ai/standards/health-check-standards.ai.yaml +140 -0
- package/bundled/ai/standards/immutability-first.ai.yaml +112 -0
- package/bundled/ai/standards/model-selection.ai.yaml +111 -3
- package/bundled/ai/standards/packaging-standards.ai.yaml +142 -0
- package/bundled/ai/standards/recovery-recipe-registry.ai.yaml +200 -0
- package/bundled/ai/standards/retry-standards.ai.yaml +134 -0
- package/bundled/ai/standards/security-decision.ai.yaml +87 -0
- package/bundled/ai/standards/skill-standard-alignment-check.ai.yaml +119 -0
- package/bundled/ai/standards/standard-admission-criteria.ai.yaml +107 -0
- package/bundled/ai/standards/standard-lifecycle-management.ai.yaml +144 -0
- package/bundled/ai/standards/timeout-standards.ai.yaml +104 -0
- package/bundled/ai/standards/token-budget.ai.yaml +108 -0
- package/bundled/ai/standards/translation-lifecycle-standards.ai.yaml +145 -0
- package/bundled/core/anti-sycophancy-prompting.md +184 -0
- package/bundled/core/capability-declaration.md +59 -0
- package/bundled/core/circuit-breaker.md +58 -0
- package/bundled/core/developer-memory.md +29 -1
- package/bundled/core/dual-phase-output.md +56 -0
- package/bundled/core/failure-source-taxonomy.md +72 -0
- package/bundled/core/frontend-design-standards.md +474 -0
- package/bundled/core/health-check-standards.md +72 -0
- package/bundled/core/immutability-first.md +105 -0
- package/bundled/core/model-selection.md +80 -0
- package/bundled/core/packaging-standards.md +216 -0
- package/bundled/core/recovery-recipe-registry.md +69 -0
- package/bundled/core/retry-standards.md +62 -0
- package/bundled/core/security-decision.md +65 -0
- package/bundled/core/skill-standard-alignment-check.md +79 -0
- package/bundled/core/standard-admission-criteria.md +84 -0
- package/bundled/core/standard-lifecycle-management.md +94 -0
- package/bundled/core/timeout-standards.md +63 -0
- package/bundled/core/token-budget.md +58 -0
- package/bundled/core/translation-lifecycle-standards.md +162 -0
- package/bundled/locales/zh-CN/CHANGELOG.md +51 -3
- package/bundled/locales/zh-CN/README.md +1 -1
- package/bundled/locales/zh-CN/core/anti-hallucination.md +22 -3
- package/bundled/locales/zh-CN/core/anti-sycophancy-prompting.md +192 -0
- package/bundled/locales/zh-CN/core/capability-declaration.md +123 -0
- package/bundled/locales/zh-CN/core/circuit-breaker.md +106 -0
- package/bundled/locales/zh-CN/core/dual-phase-output.md +103 -0
- package/bundled/locales/zh-CN/core/failure-source-taxonomy.md +99 -0
- package/bundled/locales/zh-CN/core/frontend-design-standards.md +289 -0
- package/bundled/locales/zh-CN/core/health-check-standards.md +144 -0
- package/bundled/locales/zh-CN/core/immutability-first.md +96 -0
- package/bundled/locales/zh-CN/core/packaging-standards.md +224 -0
- package/bundled/locales/zh-CN/core/recovery-recipe-registry.md +146 -0
- package/bundled/locales/zh-CN/core/retry-standards.md +131 -0
- package/bundled/locales/zh-CN/core/security-decision.md +104 -0
- package/bundled/locales/zh-CN/core/skill-standard-alignment-check.md +112 -0
- package/bundled/locales/zh-CN/core/standard-admission-criteria.md +104 -0
- package/bundled/locales/zh-CN/core/standard-lifecycle-management.md +116 -0
- package/bundled/locales/zh-CN/core/timeout-standards.md +117 -0
- package/bundled/locales/zh-CN/core/token-budget.md +108 -0
- package/bundled/locales/zh-CN/core/translation-lifecycle-standards.md +159 -0
- package/bundled/locales/zh-TW/CHANGELOG.md +51 -3
- package/bundled/locales/zh-TW/README.md +1 -1
- package/bundled/locales/zh-TW/core/anti-sycophancy-prompting.md +192 -0
- package/bundled/locales/zh-TW/core/capability-declaration.md +111 -0
- package/bundled/locales/zh-TW/core/circuit-breaker.md +111 -0
- package/bundled/locales/zh-TW/core/dual-phase-output.md +132 -0
- package/bundled/locales/zh-TW/core/failure-source-taxonomy.md +146 -0
- package/bundled/locales/zh-TW/core/frontend-design-standards.md +460 -0
- package/bundled/locales/zh-TW/core/health-check-standards.md +144 -0
- package/bundled/locales/zh-TW/core/immutability-first.md +159 -0
- package/bundled/locales/zh-TW/core/packaging-standards.md +224 -0
- package/bundled/locales/zh-TW/core/recovery-recipe-registry.md +146 -0
- package/bundled/locales/zh-TW/core/retry-standards.md +140 -0
- package/bundled/locales/zh-TW/core/security-decision.md +120 -0
- package/bundled/locales/zh-TW/core/skill-standard-alignment-check.md +112 -0
- package/bundled/locales/zh-TW/core/standard-admission-criteria.md +104 -0
- package/bundled/locales/zh-TW/core/standard-lifecycle-management.md +116 -0
- package/bundled/locales/zh-TW/core/timeout-standards.md +117 -0
- package/bundled/locales/zh-TW/core/token-budget.md +143 -0
- package/bundled/locales/zh-TW/core/translation-lifecycle-standards.md +159 -0
- package/bundled/skills/e2e-assistant/SKILL.md +19 -5
- package/bundled/skills/testing-guide/SKILL.md +5 -0
- package/bundled/skills/testing-guide/test-skeleton-templates.md +316 -0
- package/package.json +2 -1
- package/src/commands/check.js +6 -0
- package/src/commands/config.js +9 -0
- package/src/commands/init.js +97 -46
- package/src/commands/mcp.js +26 -0
- package/src/commands/run-intent.js +66 -0
- package/src/commands/update.js +41 -4
- package/src/core/command-router.js +85 -0
- package/src/core/project-config.js +91 -0
- package/src/flows/init-flow.js +6 -1
- package/src/i18n/messages.js +6 -6
- package/src/mcp/__tests__/server.test.js +251 -0
- package/src/mcp/server.js +352 -0
- package/src/prompts/init.js +157 -1
- package/src/reconciler/actual-state-scanner.js +24 -0
- package/src/uninstallers/hook-uninstaller.js +32 -1
- package/src/utils/detect-self-adoption.js +173 -0
- package/src/utils/e2e-analyzer.js +88 -5
- package/src/utils/e2e-detector.js +73 -1
- package/src/utils/integration-generator.js +22 -3
- package/standards-registry.json +203 -4
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# Anti-Sycophancy Prompting Standards
|
|
2
|
+
|
|
3
|
+
> **Language**: English | [繁體中文](../locales/zh-TW/core/anti-sycophancy-prompting.md) | [简体中文](../locales/zh-CN/core/anti-sycophancy-prompting.md)
|
|
4
|
+
|
|
5
|
+
**Version**: 1.0.0
|
|
6
|
+
**Last Updated**: 2026-04-15
|
|
7
|
+
**Applicability**: All AI agent implementations and LLM prompt design
|
|
8
|
+
**Scope**: universal
|
|
9
|
+
**Industry Standards**: None (UDS original, informed by RLHF sycophancy research)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Purpose
|
|
14
|
+
|
|
15
|
+
This standard defines techniques and rules for designing prompts that elicit genuine, critical responses from LLMs rather than sycophantic agreement with the user's implied preferences.
|
|
16
|
+
|
|
17
|
+
Sycophancy in LLMs originates from RLHF training objectives where human raters prefer agreeable responses, causing models to optimize for user satisfaction over accuracy.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Core Techniques
|
|
22
|
+
|
|
23
|
+
### 1. Socratic Critique Framework (REQ-1)
|
|
24
|
+
|
|
25
|
+
Reframe the task from "evaluate my idea" to "attack my idea" to eliminate the incentive for sycophancy.
|
|
26
|
+
|
|
27
|
+
| DO | DO NOT |
|
|
28
|
+
|----|--------|
|
|
29
|
+
| ✅ Ask for the 3 most fatal objections to the idea | ❌ Ask "is this a good idea?" |
|
|
30
|
+
| ✅ Require each objection to be technically grounded | ❌ Allow vague positive framing |
|
|
31
|
+
| ✅ Prohibit positive opening phrases | ❌ Accept "Great idea, but..." patterns |
|
|
32
|
+
|
|
33
|
+
**Prompt Template**:
|
|
34
|
+
```
|
|
35
|
+
Do not evaluate whether this is good or bad.
|
|
36
|
+
List the 3 most fatal objections to: [idea]
|
|
37
|
+
Each objection must be technically grounded and non-trivial to dismiss.
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
### 2. Anchor Prevention Protocol (REQ-2)
|
|
43
|
+
|
|
44
|
+
Obtain the LLM's independent judgment before revealing the user's position, preventing anchoring bias.
|
|
45
|
+
|
|
46
|
+
| Step | Action |
|
|
47
|
+
|------|--------|
|
|
48
|
+
| 1 | Ask for neutral comparison without revealing preference |
|
|
49
|
+
| 2 | Receive independent judgment |
|
|
50
|
+
| 3 | Reveal user's position |
|
|
51
|
+
| 4 | Require explicit technical justification if model changes stance |
|
|
52
|
+
|
|
53
|
+
**Workflow**:
|
|
54
|
+
```
|
|
55
|
+
Round 1: "Compare [A] vs [B] for [context]. Which is better?"
|
|
56
|
+
→ Wait for independent judgment
|
|
57
|
+
|
|
58
|
+
Round 2: "I prefer [A]. Does this change your assessment? Why?"
|
|
59
|
+
→ Model must justify any position change with technical facts
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
### 3. Symmetric Dual-Column Output (REQ-3)
|
|
65
|
+
|
|
66
|
+
Use format constraints to force balanced presentation of opposing viewpoints.
|
|
67
|
+
|
|
68
|
+
**Required Format**:
|
|
69
|
+
```
|
|
70
|
+
| Arguments FOR the decision | Arguments AGAINST the decision |
|
|
71
|
+
|---------------------------|-------------------------------|
|
|
72
|
+
| [Equal weight content] | [Equal weight content] |
|
|
73
|
+
|
|
74
|
+
Net Recommendation: [Must take a clear stance, may recommend against]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**Rules**:
|
|
78
|
+
- Both columns must have similar length (< 20% difference)
|
|
79
|
+
- Net recommendation must be explicit and may be negative
|
|
80
|
+
- Model cannot escape the format by padding one side
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
### 4. Confidence and Uncertainty Labeling (REQ-4)
|
|
85
|
+
|
|
86
|
+
Require confidence scores on all recommendations to surface uncertainty.
|
|
87
|
+
|
|
88
|
+
**Format**:
|
|
89
|
+
```
|
|
90
|
+
Recommendation: [specific action]
|
|
91
|
+
Confidence: [1-5] — [reason for uncertainty]
|
|
92
|
+
Unknown: [what information would change this assessment]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Confidence Scale**:
|
|
96
|
+
|
|
97
|
+
| Level | Meaning |
|
|
98
|
+
|-------|---------|
|
|
99
|
+
| 5 | Validated at similar scale, high certainty |
|
|
100
|
+
| 4 | Industry standard with sufficient documentation |
|
|
101
|
+
| 3 | Reasonable inference, PoC recommended |
|
|
102
|
+
| 2 | Uncertain, Spike strongly recommended |
|
|
103
|
+
| 1 | Highly uncertain, not recommended for direct adoption |
|
|
104
|
+
|
|
105
|
+
**Rules**:
|
|
106
|
+
- Confidence < 3 must include "More information needed before confirming"
|
|
107
|
+
- All major claims require confidence labeling
|
|
108
|
+
- Uncertainty must be actionable (specify what information resolves it)
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
### 5. Sycophancy Detection Heuristics (REQ-5)
|
|
113
|
+
|
|
114
|
+
Heuristics for identifying sycophantic responses, usable in automated post-processing.
|
|
115
|
+
|
|
116
|
+
| Signal Type | Detection Rule |
|
|
117
|
+
|-------------|---------------|
|
|
118
|
+
| Positive opener | An agreeable phrase (e.g., "great", "interesting", "certainly", "of course") appears within the first 50 tokens of the response |
|
|
119
|
+
| Position flip | Model reverses stance after user reveals preference without new technical evidence |
|
|
120
|
+
| Risk minimization | Pattern: "While there are some minor issues, overall..." without specifying the issues |
|
|
121
|
+
| Missing quantification | Major recommendation lacks confidence score or specific metrics |
|
|
122
|
+
|
|
123
|
+
**Trigger**: If 2+ signals detected → invoke re-evaluation with explicit Red Team framing.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Prohibited Behaviors
|
|
128
|
+
|
|
129
|
+
| Prohibited | Correct Action |
|
|
130
|
+
|-----------|----------------|
|
|
131
|
+
| Opening critique with positive affirmation | Start directly with the analysis |
|
|
132
|
+
| Reversing stance without new technical evidence | Maintain position or cite specific new information |
|
|
133
|
+
| Describing risks as "minor" without evidence | Quantify risk or explain why it is bounded |
|
|
134
|
+
| Providing major recommendations without confidence | Always include confidence (1-5) and uncertainty statement |
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Integration with Agent Prompts
|
|
139
|
+
|
|
140
|
+
When applying to AI agents:
|
|
141
|
+
|
|
142
|
+
| Agent Type | Apply Rules |
|
|
143
|
+
|------------|-------------|
|
|
144
|
+
| Code Review Agent | REQ-1 (Socratic) + REQ-3 (Dual-column) + REQ-5 (Detection) |
|
|
145
|
+
| Architecture Advisor Agent | REQ-2 (Anchor Prevention) + REQ-4 (Confidence) + REQ-5 (Detection) |
|
|
146
|
+
| Bug Analysis Agent | REQ-1 (Socratic) + REQ-4 (Confidence) |
|
|
147
|
+
| General Consultation Agent | REQ-3 (Dual-column) + REQ-4 (Confidence) |
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Complete Anti-Sycophancy Prompt Template
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
You are a domain expert with no emotional investment in my satisfaction.
|
|
155
|
+
Your role is to identify flaws in my thinking, not to make me feel good.
|
|
156
|
+
|
|
157
|
+
Rules:
|
|
158
|
+
- Do NOT open with positive phrases (good, interesting, nice, certainly)
|
|
159
|
+
- Every recommendation must include a confidence level (1-5) and what you are uncertain about
|
|
160
|
+
- If my direction is wrong, say so directly
|
|
161
|
+
|
|
162
|
+
My question: [question]
|
|
163
|
+
|
|
164
|
+
First, list the incorrect assumptions I may be holding about this problem.
|
|
165
|
+
Then give your honest recommendation.
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Checklist
|
|
171
|
+
|
|
172
|
+
- [ ] Prompt does not invite agreement ("is this good?")
|
|
173
|
+
- [ ] Positive opening phrases explicitly prohibited
|
|
174
|
+
- [ ] Model's independent stance obtained before revealing user preference (if applicable)
|
|
175
|
+
- [ ] Dual-column format enforced for evaluation tasks
|
|
176
|
+
- [ ] Confidence levels required on major recommendations
|
|
177
|
+
- [ ] Sycophancy detection applied to output before presenting to user
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Related Standards
|
|
182
|
+
|
|
183
|
+
- [anti-hallucination.md](anti-hallucination.md) — Prevents fabrication; complements anti-sycophancy
|
|
184
|
+
- [agent-epistemic-calibration.md](agent-epistemic-calibration.md) — Epistemic humility in agent design (where applicable)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Capability Declaration Standard
|
|
2
|
+
|
|
3
|
+
> **Source**: XSPEC-037 | **Borrowed from**: claude-code-book Ch.3
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The Capability Declaration Standard mandates that all tools, adapters, and agents explicitly declare their safety properties. **All properties default to the most conservative (Fail-Closed) values** — a developer who forgets to declare capabilities gets safe behavior, not dangerous behavior.
|
|
8
|
+
|
|
9
|
+
This design is borrowed from claude-code-book's `buildTool` factory, where `isConcurrencySafe()` and `isReadOnly()` default to `false`, so performance optimizations require explicit opt-in.
|
|
10
|
+
|
|
11
|
+
## Fail-Closed Defaults
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
const FAIL_CLOSED_DEFAULTS: CapabilityDeclaration = {
|
|
15
|
+
isConcurrencySafe: false, // Cannot run in parallel
|
|
16
|
+
isReadOnly: false, // Assumed to have side effects
|
|
17
|
+
requiresUserConfirmation: true, // Must confirm before execution
|
|
18
|
+
trustLevel: "untrusted", // Maximum sandbox restrictions
|
|
19
|
+
};
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## CapabilityDeclaration Interface
|
|
23
|
+
|
|
24
|
+
| Field | Type | Default | Description |
|
|
25
|
+
|-------|------|---------|-------------|
|
|
26
|
+
| `isConcurrencySafe` | boolean | **false** | Safe to run in parallel with other operations |
|
|
27
|
+
| `isReadOnly` | boolean | **false** | Makes no persistent state changes |
|
|
28
|
+
| `requiresUserConfirmation` | boolean | **true** | Requires explicit user approval before execution |
|
|
29
|
+
| `trustLevel` | enum | **untrusted** | Sandbox isolation level |
|
|
30
|
+
|
|
31
|
+
## Trust Levels
|
|
32
|
+
|
|
33
|
+
| Level | Description | Sandbox |
|
|
34
|
+
|-------|-------------|---------|
|
|
35
|
+
| `trusted` | Built-in or audited plugin | No restrictions |
|
|
36
|
+
| `sandboxed` | Third-party tool | Restricted execution environment |
|
|
37
|
+
| `untrusted` | Unknown source | Maximum restrictions (default) |
|
|
38
|
+
|
|
39
|
+
## Well-Known Declarations
|
|
40
|
+
|
|
41
|
+
| Tool | isConcurrencySafe | isReadOnly | requiresUserConfirmation | trustLevel |
|
|
42
|
+
|------|-------------------|------------|---------------------|------------|
|
|
43
|
+
| GrepTool | ✅ true | ✅ true | ❌ false | trusted |
|
|
44
|
+
| GlobTool | ✅ true | ✅ true | ❌ false | trusted |
|
|
45
|
+
| FileReadTool | ✅ true | ✅ true | ❌ false | trusted |
|
|
46
|
+
| FileEditTool | ❌ false | ❌ false | ✅ true | trusted |
|
|
47
|
+
| BashTool | ❌ false | ❌ false | ✅ true | sandboxed |
|
|
48
|
+
|
|
49
|
+
## Enforcement
|
|
50
|
+
|
|
51
|
+
- **Missing declaration**: Use `FAIL_CLOSED_DEFAULTS` + log `[WARN] Capability not declared for: {name}`
|
|
52
|
+
- **False claim detection**: If declared `isReadOnly: true` but performs writes → log `CAPABILITY_MISMATCH` event, revert to Fail-Closed
|
|
53
|
+
- **Concurrency**: Only components with `isConcurrencySafe: true` may be batched into parallel execution
|
|
54
|
+
|
|
55
|
+
## References
|
|
56
|
+
|
|
57
|
+
- AI-optimized: [ai/standards/capability-declaration.ai.yaml](../ai/standards/capability-declaration.ai.yaml)
|
|
58
|
+
- XSPEC-037: Cross-project specification
|
|
59
|
+
- Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.3 `buildTool` Fail-Closed factory
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Circuit Breaker Standard
|
|
2
|
+
|
|
3
|
+
> **Source**: XSPEC-036 | **Borrowed from**: claude-code-book Ch.2
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The Circuit Breaker pattern protects Agent systems from API stampedes caused by repeated failures. After `failureThreshold` consecutive failures, the breaker opens and immediately rejects all requests — no waiting for timeout. After a cooldown period, it allows one probe call to test recovery.
|
|
8
|
+
|
|
9
|
+
Real-world data: Before introducing circuit breakers, claude-code-book measured ~250K wasted API calls per day across 1,279 sessions with >50 consecutive failures each (max: 3,272 consecutive failures).
|
|
10
|
+
|
|
11
|
+
## States
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
CLOSED ──(N consecutive failures)──→ OPEN
|
|
15
|
+
OPEN ──(cooldownMs elapsed)──→ HALF_OPEN
|
|
16
|
+
HALF_OPEN ──(probe success)──→ CLOSED
|
|
17
|
+
HALF_OPEN ──(probe failure)──→ OPEN
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
| State | Behavior |
|
|
21
|
+
|-------|----------|
|
|
22
|
+
| **CLOSED** | Normal operation, requests forwarded |
|
|
23
|
+
| **OPEN** | All requests rejected immediately with `CircuitOpenError` |
|
|
24
|
+
| **HALF_OPEN** | One probe request allowed; success → CLOSED, failure → OPEN |
|
|
25
|
+
|
|
26
|
+
## Configuration
|
|
27
|
+
|
|
28
|
+
| Parameter | Default | Description |
|
|
29
|
+
|-----------|---------|-------------|
|
|
30
|
+
| `failureThreshold` | 3 | Consecutive failures before opening |
|
|
31
|
+
| `cooldownMs` | 30000 | OPEN → HALF_OPEN wait time (ms) |
|
|
32
|
+
| `successThreshold` | 1 | Probe successes needed to close |
|
|
33
|
+
|
|
34
|
+
## Interface
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
interface CircuitBreaker {
|
|
38
|
+
readonly name: string;
|
|
39
|
+
readonly state: "CLOSED" | "HALF_OPEN" | "OPEN";
|
|
40
|
+
execute<T>(fn: () => Promise<T>): Promise<T>; // throws CircuitOpenError when OPEN
|
|
41
|
+
getState(): CircuitBreakerState;
|
|
42
|
+
reset(): void; // admin manual reset
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Applicable Scenarios
|
|
47
|
+
|
|
48
|
+
- DevAP Fix Loop retries
|
|
49
|
+
- DevAP LLM API call protection
|
|
50
|
+
- VibeOps Feedback Loop retries
|
|
51
|
+
- VibeOps FLARE retrieval retries
|
|
52
|
+
- Any component using retry with external dependencies
|
|
53
|
+
|
|
54
|
+
## References
|
|
55
|
+
|
|
56
|
+
- AI-optimized: [ai/standards/circuit-breaker.ai.yaml](../ai/standards/circuit-breaker.ai.yaml)
|
|
57
|
+
- XSPEC-036: Cross-project specification
|
|
58
|
+
- Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.2 `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES`
|
|
@@ -329,7 +329,35 @@ Multiple pitfalls → Pattern → Mental Model
|
|
|
329
329
|
|
|
330
330
|
---
|
|
331
331
|
|
|
332
|
-
## 5.
|
|
332
|
+
## 5. Memory Verification Principle (Memory Is a Clue, Not a Conclusion)
|
|
333
|
+
|
|
334
|
+
> Adapted from the memory verification principle in lintsinghua/claude-code-book.
|
|
335
|
+
|
|
336
|
+
Memory provides direction, but **must not be used directly as fact**. Verify independently before relying on a memory:
|
|
337
|
+
|
|
338
|
+
| Memory Content | Verification Method | On Conflict |
|
|
339
|
+
|---------|---------|-----------|
|
|
340
|
+
| File path | Confirm the file still exists (Glob/Read) | Mark the memory as `needs-revision` |
|
|
341
|
+
| Function name / API flag | Confirm it still exists (Grep/documentation) | Mark the memory as `needs-revision` |
|
|
342
|
+
| Architecture snapshot / repo state | Trust `git log` / source code first | Update the memory to the current state |
|
|
343
|
+
| Package version / dependency | Check package.json / lockfile | The actual version takes precedence |
|
|
344
|
+
|
|
345
|
+
### Prohibited Behaviors
|
|
346
|
+
|
|
347
|
+
- Recommending a specific API, path, or function name from memory to the user without verifying it first
|
|
348
|
+
- Claiming "according to memory, X exists" without performing an independent check
|
|
349
|
+
- Trusting memory over current observation when the two conflict
|
|
350
|
+
|
|
351
|
+
### Appropriate Uses of Memory
|
|
352
|
+
|
|
353
|
+
Memory is well suited to providing:
|
|
354
|
+
- **Search direction**: "Last time, the answer to this kind of problem was found in module X"
|
|
355
|
+
- **Pattern clues**: "This error pattern matches known pitfall MEM-2026-0042"
|
|
356
|
+
- **Decision context**: "The historical background of this design decision is..."
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## 6. Noise Control
|
|
333
361
|
|
|
334
362
|
### Push Levels
|
|
335
363
|
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Dual-Phase LLM Output Standard
|
|
2
|
+
|
|
3
|
+
> **Source**: XSPEC-035 | **Borrowed from**: claude-code-book Ch.7 AutoCompact
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The Dual-Phase LLM Output pattern requires LLM review agents to produce two XML blocks in a single response: an `<analysis>` thinking scratchpad (discarded after processing) and a `<summary>` structured conclusion (retained). This lets the model reason thoroughly while preventing thinking processes from accumulating in the conversation context.
|
|
8
|
+
|
|
9
|
+
## Problem
|
|
10
|
+
|
|
11
|
+
Review agents (Judge, Evaluator, Guardian) typically generate 2000–5000 token responses, with 50–70% being reasoning that accumulates in conversation history. In repeated review scenarios (Fix Loop 3× retries), this wastes 3000–10500 tokens per task.
|
|
12
|
+
|
|
13
|
+
## Format
|
|
14
|
+
|
|
15
|
+
```xml
|
|
16
|
+
<analysis>
|
|
17
|
+
[Reasoning scratchpad — DISCARDED after processing]
|
|
18
|
+
- Step-by-step evaluation
|
|
19
|
+
- Edge case considerations
|
|
20
|
+
- Alternative comparisons
|
|
21
|
+
</analysis>
|
|
22
|
+
|
|
23
|
+
<summary>
|
|
24
|
+
decision: approved | rejected | needs_revision
|
|
25
|
+
confidence: high | medium | low
|
|
26
|
+
findings:
|
|
27
|
+
- [type] description
|
|
28
|
+
next_action: [recommended follow-up action]
|
|
29
|
+
</summary>
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Post-Processing Rules
|
|
33
|
+
|
|
34
|
+
1. Extract `<summary>` content → persist to context
|
|
35
|
+
2. Discard `<analysis>` content → never write to conversation history
|
|
36
|
+
3. If `<summary>` tag missing → fallback: treat full response as summary, log `[WARN] dual-phase format missing`
|
|
37
|
+
|
|
38
|
+
## Extension Fields
|
|
39
|
+
|
|
40
|
+
Applications may add fields inside `<summary>` but must not remove core fields:
|
|
41
|
+
- **Security (Guardian)**: `severity: critical | high | medium | low`, `cwe_ids: [CWE-NNN]`
|
|
42
|
+
- **Quality (Evaluator)**: `test_coverage: number`, `tech_debt_score: number`
|
|
43
|
+
|
|
44
|
+
## Token Impact
|
|
45
|
+
|
|
46
|
+
| Scenario | Savings |
|
|
47
|
+
|----------|---------|
|
|
48
|
+
| Single review | 1000–3500 tokens |
|
|
49
|
+
| Fix Loop (3× retries) | 3000–10500 tokens |
|
|
50
|
+
| VibeOps pipeline (evaluator + guardian) | 2000–7000 tokens per run |
|
|
51
|
+
|
|
52
|
+
## References
|
|
53
|
+
|
|
54
|
+
- AI-optimized: [ai/standards/dual-phase-output.ai.yaml](../ai/standards/dual-phase-output.ai.yaml)
|
|
55
|
+
- XSPEC-035: Cross-project specification
|
|
56
|
+
- Borrowed from: [claude-code-book](https://github.com/lintsinghua/claude-code-book) Ch.7 `formatCompactSummary`
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Failure Source Taxonomy Standard
|
|
2
|
+
|
|
3
|
+
> **Source**: XSPEC-045 | **Borrowed from**: ultraworkers/claw-code ROADMAP Phase 2 Failure Taxonomy
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The Failure Source Taxonomy adds a `failureSource` (why) dimension on top of the existing `TaskStatus` (what). Structured failure sources allow the downstream recovery mechanism (Recovery Recipe Registry, XSPEC-046) to match strategies precisely, so that the same retry logic is not applied to fundamentally different failure types.
|
|
8
|
+
|
|
9
|
+
## 8 Failure Sources
|
|
10
|
+
|
|
11
|
+
| Source | Description | Recommended Recovery |
|
|
12
|
+
|--------|-------------|---------------------|
|
|
13
|
+
| `prompt_delivery` | Prompt not delivered to LLM (API 4xx, empty response, parse error) | retry or model_switch |
|
|
14
|
+
| `model_degradation` | LLM quality degrades (repetitive output, irrelevant response) | model_switch |
|
|
15
|
+
| `branch_divergence` | Working branch falls behind base branch | rebase_and_retry |
|
|
16
|
+
| `compilation` | Compile or type-check errors (tsc, cargo, go build) | fix_loop |
|
|
17
|
+
| `test_failure` | Test failures (unit / integration / system / e2e) | fix_loop |
|
|
18
|
+
| `tool_failure` | Tool layer failure (MCP server unresponsive, plugin load failure) | circuit_breaker then retry |
|
|
19
|
+
| `policy_violation` | Safety/governance policy block (Guardian deny, SafetyHook) | human_checkpoint |
|
|
20
|
+
| `resource_exhaustion` | Resource exhausted (token budget exceeded, timeout, USD budget) | degraded_mode or human_checkpoint |
|
|
21
|
+
|
|
22
|
+
## Priority Rules
|
|
23
|
+
|
|
24
|
+
When multiple failure sources coexist, apply:
|
|
25
|
+
|
|
26
|
+
1. `branch_divergence` > `compilation` — divergence is usually the root cause of compilation failures
|
|
27
|
+
2. `policy_violation` > others — security takes precedence, do not attempt bypass
|
|
28
|
+
3. `resource_exhaustion` > others — retrying when resources are exhausted is meaningless
|
|
29
|
+
4. Otherwise: use the first detected source
|
|
30
|
+
|
|
31
|
+
## Types
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
type FailureSource =
|
|
35
|
+
| "prompt_delivery"
|
|
36
|
+
| "model_degradation"
|
|
37
|
+
| "branch_divergence"
|
|
38
|
+
| "compilation"
|
|
39
|
+
| "test_failure"
|
|
40
|
+
| "tool_failure"
|
|
41
|
+
| "policy_violation"
|
|
42
|
+
| "resource_exhaustion";
|
|
43
|
+
|
|
44
|
+
interface FailureDetail {
|
|
45
|
+
source: FailureSource;
|
|
46
|
+
raw_error: string;
|
|
47
|
+
detected_by: string; // quality-gate / claude-adapter / safety-hook / branch-drift
|
|
48
|
+
timestamp: string; // ISO 8601
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Guidelines
|
|
53
|
+
|
|
54
|
+
- All failure results should carry `failureSource` to enable precise recovery strategy matching
|
|
55
|
+
- `failureSource` is an **optional** field — producers and consumers that predate it must keep working when it is absent
|
|
56
|
+
- Select the most fundamental source as `failureSource` in a single failure event
|
|
57
|
+
- `failureSource` should be set by the component that detects the failure
|
|
58
|
+
- DevAP and VibeOps each define `FailureSource` type independently (AGPL isolation)
|
|
59
|
+
|
|
60
|
+
## Applicable Scenarios
|
|
61
|
+
|
|
62
|
+
- DevAP QualityGate failure result enrichment
|
|
63
|
+
- VibeOps PipelineRunner `agent:error` event payload
|
|
64
|
+
- Recovery Recipe Registry (XSPEC-046) match key
|
|
65
|
+
- Telemetry failure analytics dimension
|
|
66
|
+
|
|
67
|
+
## References
|
|
68
|
+
|
|
69
|
+
- AI-optimized: [ai/standards/failure-source-taxonomy.ai.yaml](../ai/standards/failure-source-taxonomy.ai.yaml)
|
|
70
|
+
- XSPEC-045: Cross-project specification
|
|
71
|
+
- Consumed by: Recovery Recipe Registry (XSPEC-046), which uses `failureSource` as its match key
|
|
72
|
+
- Borrowed from: [ultraworkers/claw-code](https://github.com/ultraworkers/claw-code) ROADMAP Phase 2 Failure Taxonomy
|