universal-dev-standards 5.5.0 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/ai/options/testing/integration-testing.ai.yaml +2 -2
- package/bundled/ai/options/testing/unit-testing.ai.yaml +2 -2
- package/bundled/ai/standards/agent-communication-protocol.ai.yaml +8 -9
- package/bundled/ai/standards/agent-dispatch.ai.yaml +8 -9
- package/bundled/ai/standards/branch-completion.ai.yaml +8 -10
- package/bundled/ai/standards/browser-compatibility-standards.ai.yaml +63 -0
- package/bundled/ai/standards/capability-declaration.ai.yaml +4 -4
- package/bundled/ai/standards/change-batching-standards.ai.yaml +8 -10
- package/bundled/ai/standards/circuit-breaker.ai.yaml +7 -7
- package/bundled/ai/standards/contract-testing-standards.ai.yaml +62 -0
- package/bundled/ai/standards/cross-flow-regression.ai.yaml +61 -0
- package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +1 -1
- package/bundled/ai/standards/dual-phase-output.ai.yaml +3 -3
- package/bundled/ai/standards/execution-history.ai.yaml +8 -10
- package/bundled/ai/standards/failure-source-taxonomy.ai.yaml +8 -10
- package/bundled/ai/standards/full-coverage-testing.ai.yaml +192 -0
- package/bundled/ai/standards/git-worktree.ai.yaml +1 -1
- package/bundled/ai/standards/governance-layer.ai.yaml +114 -0
- package/bundled/ai/standards/mock-boundary.ai.yaml +1 -1
- package/bundled/ai/standards/model-selection.ai.yaml +1 -1
- package/bundled/ai/standards/packaging-standards.ai.yaml +8 -8
- package/bundled/ai/standards/pipeline-integration-standards.ai.yaml +8 -9
- package/bundled/ai/standards/pipeline-security-gates.ai.yaml +4 -0
- package/bundled/ai/standards/recovery-recipe-registry.ai.yaml +6 -10
- package/bundled/ai/standards/release-readiness-gate.ai.yaml +77 -0
- package/bundled/ai/standards/security-decision.ai.yaml +3 -3
- package/bundled/ai/standards/server-ops-security.ai.yaml +1 -1
- package/bundled/ai/standards/standard-admission-criteria.ai.yaml +1 -1
- package/bundled/ai/standards/standard-lifecycle-management.ai.yaml +1 -1
- package/bundled/ai/standards/supply-chain-attestation.ai.yaml +1 -1
- package/bundled/ai/standards/testing.ai.yaml +20 -13
- package/bundled/ai/standards/token-budget.ai.yaml +3 -3
- package/bundled/ai/standards/workflow-enforcement.ai.yaml +8 -11
- package/bundled/ai/standards/workflow-state-protocol.ai.yaml +8 -10
- package/bundled/core/accessibility-standards.md +58 -0
- package/bundled/core/adversarial-test.md +1 -1
- package/bundled/core/agent-behavior-discipline.md +4 -4
- package/bundled/core/agent-communication-protocol.md +5 -5
- package/bundled/core/branch-completion.md +4 -0
- package/bundled/core/browser-compatibility-standards.md +220 -0
- package/bundled/core/checkin-standards.md +1 -0
- package/bundled/core/circuit-breaker.md +4 -4
- package/bundled/core/container-security.md +8 -8
- package/bundled/core/contract-testing-standards.md +182 -0
- package/bundled/core/cross-flow-regression.md +190 -0
- package/bundled/core/disaster-recovery-drill.md +3 -3
- package/bundled/core/dual-phase-output.md +1 -1
- package/bundled/core/failure-source-taxonomy.md +3 -3
- package/bundled/core/flow-based-testing.md +135 -2
- package/bundled/core/full-coverage-testing.md +183 -0
- package/bundled/core/git-worktree.md +1 -1
- package/bundled/core/governance-layer.md +151 -0
- package/bundled/core/llm-output-validation.md +2 -2
- package/bundled/core/mock-boundary.md +1 -1
- package/bundled/core/packaging-standards.md +14 -14
- package/bundled/core/performance-standards.md +65 -0
- package/bundled/core/policy-as-code-testing.md +9 -9
- package/bundled/core/recovery-recipe-registry.md +2 -2
- package/bundled/core/release-quality-manifest.md +58 -12
- package/bundled/core/release-readiness-gate.md +184 -0
- package/bundled/core/sast-advanced.md +5 -5
- package/bundled/core/secure-op.md +5 -5
- package/bundled/core/security-decision.md +1 -1
- package/bundled/core/server-ops-security.md +15 -15
- package/bundled/core/smoke-test.md +1 -1
- package/bundled/core/standard-admission-criteria.md +1 -1
- package/bundled/core/standard-lifecycle-management.md +1 -1
- package/bundled/core/supply-chain-attestation.md +4 -4
- package/bundled/core/token-budget.md +3 -3
- package/bundled/locales/zh-CN/CHANGELOG.md +51 -4
- package/bundled/locales/zh-CN/README.md +11 -27
- package/bundled/locales/zh-CN/core/agent-communication-protocol.md +5 -5
- package/bundled/locales/zh-CN/core/circuit-breaker.md +1 -1
- package/bundled/locales/zh-CN/core/git-worktree.md +1 -1
- package/bundled/locales/zh-CN/core/packaging-standards.md +14 -14
- package/bundled/locales/zh-CN/core/recovery-recipe-registry.md +6 -9
- package/bundled/locales/zh-CN/core/standard-admission-criteria.md +1 -1
- package/bundled/locales/zh-CN/core/standard-lifecycle-management.md +1 -1
- package/bundled/locales/zh-CN/core/token-budget.md +1 -1
- package/bundled/locales/zh-TW/CHANGELOG.md +51 -4
- package/bundled/locales/zh-TW/README.md +11 -27
- package/bundled/locales/zh-TW/core/agent-communication-protocol.md +5 -5
- package/bundled/locales/zh-TW/core/browser-compatibility-standards.md +11 -0
- package/bundled/locales/zh-TW/core/capability-declaration.md +4 -4
- package/bundled/locales/zh-TW/core/circuit-breaker.md +7 -7
- package/bundled/locales/zh-TW/core/contract-testing-standards.md +11 -0
- package/bundled/locales/zh-TW/core/cross-flow-regression.md +11 -0
- package/bundled/locales/zh-TW/core/dual-phase-output.md +3 -3
- package/bundled/locales/zh-TW/core/failure-source-taxonomy.md +7 -9
- package/bundled/locales/zh-TW/core/governance-layer.md +159 -0
- package/bundled/locales/zh-TW/core/packaging-standards.md +14 -14
- package/bundled/locales/zh-TW/core/recovery-recipe-registry.md +6 -9
- package/bundled/locales/zh-TW/core/release-readiness-gate.md +11 -0
- package/bundled/locales/zh-TW/core/security-decision.md +3 -3
- package/bundled/locales/zh-TW/core/standard-admission-criteria.md +1 -1
- package/bundled/locales/zh-TW/core/standard-lifecycle-management.md +1 -1
- package/bundled/locales/zh-TW/core/token-budget.md +3 -3
- package/bundled/skills/README.md +23 -0
- package/bundled/skills/atdd-assistant/SKILL.md +4 -5
- package/bundled/skills/bdd-assistant/SKILL.md +4 -5
- package/bundled/skills/checkin-assistant/SKILL.md +4 -6
- package/bundled/skills/code-review-assistant/SKILL.md +4 -5
- package/bundled/skills/commands/observability.md +42 -0
- package/bundled/skills/commands/runbook.md +44 -0
- package/bundled/skills/commands/slo.md +45 -0
- package/bundled/skills/journey-test-assistant/SKILL.md +1 -1
- package/bundled/skills/orchestrate/SKILL.md +1 -1
- package/bundled/skills/plan/SKILL.md +1 -1
- package/bundled/skills/pr-automation-assistant/SKILL.md +4 -5
- package/bundled/skills/push/SKILL.md +1 -1
- package/bundled/skills/spec-driven-dev/SKILL.md +4 -5
- package/bundled/skills/sweep/SKILL.md +3 -3
- package/bundled/skills/tdd-assistant/SKILL.md +4 -5
- package/package.json +6 -6
- package/src/commands/check.js +43 -0
- package/src/commands/flow.js +7 -5
- package/src/commands/init.js +2 -1
- package/src/commands/start.js +7 -6
- package/src/commands/sweep.js +7 -6
- package/src/commands/update.js +10 -0
- package/src/commands/workflow.js +7 -6
- package/src/core/agent-communication-protocol.js +10 -3
- package/standards-registry.json +107 -51
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Full Coverage Testing Standards
|
|
2
|
+
|
|
3
|
+
> **AI-optimized version**: `ai/standards/full-coverage-testing.ai.yaml`
|
|
4
|
+
> **XSPEC**: XSPEC-178
|
|
5
|
+
> **Replaces**: Pyramid threshold model (UT≥80%, IT≥70%, E2E happy-path-only)
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Full Coverage Testing is a behavior-completeness paradigm designed for the AI era, where the cost of generating tests equals the cost of generating code. Traditional pyramid thresholds assumed tests were expensive to write — this assumption no longer holds.
|
|
10
|
+
|
|
11
|
+
**Core principle**: Every public function must be tested for all three behavioral paths (happy path, edge case, error path). Coverage is measured by behavior completeness, not percentage floors. CI enforces a ratchet: coverage can only increase, never decrease.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Behavior-Completeness Model
|
|
16
|
+
|
|
17
|
+
Instead of "80% line coverage", require:
|
|
18
|
+
|
|
19
|
+
| Path | Description | Example |
|
|
20
|
+
|------|-------------|---------|
|
|
21
|
+
| **Happy path** | Normal input produces correct output | `calculateDiscount(100, 0.1) → 90` |
|
|
22
|
+
| **Edge case** | Boundary values do not cause unexpected errors | `calculateDiscount(0, 1.0) → 0 without throwing` |
|
|
23
|
+
| **Error path** | Invalid input raises clear error or error state | `calculateDiscount(-1, 2.0) → throws ArgumentError` |
|
|
24
|
+
|
|
25
|
+
Every public function requires all three. This replaces the "80% of business logic" target with a qualitative, behavior-driven requirement.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Ratchet CI Policy
|
|
30
|
+
|
|
31
|
+
- The current coverage baseline is the minimum acceptable coverage
|
|
32
|
+
- Any PR that decreases coverage is blocked from merging
|
|
33
|
+
- Improvements update the baseline automatically on merge
|
|
34
|
+
- No fixed percentage floor — the coverage achieved today is tomorrow's floor
|
|
35
|
+
|
|
36
|
+
```text
|
|
37
|
+
# Stored in .coverage-baseline.json
|
|
38
|
+
{ "line": 91.3, "branch": 88.7, "timestamp": "2026-05-06" }
|
|
39
|
+
|
|
40
|
+
# PR regression → blocked
|
|
41
|
+
Coverage regression: 91.3% → 89.1%. Ratchet threshold violated.
|
|
42
|
+
|
|
43
|
+
# PR improvement → baseline updated
|
|
44
|
+
Coverage improved: 91.3% → 92.0%. New baseline set.
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Anti-Fake Test Rules
|
|
50
|
+
|
|
51
|
+
### Forbidden: Tautology Assertions
|
|
52
|
+
|
|
53
|
+
Assertions that always pass regardless of behavior provide false coverage.
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
// ❌ FORBIDDEN — always passes, tests nothing
|
|
57
|
+
expect(true).toBe(true)
|
|
58
|
+
expect(result).toBeDefined() // passes for any non-undefined value — verifies no specific behavior
|
|
59
|
+
|
|
60
|
+
// ✅ REQUIRED — verifies actual behavior
|
|
61
|
+
expect(result).toBe(90)
|
|
62
|
+
expect(result).toEqual({ discount: 10, total: 90 })
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Forbidden: Mocking Core Business Logic
|
|
66
|
+
|
|
67
|
+
Mocking your own code means the business logic is never actually executed.
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
// ❌ FORBIDDEN — business logic never runs
|
|
71
|
+
jest.mock('./orderService', () => ({ calculateTotal: jest.fn(() => 100) }))
|
|
72
|
+
|
|
73
|
+
// ✅ ALLOWED — mock only external dependencies
|
|
74
|
+
// MOCK: External Stripe API — no sandbox available in CI
|
|
75
|
+
jest.mock('./payment-gateway', () => ({ charge: jest.fn().mockResolvedValue({ id: 'ch_test' }) }))
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Required: Mock Reason Comments
|
|
79
|
+
|
|
80
|
+
Every mock must explain why the dependency cannot be real.
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
// ❌ FORBIDDEN — no explanation
|
|
84
|
+
jest.mock('./payment-gateway')
|
|
85
|
+
|
|
86
|
+
// ✅ REQUIRED — explicit reason
|
|
87
|
+
// MOCK: External payment gateway — network dependency, no sandbox in CI
|
|
88
|
+
jest.mock('./payment-gateway', () => ({ ... }))
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Mock Boundary: What Can Be Mocked
|
|
92
|
+
|
|
93
|
+
| ✅ Allowed to Mock | ❌ Forbidden to Mock |
|
|
94
|
+
|-------------------|---------------------|
|
|
95
|
+
| External HTTP APIs (payment, OAuth) | Core business calculation functions |
|
|
96
|
+
| Hardware interfaces (sensors, GPIO) | Your own service layer methods |
|
|
97
|
+
| Third-party SDKs without test mode | Database queries (use in-memory SQLite) |
|
|
98
|
+
| Docker daemon | Your own utility functions |
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## STUB Marker Protocol
|
|
103
|
+
|
|
104
|
+
All temporary/placeholder implementations MUST be marked with the standard STUB marker. This is enforced by pre-push hooks and `deploy.sh`.
|
|
105
|
+
|
|
106
|
+
### Marking a STUB
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
// WARNING: STUB — Remove before UAT
|
|
110
|
+
async function validatePayment(card: Card): Promise<boolean> {
|
|
111
|
+
return true; // Always approve — replace with real Stripe call
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Exempting a Genuine Limitation
|
|
116
|
+
|
|
117
|
+
When a dependency truly cannot be tested (hardware, live API without sandbox):
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
// COVERAGE_EXEMPT: Hardware temperature sensor — no simulation available in CI
|
|
121
|
+
async function readTemperature(): Promise<number> {
|
|
122
|
+
return hardwareSensor.read();
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
The exemption reason MUST be non-empty and specific.
|
|
127
|
+
|
|
128
|
+
### Deployment Gates
|
|
129
|
+
|
|
130
|
+
| Environment | STUB Present | Action |
|
|
131
|
+
|-------------|-------------|--------|
|
|
132
|
+
| Feature branch push | Yes | ⚠️ Warning (not blocked) |
|
|
133
|
+
| `main` branch push | Yes | ❌ Blocked |
|
|
134
|
+
| Staging deploy | Yes | ⚠️ Warning (not blocked) |
|
|
135
|
+
| UAT deploy | Yes | ❌ Blocked |
|
|
136
|
+
| Production deploy | Yes | ❌ Blocked (critical log) |
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## AC Traceability
|
|
141
|
+
|
|
142
|
+
Link each test to its Acceptance Criteria using the `@ac` JSDoc tag:
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
/**
|
|
146
|
+
* @ac AC-US03-2
|
|
147
|
+
*/
|
|
148
|
+
it('should block PR when coverage regresses below baseline', () => {
|
|
149
|
+
// test body
|
|
150
|
+
})
|
|
151
|
+
|
|
152
|
+
// If no AC maps to this test:
|
|
153
|
+
/**
|
|
154
|
+
* @ac UNTRACED
|
|
155
|
+
*/
|
|
156
|
+
it('helper utility returns correct format', () => { ... })
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
CI reports AC coverage rate. If more than 20% of ACs lack `@ac`-tagged tests, a warning is shown.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Migration from Pyramid Model
|
|
164
|
+
|
|
165
|
+
If your project previously used pyramid thresholds:
|
|
166
|
+
|
|
167
|
+
1. **Delete** any hardcoded coverage thresholds from `jest.config.js` / `vitest.config.ts` (`coverageThreshold` in Jest, `coverage.thresholds` in Vitest)
|
|
168
|
+
2. **Install** `.coverage-baseline.json` with current coverage as the starting ratchet
|
|
169
|
+
3. **Add** `scripts/check-coverage-ratchet.sh` to CI
|
|
170
|
+
4. **Add** `scripts/check-stubs.sh` to `deploy.sh` and the pre-push hook
|
|
171
|
+
5. **Add** `scripts/check-anti-fake-tests.sh` to pre-commit or CI
|
|
172
|
+
|
|
173
|
+
The ratchet starts at your current coverage. From that point on, it can only increase.
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Related Standards
|
|
178
|
+
|
|
179
|
+
- `testing.ai.yaml` — Test structure, FIRST principles, AAA pattern (pyramid thresholds deprecated here)
|
|
180
|
+
- `unit-testing.ai.yaml` — Unit test scope and organization
|
|
181
|
+
- `integration-testing.ai.yaml` — Integration test patterns
|
|
182
|
+
- `deployment-standards.ai.yaml` — Deploy gate requirements
|
|
183
|
+
- XSPEC-178 — Full specification and implementation phases
|
|
@@ -42,7 +42,7 @@ Define a lifecycle for using Git worktrees to isolate development work, ensuring
|
|
|
42
42
|
|
|
43
43
|
1. **Choose worktree location** — priority order:
|
|
44
44
|
- Existing configured path
|
|
45
|
-
- `.
|
|
45
|
+
- `.uds/worktrees/` or similar project-local directory
|
|
46
46
|
- Ask the user
|
|
47
47
|
2. **Verify `.gitignore`** — run `git check-ignore` to confirm the worktree directory is ignored
|
|
48
48
|
3. **Create the worktree** — `git worktree add <path> -b <branch-name>`
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Governance Layer Standard
|
|
2
|
+
|
|
3
|
+
> **Language**: English | [繁體中文](../locales/zh-TW/core/governance-layer.md)
|
|
4
|
+
|
|
5
|
+
**Version**: 1.0.0
|
|
6
|
+
**Last Updated**: 2026-05-07
|
|
7
|
+
**Applicability**: All software projects with multi-agent or multi-role AI workflows
|
|
8
|
+
**Scope**: universal
|
|
9
|
+
**Industry Standards**: None (UDS original)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Purpose
|
|
14
|
+
|
|
15
|
+
A governance layer provides a shared anchor for all agents and roles in a project:
|
|
16
|
+
Vision (direction) → Mission (boundaries + red lines) → Goals (measurable KPIs).
|
|
17
|
+
|
|
18
|
+
It is **Standard #0**: evaluated before all other standards. When any conflict exists between this standard and other domain standards, this standard takes precedence.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Three-Layer Schema
|
|
23
|
+
|
|
24
|
+
### Vision
|
|
25
|
+
|
|
26
|
+
| Field | Requirement |
|
|
27
|
+
|-------|-------------|
|
|
28
|
+
| Format | Single sentence, ≤ 50 tokens |
|
|
29
|
+
| Content | Long-term direction; timeless; no metrics |
|
|
30
|
+
| Change frequency | Annual review |
|
|
31
|
+
|
|
32
|
+
**Example**:
|
|
33
|
+
> "To be the most trusted AI development workflow standard for software teams worldwide."
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
### Mission
|
|
38
|
+
|
|
39
|
+
| Field | Requirement |
|
|
40
|
+
|-------|-------------|
|
|
41
|
+
| Format | 3–5 commitment statements + red lines table (≤ 300 tokens total) |
|
|
42
|
+
| Content | What we do / don't do; red lines with trigger conditions + actions |
|
|
43
|
+
| Change frequency | Quarterly review |
|
|
44
|
+
|
|
45
|
+
**Red line mandatory fields**:
|
|
46
|
+
|
|
47
|
+
| Field | Type | Description |
|
|
48
|
+
|-------|------|-------------|
|
|
49
|
+
| `id` | string | Unique identifier (e.g., R1, GUARD-001) |
|
|
50
|
+
| `category` | string | Classification (quality / safety / compliance / ethics) |
|
|
51
|
+
| `clause` | string | Human-readable statement of what is forbidden or required |
|
|
52
|
+
| `action` | enum | One of `block` \| `warn` \| `escalate_to_human` |
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
### Goals
|
|
57
|
+
|
|
58
|
+
| Field | Requirement |
|
|
59
|
+
|-------|-------------|
|
|
60
|
+
| Format | KPI table, ≤ 500 tokens |
|
|
61
|
+
| Change frequency | Per-Sprint calibration |
|
|
62
|
+
| Falsifiability | Every KPI must be measurable — no vague terms like "improve" or "enhance" |
|
|
63
|
+
|
|
64
|
+
**KPI mandatory fields**:
|
|
65
|
+
|
|
66
|
+
| Field | Type | Description |
|
|
67
|
+
|-------|------|-------------|
|
|
68
|
+
| `id` | string | Unique identifier (e.g., KPI-01) |
|
|
69
|
+
| `metric_name` | string | Name of the metric being tracked |
|
|
70
|
+
| `threshold` | string | Quantified target (e.g., ≥ 95%, < 200 ms) |
|
|
71
|
+
| `measurement_method` | string | How and when the metric is measured |
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Priority
|
|
76
|
+
|
|
77
|
+
The governance layer has **higher priority** than all other standards. Resolution order when conflicts exist:
|
|
78
|
+
|
|
79
|
+
1. **Governance layer** (this standard) — direction, red lines, KPIs
|
|
80
|
+
2. **Domain standards** (testing, commit message, deployment, etc.)
|
|
81
|
+
3. **Project-specific overrides** (local `.standards/` customizations)
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Red Lines Format
|
|
86
|
+
|
|
87
|
+
Each red line entry must contain all mandatory fields. Enforcement actions:
|
|
88
|
+
|
|
89
|
+
| Action | Behavior |
|
|
90
|
+
|--------|----------|
|
|
91
|
+
| `block` | Halt the pipeline immediately; do not proceed |
|
|
92
|
+
| `warn` | Log the violation and continue; escalate if threshold exceeded |
|
|
93
|
+
| `escalate_to_human` | Pause and require human decision before continuing |
|
|
94
|
+
|
|
95
|
+
Additionally, each red line should include a `mission_clause_ref` field referencing the mission commitment it enforces.
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Evaluator Integration
|
|
100
|
+
|
|
101
|
+
When a project uses an AI evaluator agent, the governance layer provides scoring anchors:
|
|
102
|
+
|
|
103
|
+
| Axis | Weight | Veto threshold |
|
|
104
|
+
|------|--------|---------------|
|
|
105
|
+
| Correctness | 0.4 | < 0.3 → FAIL |
|
|
106
|
+
| Mission alignment | 0.3 | < 0.3 → FAIL |
|
|
107
|
+
| Goal achievement | 0.3 | < 0.3 → FAIL |
|
|
108
|
+
|
|
109
|
+
- **mission_alignment_score**: Degree to which the output aligns with Mission commitments
|
|
110
|
+
- **goal_achievement_score**: Degree to which the output advances Goals KPIs
|
|
111
|
+
- Any single axis falling below 0.3 triggers a FAIL regardless of the weighted sum
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Risk Acceptance (trace_only mode)
|
|
116
|
+
|
|
117
|
+
If a project relaxes human gates (e.g., `gate.mode = trace_only`), a **Risk Acceptance Clause** must be written explicitly into `mission.md`, containing:
|
|
118
|
+
|
|
119
|
+
| Required Field | Description |
|
|
120
|
+
|---------------|-------------|
|
|
121
|
+
| `date` | Date the risk was accepted |
|
|
122
|
+
| `signatory` | Person or role accepting the risk |
|
|
123
|
+
| `gates_bypassed` | Enumerated list of human gates that are bypassed |
|
|
124
|
+
| `risks_accepted` | Explicit description of accepted risks |
|
|
125
|
+
|
|
126
|
+
When human gates are relaxed without a valid Risk Acceptance Clause, the pipeline **must refuse to start (fail-closed)**.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Governance File Structure
|
|
131
|
+
|
|
132
|
+
Projects adopting this standard should maintain the following files:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
governance/
|
|
136
|
+
├── vision.md # Single-sentence vision statement
|
|
137
|
+
├── mission.md # Commitments + red lines table
|
|
138
|
+
└── goals.md # KPI table (updated each Sprint)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Compliance Checklist
|
|
144
|
+
|
|
145
|
+
- [ ] Vision is a single sentence ≤ 50 tokens and contains no metrics
|
|
146
|
+
- [ ] Mission has 3–5 commitments and a red lines table with all mandatory fields
|
|
147
|
+
- [ ] Every red line has: id, category, clause, action
|
|
148
|
+
- [ ] Goals table is present with all KPIs containing: id, metric_name, threshold, measurement_method
|
|
149
|
+
- [ ] No KPI uses vague language ("improve", "enhance", "better")
|
|
150
|
+
- [ ] If `gate.mode = trace_only`, a Risk Acceptance Clause is present in `mission.md`
|
|
151
|
+
- [ ] All AI evaluators weight correctness/mission_alignment/goal_achievement with fail-closed veto at < 0.3
|
|
@@ -135,10 +135,10 @@ LLM 產生「聽起來正確但實際上沒有根據」的內容。例如:
|
|
|
135
135
|
|
|
136
136
|
```bash
|
|
137
137
|
# 1. 修改前:用 temperature=0 記錄 golden output
|
|
138
|
-
|
|
138
|
+
ai-agent run planner --input fixtures/planner-input.json --temp 0 > golden.json
|
|
139
139
|
|
|
140
140
|
# 2. 修改後:重跑並比對
|
|
141
|
-
|
|
141
|
+
ai-agent run planner --input fixtures/planner-input.json --temp 0 > after.json
|
|
142
142
|
|
|
143
143
|
# 3. 用 contract test 驗證 after.json 仍符合 schema
|
|
144
144
|
npx vitest run agents/__tests__/contract.test.ts
|
|
@@ -21,7 +21,7 @@ This document defines rules for what can and cannot be mocked in tests. Its goal
|
|
|
21
21
|
|
|
22
22
|
A hollow test mocks so much of the system that the test becomes a specification of mock wiring rather than system behavior. The classic symptom: you can delete the implementation file and the test still passes.
|
|
23
23
|
|
|
24
|
-
**Real example (
|
|
24
|
+
**Real example (Multi-agent pipeline SPEC-002.test.ts)**:
|
|
25
25
|
|
|
26
26
|
```typescript
|
|
27
27
|
vi.mock('../../src/runner/agent-runner.js') // Core logic replaced
|
|
@@ -4,19 +4,19 @@
|
|
|
4
4
|
|
|
5
5
|
**Version**: 1.0.0
|
|
6
6
|
**Last Updated**: 2026-04-15
|
|
7
|
-
**Applicability**: Projects using UDS
|
|
7
|
+
**Applicability**: Projects using a UDS-aware toolchain
|
|
8
8
|
**Scope**: universal
|
|
9
9
|
|
|
10
10
|
---
|
|
11
11
|
|
|
12
12
|
## Purpose
|
|
13
13
|
|
|
14
|
-
This standard defines a Recipe-based packaging framework that enables user projects to declare packaging targets in
|
|
14
|
+
This standard defines a Recipe-based packaging framework that enables user projects to declare packaging targets in their packaging config (file path is adoption-layer specific). UDS provides the Recipe definitions and built-in Recipe library; the adoption-layer runtime executes the orchestration at pipeline time.
|
|
15
15
|
|
|
16
16
|
The framework separates concerns:
|
|
17
17
|
- **User project**: declares *what* to package (targets + config overrides)
|
|
18
18
|
- **UDS**: defines *how* to package (Recipe structure + built-in Recipes)
|
|
19
|
-
- **
|
|
19
|
+
- **Adoption-layer pipeline**: executes *when* to package (pipeline stage between Review and Deploy)
|
|
20
20
|
|
|
21
21
|
---
|
|
22
22
|
|
|
@@ -25,9 +25,9 @@ The framework separates concerns:
|
|
|
25
25
|
| Principle | Description |
|
|
26
26
|
|-----------|-------------|
|
|
27
27
|
| **Recipe-based** | Every packaging target references a named Recipe; no ad-hoc scripts in pipeline YAML |
|
|
28
|
-
| **Declarative targets** | Projects declare targets in
|
|
28
|
+
| **Declarative targets** | Projects declare targets in their packaging config (file path adoption-layer specific); the runtime resolves and executes |
|
|
29
29
|
| **Customizable** | Four customization layers allow config overrides, hook injection, custom Recipes, and escape hatches |
|
|
30
|
-
| **Pipeline-integrated** | Packaging runs as a named stage between Review and Deploy in the
|
|
30
|
+
| **Pipeline-integrated** | Packaging runs as a named stage between Review and Deploy in the adoption-layer pipeline |
|
|
31
31
|
|
|
32
32
|
---
|
|
33
33
|
|
|
@@ -111,15 +111,15 @@ Projects that need to deviate from built-in Recipe defaults should use the lowes
|
|
|
111
111
|
|
|
112
112
|
| Layer | Mechanism | When to Use |
|
|
113
113
|
|-------|-----------|-------------|
|
|
114
|
-
| **L1 — Config Override** | `config:` block in `.
|
|
115
|
-
| **L2 — Hook Injection** | `hooks:` block in `.
|
|
116
|
-
| **L3 — Custom Recipe** | New `.yaml` file in project's `.
|
|
114
|
+
| **L1 — Config Override** | `config:` block in `.uds/packaging.yaml` | Change default values (registry URL, tag, output dir) |
|
|
115
|
+
| **L2 — Hook Injection** | `hooks:` block in `.uds/packaging.yaml` | Run extra commands before/after build or publish |
|
|
116
|
+
| **L3 — Custom Recipe** | New `.yaml` file in project's `.uds/recipes/` | Entirely different build process; built-ins don't apply |
|
|
117
117
|
| **L4 — Escape Hatch** | `script:` key replacing `recipe:` in target definition | Raw shell script when no Recipe abstraction is suitable |
|
|
118
118
|
|
|
119
119
|
### L1 Example — Config Override
|
|
120
120
|
|
|
121
121
|
```yaml
|
|
122
|
-
# .
|
|
122
|
+
# .uds/packaging.yaml
|
|
123
123
|
targets:
|
|
124
124
|
- name: publish-npm
|
|
125
125
|
recipe: npm-library
|
|
@@ -132,7 +132,7 @@ targets:
|
|
|
132
132
|
### L2 Example — Hook Injection
|
|
133
133
|
|
|
134
134
|
```yaml
|
|
135
|
-
# .
|
|
135
|
+
# .uds/packaging.yaml
|
|
136
136
|
targets:
|
|
137
137
|
- name: docker-push
|
|
138
138
|
recipe: docker-service
|
|
@@ -145,7 +145,7 @@ targets:
|
|
|
145
145
|
### L3 Example — Custom Recipe
|
|
146
146
|
|
|
147
147
|
```yaml
|
|
148
|
-
# .
|
|
148
|
+
# .uds/recipes/electron-app.yaml
|
|
149
149
|
name: electron-app
|
|
150
150
|
description: Build Electron desktop application
|
|
151
151
|
requires:
|
|
@@ -161,7 +161,7 @@ config:
|
|
|
161
161
|
### L4 Example — Escape Hatch
|
|
162
162
|
|
|
163
163
|
```yaml
|
|
164
|
-
# .
|
|
164
|
+
# .uds/packaging.yaml
|
|
165
165
|
targets:
|
|
166
166
|
- name: legacy-bundle
|
|
167
167
|
script: |
|
|
@@ -179,9 +179,9 @@ A packaging run is considered **successful** when ALL of the following condition
|
|
|
179
179
|
|-----------|-----------|-------|
|
|
180
180
|
| All `requires` files exist | 100% | Checked before any step runs |
|
|
181
181
|
| All steps exit with code 0 | 100% | Any non-zero exit fails the run |
|
|
182
|
-
| `postBuild` artifact exists | Present in expected path | Verified by
|
|
182
|
+
| `postBuild` artifact exists | Present in expected path | Verified by the adoption-layer runtime after build step |
|
|
183
183
|
| Hook commands exit with code 0 | 100% | Hook failure propagates as step failure |
|
|
184
|
-
| Published artifact is retrievable | HTTP 200 / registry query succeeds | Verified by
|
|
184
|
+
| Published artifact is retrievable | HTTP 200 / registry query succeeds | Verified by the adoption-layer runtime post-publish smoke check |
|
|
185
185
|
|
|
186
186
|
### Failure Handling
|
|
187
187
|
|
|
@@ -323,6 +323,70 @@ Analogous to the SRE Error Budget concept, a Performance Budget defines the tole
|
|
|
323
323
|
|
|
324
324
|
---
|
|
325
325
|
|
|
326
|
+
## Per-Release Capacity Sign-off
|
|
327
|
+
|
|
328
|
+
This section defines the **capacity gate** that must be satisfied before production release (Dimension 10 in `release-readiness-gate.md`, Tier-3).
|
|
329
|
+
|
|
330
|
+
### Capacity Forecast
|
|
331
|
+
|
|
332
|
+
Before each release candidate, produce a capacity forecast based on:
|
|
333
|
+
|
|
334
|
+
1. **Baseline**: 90-day rolling average of peak TPS and resource utilization (CPU, memory, DB connections, storage growth rate)
|
|
335
|
+
2. **Release impact estimate**: expected traffic delta from new features (e.g., +15% TPS from new notification flow)
|
|
336
|
+
3. **Seasonal adjustment**: any known traffic spikes within the next 30 days (marketing campaigns, seasonal peaks)
|
|
337
|
+
|
|
338
|
+
### Headroom Thresholds
|
|
339
|
+
|
|
340
|
+
| Metric | Target (PASS) | Warn Band | Fail Threshold |
|
|
341
|
+
|--------|--------------|-----------|----------------|
|
|
342
|
+
| CPU headroom at projected peak | ≥ 30% | 20% to < 30% | < 20% |
|
|
343
|
+
| Memory headroom | ≥ 25% | 15% to < 25% | < 15% |
|
|
344
|
+
| DB connection pool headroom | ≥ 40% | 25% to < 40% | < 25% |
|
|
345
|
+
| p99 latency vs baseline | ≤ +5% | > +5% to +10% | > +10% regression |
|
|
346
|
+
| Error rate at peak load | < 0.1% | 0.1–0.5% | > 0.5% |
|
|
347
|
+
|
|
348
|
+
### Load Test Requirement
|
|
349
|
+
|
|
350
|
+
Run the load test scenario defined in the Performance Testing sections above (Soak + Spike test minimum) before finalizing the capacity sign-off:
|
|
351
|
+
|
|
352
|
+
```bash
|
|
353
|
+
# Example: k6 capacity verification run
|
|
354
|
+
k6 run --vus 500 --duration 20m scripts/perf/soak-test.js
|
|
355
|
+
# Pass criterion: every headroom metric meets its Target (PASS) value in the table above, and p99 stays within budget
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Sign-off Evidence
|
|
359
|
+
|
|
360
|
+
The capacity gate requires **two named sign-offs** — both Engineering Lead and SRE Lead:
|
|
361
|
+
|
|
362
|
+
```markdown
|
|
363
|
+
## Capacity Sign-off — <version>
|
|
364
|
+
|
|
365
|
+
**Projection date**: YYYY-MM-DD
|
|
366
|
+
**Baseline period**: last 90 days
|
|
367
|
+
|
|
368
|
+
| Metric | Baseline peak | Projected peak | Headroom | Status |
|
|
369
|
+
|--------|-------------|---------------|----------|--------|
|
|
370
|
+
| CPU | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
|
|
371
|
+
| Memory | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
|
|
372
|
+
| DB pool | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
|
|
373
|
+
| p99 latency | [X]ms | [Y]ms | [±Z]% | PASS/WARN/FAIL |
|
|
374
|
+
|
|
375
|
+
**Load test artifact**: [link to load test report]
|
|
376
|
+
|
|
377
|
+
**Eng Lead sign-off**: _______________ Date: __________
|
|
378
|
+
**SRE Lead sign-off**: _______________ Date: __________
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
### When Tier-3 Applies as N/A
|
|
382
|
+
|
|
383
|
+
The capacity sign-off is `N/A` (with documented rationale) when:
|
|
384
|
+
- Project has < 100 daily active users (DAU) and no significant traffic growth expected
|
|
385
|
+
- Internal tooling with fixed user count
|
|
386
|
+
- Static content / documentation site
|
|
387
|
+
|
|
388
|
+
---
|
|
389
|
+
|
|
326
390
|
## Related Standards
|
|
327
391
|
|
|
328
392
|
- [Testing Standards](testing-standards.md) - Performance testing integration
|
|
@@ -330,6 +394,7 @@ Analogous to the SRE Error Budget concept, a Performance Budget defines the tole
|
|
|
330
394
|
- [Logging Standards](logging-standards.md) - Performance logging
|
|
331
395
|
- [Code Review Checklist](code-review-checklist.md) - Performance review
|
|
332
396
|
- [Deployment Standards](deployment-standards.md) - Performance validation pre-deployment
|
|
397
|
+
- [Release Readiness Gate](release-readiness-gate.md) - Dimension 1 (load) and Dimension 10 (capacity)
|
|
333
398
|
|
|
334
399
|
---
|
|
335
400
|
|
|
@@ -25,20 +25,20 @@ Policy as Code 的特殊風險:
|
|
|
25
25
|
```rego
|
|
26
26
|
# 檔案命名:<policy_module>_test.rego
|
|
27
27
|
# Package:<policy_package>_test
|
|
28
|
-
package
|
|
28
|
+
package governance.guardian.forbidden_patterns_test
|
|
29
29
|
|
|
30
30
|
import future.keywords.if
|
|
31
31
|
|
|
32
32
|
# 正向測試:規則應觸發(assert rule fires)
|
|
33
33
|
test_drop_database_is_forbidden if {
|
|
34
|
-
data.
|
|
34
|
+
data.governance.guardian.forbidden_patterns.has_forbidden_pattern with input as {
|
|
35
35
|
"plan": [{"command_type": "sql", "command": "DROP DATABASE prod_main", "reversible": false}]
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
# 負向測試:規則不應觸發(assert rule does NOT fire)
|
|
40
40
|
test_safe_select_is_not_forbidden if {
|
|
41
|
-
not data.
|
|
41
|
+
not data.governance.guardian.forbidden_patterns.has_forbidden_pattern with input as {
|
|
42
42
|
"plan": [{"command_type": "sql", "command": "SELECT * FROM users LIMIT 10", "reversible": true}]
|
|
43
43
|
}
|
|
44
44
|
}
|
|
@@ -79,9 +79,9 @@ docker run --rm \
|
|
|
79
79
|
default allow = false
|
|
80
80
|
|
|
81
81
|
allow if {
|
|
82
|
-
not data.
|
|
83
|
-
not data.
|
|
84
|
-
not data.
|
|
82
|
+
not data.governance.guardian.forbidden_patterns.has_forbidden_pattern
|
|
83
|
+
not data.governance.guardian.env_policy.prod_violation
|
|
84
|
+
not data.governance.guardian.logic_constraints.has_logic_violation
|
|
85
85
|
}
|
|
86
86
|
```
|
|
87
87
|
|
|
@@ -93,9 +93,9 @@ OPA ≥ 0.40 的型別系統嚴格區分 array 和 set。`violations` partial ru
|
|
|
93
93
|
|
|
94
94
|
```rego
|
|
95
95
|
# ✅ 正確:partial set rule 集合 violations
|
|
96
|
-
deny_reasons[r] if { r := data.
|
|
97
|
-
deny_reasons[r] if { r := data.
|
|
98
|
-
deny_reasons[r] if { r := data.
|
|
96
|
+
deny_reasons[r] if { r := data.governance.guardian.forbidden_patterns.violations[_] }
|
|
97
|
+
deny_reasons[r] if { r := data.governance.guardian.env_policy.violations[_] }
|
|
98
|
+
deny_reasons[r] if { r := data.governance.guardian.logic_constraints.violations[_] }
|
|
99
99
|
|
|
100
100
|
# ❌ 錯誤:array.concat 用在 set 上 → rego_type_error
|
|
101
101
|
# deny_reasons := array.concat(violations1, violations2)
|
|
@@ -56,8 +56,8 @@ escalation: # required
|
|
|
56
56
|
|
|
57
57
|
## Applicable Scenarios
|
|
58
58
|
|
|
59
|
-
-
|
|
60
|
-
-
|
|
59
|
+
- Orchestrator (adoption layer) selects recovery strategy before fix loop
|
|
60
|
+
- Pipeline Runner (adoption layer) handles `agent:error` with registry lookup
|
|
61
61
|
- Custom `recovery-recipes.yaml` for project-level recipe override
|
|
62
62
|
- Telemetry tracking recovery strategy effectiveness
|
|
63
63
|
|