universal-dev-standards 5.4.0 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/ai/options/testing/integration-testing.ai.yaml +2 -2
- package/bundled/ai/options/testing/unit-testing.ai.yaml +2 -2
- package/bundled/ai/standards/adversarial-test.ai.yaml +277 -0
- package/bundled/ai/standards/audit-trail.ai.yaml +113 -0
- package/bundled/ai/standards/browser-compatibility-standards.ai.yaml +63 -0
- package/bundled/ai/standards/chaos-injection-tests.ai.yaml +91 -0
- package/bundled/ai/standards/container-image-standards.ai.yaml +88 -0
- package/bundled/ai/standards/container-security.ai.yaml +331 -0
- package/bundled/ai/standards/contract-testing-standards.ai.yaml +62 -0
- package/bundled/ai/standards/cost-budget-test.ai.yaml +96 -0
- package/bundled/ai/standards/cross-flow-regression.ai.yaml +61 -0
- package/bundled/ai/standards/data-contract.ai.yaml +110 -0
- package/bundled/ai/standards/data-migration-testing.ai.yaml +96 -0
- package/bundled/ai/standards/data-pipeline.ai.yaml +113 -0
- package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +89 -0
- package/bundled/ai/standards/flaky-test-management.ai.yaml +89 -0
- package/bundled/ai/standards/flow-based-testing.ai.yaml +240 -0
- package/bundled/ai/standards/full-coverage-testing.ai.yaml +192 -0
- package/bundled/ai/standards/iac-design-principles.ai.yaml +83 -0
- package/bundled/ai/standards/incident-response.ai.yaml +107 -0
- package/bundled/ai/standards/license-compliance.ai.yaml +106 -0
- package/bundled/ai/standards/llm-output-validation.ai.yaml +269 -0
- package/bundled/ai/standards/mock-boundary.ai.yaml +250 -0
- package/bundled/ai/standards/mutation-testing.ai.yaml +192 -0
- package/bundled/ai/standards/pii-classification.ai.yaml +109 -0
- package/bundled/ai/standards/policy-as-code-testing.ai.yaml +227 -0
- package/bundled/ai/standards/prd-standards.ai.yaml +88 -0
- package/bundled/ai/standards/product-metrics-standards.ai.yaml +111 -0
- package/bundled/ai/standards/prompt-regression.ai.yaml +94 -0
- package/bundled/ai/standards/property-based-testing.ai.yaml +105 -0
- package/bundled/ai/standards/release-quality-manifest.ai.yaml +135 -0
- package/bundled/ai/standards/release-readiness-gate.ai.yaml +77 -0
- package/bundled/ai/standards/replay-test.ai.yaml +111 -0
- package/bundled/ai/standards/runbook.ai.yaml +104 -0
- package/bundled/ai/standards/sast-advanced.ai.yaml +135 -0
- package/bundled/ai/standards/schema-evolution.ai.yaml +111 -0
- package/bundled/ai/standards/secret-management-standards.ai.yaml +105 -0
- package/bundled/ai/standards/secure-op.ai.yaml +365 -0
- package/bundled/ai/standards/security-testing.ai.yaml +171 -0
- package/bundled/ai/standards/server-ops-security.ai.yaml +274 -0
- package/bundled/ai/standards/slo-sli.ai.yaml +97 -0
- package/bundled/ai/standards/smoke-test.ai.yaml +87 -0
- package/bundled/ai/standards/supply-chain-attestation.ai.yaml +109 -0
- package/bundled/ai/standards/test-completeness-dimensions.ai.yaml +52 -5
- package/bundled/ai/standards/testing.ai.yaml +20 -13
- package/bundled/ai/standards/user-story-mapping.ai.yaml +108 -0
- package/bundled/core/accessibility-standards.md +58 -0
- package/bundled/core/adversarial-test.md +212 -0
- package/bundled/core/branch-completion.md +4 -0
- package/bundled/core/browser-compatibility-standards.md +220 -0
- package/bundled/core/chaos-injection-tests.md +116 -0
- package/bundled/core/checkin-standards.md +1 -0
- package/bundled/core/container-security.md +521 -0
- package/bundled/core/contract-testing-standards.md +182 -0
- package/bundled/core/cost-budget-test.md +69 -0
- package/bundled/core/cross-flow-regression.md +190 -0
- package/bundled/core/data-migration-testing.md +110 -0
- package/bundled/core/disaster-recovery-drill.md +73 -0
- package/bundled/core/flaky-test-management.md +73 -0
- package/bundled/core/flow-based-testing.md +275 -0
- package/bundled/core/full-coverage-testing.md +183 -0
- package/bundled/core/llm-output-validation.md +178 -0
- package/bundled/core/mock-boundary.md +100 -0
- package/bundled/core/mutation-testing.md +97 -0
- package/bundled/core/performance-standards.md +65 -0
- package/bundled/core/policy-as-code-testing.md +188 -0
- package/bundled/core/prompt-regression.md +72 -0
- package/bundled/core/property-based-testing.md +73 -0
- package/bundled/core/release-quality-manifest.md +193 -0
- package/bundled/core/release-readiness-gate.md +184 -0
- package/bundled/core/replay-test.md +86 -0
- package/bundled/core/sast-advanced.md +300 -0
- package/bundled/core/secure-op.md +314 -0
- package/bundled/core/security-testing.md +87 -0
- package/bundled/core/server-ops-security.md +493 -0
- package/bundled/core/smoke-test.md +65 -0
- package/bundled/core/supply-chain-attestation.md +117 -0
- package/bundled/locales/zh-CN/CHANGELOG.md +3 -3
- package/bundled/locales/zh-CN/README.md +1 -1
- package/bundled/locales/zh-CN/skills/ai-instruction-standards/SKILL.md +5 -5
- package/bundled/locales/zh-TW/CHANGELOG.md +3 -3
- package/bundled/locales/zh-TW/README.md +1 -1
- package/bundled/locales/zh-TW/core/browser-compatibility-standards.md +11 -0
- package/bundled/locales/zh-TW/core/contract-testing-standards.md +11 -0
- package/bundled/locales/zh-TW/core/cross-flow-regression.md +11 -0
- package/bundled/locales/zh-TW/core/release-readiness-gate.md +11 -0
- package/bundled/locales/zh-TW/skills/ai-instruction-standards/SKILL.md +183 -79
- package/bundled/skills/README.md +4 -3
- package/bundled/skills/SKILL_NAMING.md +94 -0
- package/bundled/skills/ai-instruction-standards/SKILL.md +181 -88
- package/bundled/skills/atdd-assistant/SKILL.md +8 -0
- package/bundled/skills/bdd-assistant/SKILL.md +7 -0
- package/bundled/skills/checkin-assistant/SKILL.md +8 -0
- package/bundled/skills/code-review-assistant/SKILL.md +7 -0
- package/bundled/skills/journey-test-assistant/SKILL.md +203 -0
- package/bundled/skills/orchestrate/SKILL.md +167 -0
- package/bundled/skills/plan/SKILL.md +234 -0
- package/bundled/skills/pr-automation-assistant/SKILL.md +8 -0
- package/bundled/skills/push/SKILL.md +49 -2
- package/bundled/skills/{process-automation → skill-builder}/SKILL.md +1 -1
- package/bundled/skills/{forward-derivation → spec-derivation}/SKILL.md +1 -1
- package/bundled/skills/spec-driven-dev/SKILL.md +7 -0
- package/bundled/skills/sweep/SKILL.md +145 -0
- package/bundled/skills/tdd-assistant/SKILL.md +7 -0
- package/package.json +6 -6
- package/src/commands/check.js +43 -0
- package/src/commands/flow.js +8 -0
- package/src/commands/init.js +2 -1
- package/src/commands/start.js +14 -0
- package/src/commands/sweep.js +8 -0
- package/src/commands/update.js +10 -0
- package/src/commands/workflow.js +8 -0
- package/standards-registry.json +483 -5
- package/bundled/locales/zh-CN/skills/ac-coverage-assistant/SKILL.md +0 -190
- package/bundled/locales/zh-CN/skills/forward-derivation/SKILL.md +0 -71
- package/bundled/locales/zh-CN/skills/forward-derivation/guide.md +0 -130
- package/bundled/locales/zh-CN/skills/methodology-system/SKILL.md +0 -88
- package/bundled/locales/zh-CN/skills/methodology-system/create-methodology.md +0 -350
- package/bundled/locales/zh-CN/skills/methodology-system/guide.md +0 -131
- package/bundled/locales/zh-CN/skills/methodology-system/runtime.md +0 -279
- package/bundled/locales/zh-CN/skills/process-automation/SKILL.md +0 -143
- package/bundled/locales/zh-TW/skills/ac-coverage-assistant/SKILL.md +0 -195
- package/bundled/locales/zh-TW/skills/deploy-assistant/SKILL.md +0 -178
- package/bundled/locales/zh-TW/skills/forward-derivation/SKILL.md +0 -69
- package/bundled/locales/zh-TW/skills/forward-derivation/guide.md +0 -415
- package/bundled/locales/zh-TW/skills/methodology-system/SKILL.md +0 -86
- package/bundled/locales/zh-TW/skills/methodology-system/create-methodology.md +0 -350
- package/bundled/locales/zh-TW/skills/methodology-system/guide.md +0 -131
- package/bundled/locales/zh-TW/skills/methodology-system/runtime.md +0 -279
- package/bundled/locales/zh-TW/skills/process-automation/SKILL.md +0 -144
- /package/bundled/skills/{ac-coverage-assistant → ac-coverage}/SKILL.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/SKILL.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/create-methodology.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/guide.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/integrated-flow.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/prerequisite-check.md +0 -0
- /package/bundled/skills/{methodology-system → dev-methodology}/runtime.md +0 -0
- /package/bundled/skills/{forward-derivation → spec-derivation}/guide.md +0 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Cost Budget Test Standards
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
AI agent systems that call LLM APIs can accumulate costs rapidly if a pipeline runs in a runaway loop, encounters an unexpected token explosion, or has misconfigured budget thresholds. Cost budget tests verify that the zone classifier, threshold constants, and pipeline budget guards behave correctly at every boundary.
|
|
6
|
+
|
|
7
|
+
## Zone Classification
|
|
8
|
+
|
|
9
|
+
Most AI agent token budget systems divide usage ratios into zones. The boundaries between zones are the highest-risk points for off-by-one errors.
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
// Vitest boundary tests using TOKEN_BUDGET constants (not magic numbers)
|
|
13
|
+
import { classifyTokenZone, TOKEN_BUDGET } from "../types/index.js"
|
|
14
|
+
import { describe, it, expect } from "vitest"
|
|
15
|
+
|
|
16
|
+
describe("TokenBudgetZone classification boundaries", () => {
|
|
17
|
+
it.each([
|
|
18
|
+
[0.0, "safe", "zero usage"],
|
|
19
|
+
[TOKEN_BUDGET.WARNING_THRESHOLD - 0.01, "safe", "just below WARNING"],
|
|
20
|
+
[TOKEN_BUDGET.WARNING_THRESHOLD, "warning", "exactly at WARNING"],
|
|
21
|
+
[TOKEN_BUDGET.DANGER_THRESHOLD - 0.01, "warning", "just below DANGER"],
|
|
22
|
+
[TOKEN_BUDGET.DANGER_THRESHOLD, "danger", "exactly at DANGER"],
|
|
23
|
+
[TOKEN_BUDGET.BLOCKING_THRESHOLD - 0.01,"danger", "just below BLOCKING"],
|
|
24
|
+
[TOKEN_BUDGET.BLOCKING_THRESHOLD, "blocking", "exactly at BLOCKING"],
|
|
25
|
+
[1.0, "blocking", "fully exhausted"],
|
|
26
|
+
])("ratio=%f → %s (%s)", (ratio, expected) => {
|
|
27
|
+
expect(classifyTokenZone(ratio)).toBe(expected)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it("returns 'blocking' for ratio > 1.0 (over-budget)", () => {
|
|
31
|
+
expect(classifyTokenZone(1.5)).toBe("blocking")
|
|
32
|
+
})
|
|
33
|
+
})
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Pipeline Budget Config Tests
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import type { PipelineBudgetConfig } from "../types/index.js"
|
|
40
|
+
|
|
41
|
+
describe("PipelineBudgetConfig semantics", () => {
|
|
42
|
+
it("warningThreshold defaults should be 0-1 range", () => {
|
|
43
|
+
const config: PipelineBudgetConfig = {
|
|
44
|
+
maxCostPerRun: 1.0,
|
|
45
|
+
maxCostPerDay: 10.0,
|
|
46
|
+
warningThreshold: 0.8,
|
|
47
|
+
autoDowngrade: true,
|
|
48
|
+
}
|
|
49
|
+
expect(config.warningThreshold).toBeGreaterThan(0)
|
|
50
|
+
expect(config.warningThreshold).toBeLessThan(1)
|
|
51
|
+
})
|
|
52
|
+
})
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## What to Test
|
|
56
|
+
|
|
57
|
+
| Test Category | Why |
|
|
58
|
+
|---------------|-----|
|
|
59
|
+
| Exact boundary values (WARNING/DANGER/BLOCKING) | Off-by-one errors hide here |
|
|
60
|
+
| Below each boundary | Confirm zone below is correct |
|
|
61
|
+
| Zero usage ratio | Clean state |
|
|
62
|
+
| Ratio > 1.0 | Over-budget should still block |
|
|
63
|
+
| All TOKEN_BUDGET constants referenced | Mutation survival prevention |
|
|
64
|
+
|
|
65
|
+
## Related Standards
|
|
66
|
+
|
|
67
|
+
- [Mutation Testing Standards](mutation-testing.md) — constants without test coverage survive mutations
|
|
68
|
+
- [Testing Standards](testing.md) — overall test pyramid
|
|
69
|
+
- [LLM Output Validation](llm-output-validation.md) — output-layer budget constraints
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Cross-Flow Regression
|
|
2
|
+
|
|
3
|
+
> **Language**: English | [繁體中文](../locales/zh-TW/core/cross-flow-regression.md)
|
|
4
|
+
|
|
5
|
+
**Version**: 1.0.0
|
|
6
|
+
**Last Updated**: 2026-05-05
|
|
7
|
+
**Applicability**: All software projects with multiple user flows or business processes
|
|
8
|
+
**Scope**: universal
|
|
9
|
+
**Industry Standards**: ISTQB Advanced Test Analyst (Regression Test Strategy)
|
|
10
|
+
**References**: `core/flow-based-testing.md`, `core/testing-standards.md`
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Purpose
|
|
15
|
+
|
|
16
|
+
This standard defines cross-flow regression testing — verifying that changes to one flow do not break other flows, and that **combinations of flows** behave correctly when executed in sequence.
|
|
17
|
+
|
|
18
|
+
### Boundary with `flow-based-testing.md`
|
|
19
|
+
|
|
20
|
+
| Standard | Scope | What It Catches |
|
|
21
|
+
|----------|-------|----------------|
|
|
22
|
+
| `flow-based-testing.md` (Multi-Gate Model) | Single flow: Decision Points, Terminal States, Decision Table | Intra-flow branch coverage gaps |
|
|
23
|
+
| **This standard** | Multiple flows: interaction, shared state, sequential composition | Inter-flow contamination, accumulated-state bugs, regression across flows |
|
|
24
|
+
|
|
25
|
+
These are complementary, not overlapping. A project needs both.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Why Cross-Flow Bugs Are Distinct
|
|
30
|
+
|
|
31
|
+
Intra-flow testing (Multi-Gate) proves that "Login" handles all 7 terminal states. But it cannot detect:
|
|
32
|
+
|
|
33
|
+
- **State contamination**: after a failed "Create Order" (FAIL_QUOTA_EXCEEDED), the quota counter is corrupted → next "Create Order" attempt fails even after quota resets
|
|
34
|
+
- **Shared resource conflicts**: "Report Generation" and "Data Export" running concurrently corrupt a shared temp directory
|
|
35
|
+
- **Sequential dependency**: "Cancel Subscription" succeeds, but the subsequent "Reactivate" flow assumes subscription still exists → NullPointerException
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Cross-Flow Test Suite Definition
|
|
40
|
+
|
|
41
|
+
### Tier-1: Critical User Journeys (CUJ)
|
|
42
|
+
|
|
43
|
+
Critical User Journeys are end-to-end sequences spanning ≥ 2 flows that represent core business value paths. Every release must include a CUJ regression suite.
|
|
44
|
+
|
|
45
|
+
**CUJ identification**:
|
|
46
|
+
1. List all flows (from requirement-template §2.4)
|
|
47
|
+
2. Identify pairs/triples that share state or are commonly executed in sequence
|
|
48
|
+
3. Tag business-critical combinations (purchase, onboarding, authentication + downstream)
|
|
49
|
+
|
|
50
|
+
**CUJ Coverage Requirement**:
|
|
51
|
+
|
|
52
|
+
| Metric | Tier-2 Threshold (default) | Tier-1 Critical Path |
|
|
53
|
+
|--------|--------------------------|---------------------|
|
|
54
|
+
| CUJ pass rate | ≥ 95% | 100% |
|
|
55
|
+
| Business-critical flow combos | 100% | 100% |
|
|
56
|
+
|
|
57
|
+
### Tier-2: Regression on Flow Change
|
|
58
|
+
|
|
59
|
+
When any flow's §2.4 (Decision Points, Terminal States) is modified, the full CUJ suite must re-run — not just the tests for the changed flow. The triggering logic:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# In CI: detect flow spec changes
|
|
63
|
+
changed_flows=$(git diff origin/main... --name-only | grep -E "requirement-template|SPEC.*\.md")
|
|
64
|
+
if [ -n "$changed_flows" ]; then
|
|
65
|
+
npm run test:cross-flow-regression
|
|
66
|
+
fi
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Tier-3: Concurrency and Shared Resource Tests
|
|
70
|
+
|
|
71
|
+
For projects with concurrent user operations:
|
|
72
|
+
- Two users executing the same flow simultaneously
|
|
73
|
+
- Flow A and Flow B sharing a write resource
|
|
74
|
+
- A long-running (async) flow interacting with the result of a short-running flow
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Test Structure
|
|
79
|
+
|
|
80
|
+
Cross-flow regression tests use **sequential state threading** across flows (extending the `ctx` pattern from `flow-based-testing.md`):
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
describe("CUJ: Register → Verify Email → Create First Order", () => {
|
|
84
|
+
const ctx: {
|
|
85
|
+
userId?: string
|
|
86
|
+
token?: string
|
|
87
|
+
orderId?: string
|
|
88
|
+
} = {}
|
|
89
|
+
|
|
90
|
+
// Flow 1: Register
|
|
91
|
+
it("Flow-1 Step 1: Register new user", async () => {
|
|
92
|
+
ctx.userId = await registerUser({ email: testEmail, plan: "trial" })
|
|
93
|
+
expect(ctx.userId).toBeDefined()
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
// Flow 2: Email Verification (depends on Flow 1 output)
|
|
97
|
+
it("Flow-2 Step 1: Verify email token", async () => {
|
|
98
|
+
const token = await getEmailToken(ctx.userId!)
|
|
99
|
+
ctx.token = await verifyEmail(token)
|
|
100
|
+
expect(ctx.token).toBeDefined()
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
// Flow 3: Create Order (depends on Flow 2 auth token)
|
|
104
|
+
it("Flow-3 Step 1: Create first order", async () => {
|
|
105
|
+
ctx.orderId = await createOrder(ctx.token!, orderPayload)
|
|
106
|
+
expect(ctx.orderId).toMatch(/^ord-/)
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
// Cross-flow verification: order state reflects trial plan limits
|
|
110
|
+
it("Cross-flow: Trial plan quota enforced on first order", async () => {
|
|
111
|
+
const order = await getOrder(ctx.token!, ctx.orderId!)
|
|
112
|
+
expect(order.quota_applied).toBe("trial")
|
|
113
|
+
})
|
|
114
|
+
})
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Cross-Flow Failure Isolation
|
|
118
|
+
|
|
119
|
+
When a cross-flow test fails, the failure message must identify **which flow** introduced the state corruption:
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
// BAD: generic assertion
|
|
123
|
+
expect(result).toBe("success")
|
|
124
|
+
|
|
125
|
+
// GOOD: includes flow context
|
|
126
|
+
expect(result).toBe("success") // Flow-3 depends on Flow-2 token being valid
|
|
127
|
+
// If this fails, check Flow-2 email verification output
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Release Gate Integration
|
|
133
|
+
|
|
134
|
+
Cross-flow regression is **Dimension 6** in `release-readiness-gate.md` (Tier-2).
|
|
135
|
+
|
|
136
|
+
### Trigger Conditions
|
|
137
|
+
|
|
138
|
+
| Trigger | Scope |
|
|
139
|
+
|---------|-------|
|
|
140
|
+
| Every release candidate | Full CUJ suite |
|
|
141
|
+
| PR modifying any flow §2.4 | Full CUJ suite (pre-merge) |
|
|
142
|
+
| Post-deploy to staging | Smoke subset of CUJ |
|
|
143
|
+
| Post-deploy to production | Critical path CUJ only (canary) |
|
|
144
|
+
|
|
145
|
+
### Evidence for Sign-off
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
| 6 | Cross-flow Regression | PASS | CUJ suite: 47/47 passed; 0 flow-interaction failures | QA Lead |
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### WARN Threshold
|
|
152
|
+
|
|
153
|
+
- ≥ 95% CUJ pass rate but < 100% → WARN with specific failed CUJ documented and root-caused
|
|
154
|
+
- < 95% CUJ pass rate → FAIL (release blocked)
|
|
155
|
+
- Business-critical combo fails → FAIL regardless of overall rate
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Anti-Patterns
|
|
160
|
+
|
|
161
|
+
- **Skipping cross-flow tests when CI is slow** — they must run on every release candidate by definition
|
|
162
|
+
- **Testing each flow in isolation and calling it "regression"** — flow isolation is covered by Multi-Gate; cross-flow must have inter-flow state dependencies
|
|
163
|
+
- **Reusing the same `ctx` object across unrelated `describe` blocks** — each CUJ needs a clean, isolated `ctx`; contamination between CUJs masks bugs
|
|
164
|
+
- **No flow attribution in failure messages** — cross-flow failures are hard to debug; always indicate which upstream flow produced the corrupted state
|
|
165
|
+
- **Treating CUJ failures as flaky** — cross-flow state bugs are deterministic; "flaky" cross-flow tests are almost always a symptom of shared state corruption
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Relationship to Other Standards
|
|
170
|
+
|
|
171
|
+
- **`flow-based-testing.md`** — intra-flow gate (prerequisite for cross-flow)
|
|
172
|
+
- **`testing-standards.md`** — regression layer in the testing pyramid
|
|
173
|
+
- **`release-readiness-gate.md`** — Dimension 6 (Tier-2)
|
|
174
|
+
- **`e2e-testing.md`** — CUJ tests typically run at E2E or System Test level
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Version History
|
|
179
|
+
|
|
180
|
+
| Version | Date | Changes |
|
|
181
|
+
|---------|------|---------|
|
|
182
|
+
| 1.0.0 | 2026-05-05 | Initial release: CUJ definition, sequential state threading, release gate criteria, Tier-1/2/3 classification |
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## License
|
|
187
|
+
|
|
188
|
+
This standard is released under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/).
|
|
189
|
+
|
|
190
|
+
**Source**: [universal-dev-standards](https://github.com/AsiaOstrich/universal-dev-standards)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Data Migration Testing
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Database schema migrations are high-risk operations: they transform persistent data in ways that cannot be easily undone without a tested rollback path. A comprehensive migration test suite validates three axes — correctness (up applies cleanly), safety (down restores state), and robustness (applying twice is harmless).
|
|
6
|
+
|
|
7
|
+
## Requirements Summary
|
|
8
|
+
|
|
9
|
+
| ID | Rule | Rationale |
|
|
10
|
+
|----|------|-----------|
|
|
11
|
+
| REQ-DMT-001 | Every migration must have an up test | Unverified migrations corrupt production schema |
|
|
12
|
+
| REQ-DMT-002 | Every migration with a down function must have a rollback test | Untested rollbacks fail during incidents |
|
|
13
|
+
| REQ-DMT-003 | Applying the same migration twice must not fail | CI retries can trigger double-apply |
|
|
14
|
+
| REQ-DMT-004 | Migrations that alter data must include a data preservation test | Schema correctness ≠ data correctness |
|
|
15
|
+
| REQ-DMT-005 | Each test must use an isolated database | Shared state causes non-deterministic failures |
|
|
16
|
+
|
|
17
|
+
## Test Structure
|
|
18
|
+
|
|
19
|
+
### Isolation
|
|
20
|
+
|
|
21
|
+
Every migration test runs against an isolated database — either in-memory (SQLite `:memory:`) or a fresh Docker container (PostgreSQL). Never run migration tests against a shared development or staging database.
|
|
22
|
+
|
|
23
|
+
```typescript
|
|
24
|
+
// Good: isolated in-memory database per test file
|
|
25
|
+
const db = new Database(':memory:')
|
|
26
|
+
await applyBaseline(db)
|
|
27
|
+
|
|
28
|
+
// Bad: tests share a dev database
|
|
29
|
+
const db = openDatabase(process.env.DATABASE_URL)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Up Test
|
|
33
|
+
|
|
34
|
+
Apply the migration to a baseline schema. Assert the expected post-state.
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
it('adds email column to users table', async () => {
|
|
38
|
+
await migrate.up(db)
|
|
39
|
+
const columns = db.prepare("PRAGMA table_info(users)").all()
|
|
40
|
+
expect(columns.map(c => c.name)).toContain('email')
|
|
41
|
+
})
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Down Test (Rollback)
|
|
45
|
+
|
|
46
|
+
Apply up, then down. Assert the schema returns to its pre-migration state.
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
it('rollback removes email column', async () => {
|
|
50
|
+
await migrate.up(db)
|
|
51
|
+
await migrate.down(db)
|
|
52
|
+
const columns = db.prepare("PRAGMA table_info(users)").all()
|
|
53
|
+
expect(columns.map(c => c.name)).not.toContain('email')
|
|
54
|
+
})
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Idempotency Test
|
|
58
|
+
|
|
59
|
+
Apply the migration twice. The second apply must not throw.
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
it('applying migration twice is safe', async () => {
|
|
63
|
+
await migrate.up(db)
|
|
64
|
+
await expect(migrate.up(db)).resolves.not.toThrow()
|
|
65
|
+
})
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Data Preservation Test
|
|
69
|
+
|
|
70
|
+
Seed rows before the migration. Assert data integrity after.
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
it('preserves existing user rows', async () => {
|
|
74
|
+
db.prepare("INSERT INTO users (id, name) VALUES (1, 'Alice')").run()
|
|
75
|
+
await migrate.up(db)
|
|
76
|
+
const user = db.prepare("SELECT * FROM users WHERE id = 1").get()
|
|
77
|
+
expect(user.name).toBe('Alice')
|
|
78
|
+
})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Tooling
|
|
82
|
+
|
|
83
|
+
### SQLite / Drizzle ORM
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
import Database from 'better-sqlite3'
|
|
87
|
+
import { drizzle } from 'drizzle-orm/better-sqlite3'
|
|
88
|
+
import { migrate } from 'drizzle-orm/better-sqlite3/migrator'
|
|
89
|
+
|
|
90
|
+
const sqlite = new Database(':memory:')
|
|
91
|
+
const db = drizzle(sqlite)
|
|
92
|
+
await migrate(db, { migrationsFolder: './drizzle' })
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### PostgreSQL
|
|
96
|
+
|
|
97
|
+
Use `testcontainers` to spin up a fresh PostgreSQL container per test suite. The container is destroyed after the suite, guaranteeing isolation.
|
|
98
|
+
|
|
99
|
+
## Anti-Patterns
|
|
100
|
+
|
|
101
|
+
- **Testing against a shared database** — causes cross-test pollution, non-repeatable builds
|
|
102
|
+
- **Skipping down migration tests** — rollbacks fail during production incidents
|
|
103
|
+
- **Writing migration tests after the fact without seeding data** — misses data preservation bugs entirely
|
|
104
|
+
- **Committing a migration without a corresponding test** — the migration is untested until production
|
|
105
|
+
|
|
106
|
+
## See Also
|
|
107
|
+
|
|
108
|
+
- `database-standards.ai.yaml` — schema design principles
|
|
109
|
+
- `testing.ai.yaml` — general test structure and pyramid
|
|
110
|
+
- `verification-evidence.ai.yaml` — audit evidence requirements
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Disaster Recovery Drill Standards
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
An untested DR plan is a false sense of security. Teams that have never executed their recovery runbook under pressure will discover gaps at the worst possible time. DR drills expose these gaps safely.
|
|
6
|
+
|
|
7
|
+
## RTO/RPO Targets
|
|
8
|
+
|
|
9
|
+
Define these before writing the runbook:
|
|
10
|
+
|
|
11
|
+
| Metric | Definition | VibeOps Commercial Target |
|
|
12
|
+
|--------|-----------|--------------------------|
|
|
13
|
+
| RTO (Recovery Time Objective) | Max acceptable downtime | < 1 hour |
|
|
14
|
+
| RPO (Recovery Point Objective) | Max acceptable data loss | < 24 hours (daily backup) |
|
|
15
|
+
|
|
16
|
+
## Backup Restore Script
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
#!/usr/bin/env bash
|
|
20
|
+
# scripts/backup-restore.sh — DR drill backup restore verification
|
|
21
|
+
set -euo pipefail
|
|
22
|
+
|
|
23
|
+
BACKUP_DIR="${BACKUP_DIR:-/var/backups/vibeops}"
|
|
24
|
+
RESTORE_DIR="${RESTORE_DIR:-/tmp/dr-restore}"
|
|
25
|
+
DB_FILE="${DB_FILE:-vibeops.db}"
|
|
26
|
+
|
|
27
|
+
echo "=== DR Drill: Backup Restore Verification ==="
|
|
28
|
+
echo "Source: ${BACKUP_DIR}/${DB_FILE}.backup"
|
|
29
|
+
echo "Target: ${RESTORE_DIR}/${DB_FILE}"
|
|
30
|
+
|
|
31
|
+
mkdir -p "$RESTORE_DIR"
|
|
32
|
+
|
|
33
|
+
# Find latest backup
|
|
34
|
+
LATEST=$(ls -t "${BACKUP_DIR}"/*.backup 2>/dev/null | head -1)
|
|
35
|
+
if [[ -z "$LATEST" ]]; then
|
|
36
|
+
echo "FAIL: No backup found in ${BACKUP_DIR}"
|
|
37
|
+
exit 1
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Restore
|
|
41
|
+
cp "$LATEST" "${RESTORE_DIR}/${DB_FILE}"
|
|
42
|
+
|
|
43
|
+
# Verify integrity (SQLite)
|
|
44
|
+
if command -v sqlite3 >/dev/null 2>&1; then
|
|
45
|
+
sqlite3 "${RESTORE_DIR}/${DB_FILE}" "PRAGMA integrity_check;" | grep -q "ok" && \
|
|
46
|
+
echo "OK: Database integrity check passed" || \
|
|
47
|
+
{ echo "FAIL: Integrity check failed"; exit 1; }
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
BACKUP_AGE=$(( ($(date +%s) - $(stat -c %Y "$LATEST" 2>/dev/null || stat -f %m "$LATEST")) / 3600 ))
|
|
51
|
+
echo "OK: Backup age: ${BACKUP_AGE} hours (RPO target: 24h)"
|
|
52
|
+
|
|
53
|
+
echo "=== PASS: Restore complete ==="
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Game Day Protocol
|
|
57
|
+
|
|
58
|
+
1. **Announce**: Notify team 1 week in advance, define scope
|
|
59
|
+
2. **Baseline**: Document current system state
|
|
60
|
+
3. **Inject**: Simulate failure (rename/delete DB, kill process, etc.)
|
|
61
|
+
4. **Execute**: Team follows runbook from scratch — no shortcuts
|
|
62
|
+
5. **Measure**: Record RTO, RPO, issues encountered
|
|
63
|
+
6. **Retrospective**: What was unclear? What was missing?
|
|
64
|
+
|
|
65
|
+
## Runbook Template
|
|
66
|
+
|
|
67
|
+
See `docs/DR-RUNBOOK.md` for the full runbook template.
|
|
68
|
+
|
|
69
|
+
## Related Standards
|
|
70
|
+
|
|
71
|
+
- [Deployment Standards](deployment-standards.md) — deployment pipeline
|
|
72
|
+
- [Chaos Engineering Standards](chaos-engineering-standards.md) — failure injection
|
|
73
|
+
- [Verification Evidence Standards](verification-evidence.md) — drill records
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Flaky Test Management Standards
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
A single flaky test in a 3000-test suite can erode CI confidence enough that developers start ignoring failures. Once developers learn to "just re-run CI", real bugs slip through. The cost of eliminating flaky tests is always lower than the cost of the false sense of security they create.
|
|
6
|
+
|
|
7
|
+
## Definition
|
|
8
|
+
|
|
9
|
+
A test is **flaky** if it produces different results (pass/fail) on consecutive runs with the same code. As an operational threshold, a test that fails in ≥ 2% of runs on `main` without any code change is classified as flaky.
|
|
10
|
+
|
|
11
|
+
## Detection
|
|
12
|
+
|
|
13
|
+
Most CI systems can detect flakiness automatically:
|
|
14
|
+
|
|
15
|
+
- **GitHub Actions**: Look for `Flaky tests detected` annotations
|
|
16
|
+
- **Manual**: Run `npx vitest run --reporter=verbose` 5 times, look for non-deterministic results
|
|
17
|
+
- **Vitest**: `vitest run --repeat=5` (runs each test 5 times)
|
|
18
|
+
|
|
19
|
+
## Quarantine Workflow
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
Detected → Quarantine (< 48h) → Track → Fix or Delete (< 30 days)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Quarantine Annotation
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
// TODO: quarantined 2026-05-05 — flaky race condition, see issue #42
|
|
29
|
+
it.skip("reconnects after WebSocket disconnect", async () => {
|
|
30
|
+
// ... test body preserved for reference
|
|
31
|
+
})
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Tracking Issue Template
|
|
35
|
+
|
|
36
|
+
```markdown
|
|
37
|
+
**Flaky Test**: `describe > test name`
|
|
38
|
+
**File**: `src/path/to/test.ts`
|
|
39
|
+
**Quarantined**: 2026-05-05
|
|
40
|
+
**Failure rate**: ~5% on main
|
|
41
|
+
**Known failure mode**: `Cannot read property 'socket' of undefined`
|
|
42
|
+
**Root cause hypothesis**: Race condition in WebSocket teardown
|
|
43
|
+
**Deadline**: 2026-06-04
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Common Root Causes
|
|
47
|
+
|
|
48
|
+
| Root Cause | Fix |
|
|
49
|
+
|-----------|-----|
|
|
50
|
+
| Race condition | Use `waitFor()`, `vi.waitFor()`, proper async coordination |
|
|
51
|
+
| Shared state | Reset state in `beforeEach`/`afterEach` |
|
|
52
|
+
| External service | Mock the dependency |
|
|
53
|
+
| File system ordering | Use deterministic sort |
|
|
54
|
+
| Random without seed | Set fixed seed in test |
|
|
55
|
+
| Timing-dependent | Fake timers (`vi.useFakeTimers()`) |
|
|
56
|
+
|
|
57
|
+
## Vitest Configuration
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
// vitest.config.ts
|
|
61
|
+
export default defineConfig({
|
|
62
|
+
test: {
|
|
63
|
+
retry: 2, // retry failed tests up to 2 times
|
|
64
|
+
testTimeout: 10000, // 10s timeout prevents infinite hangs
|
|
65
|
+
hookTimeout: 5000, // 5s hook timeout
|
|
66
|
+
}
|
|
67
|
+
})
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Related Standards
|
|
71
|
+
|
|
72
|
+
- [Testing Standards](testing.md) — overall test pyramid
|
|
73
|
+
- [Test Governance Standards](test-governance.md) — CI policies
|