universal-dev-standards 5.4.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/bundled/ai/options/testing/integration-testing.ai.yaml +2 -2
  2. package/bundled/ai/options/testing/unit-testing.ai.yaml +2 -2
  3. package/bundled/ai/standards/adversarial-test.ai.yaml +277 -0
  4. package/bundled/ai/standards/audit-trail.ai.yaml +113 -0
  5. package/bundled/ai/standards/browser-compatibility-standards.ai.yaml +63 -0
  6. package/bundled/ai/standards/chaos-injection-tests.ai.yaml +91 -0
  7. package/bundled/ai/standards/container-image-standards.ai.yaml +88 -0
  8. package/bundled/ai/standards/container-security.ai.yaml +331 -0
  9. package/bundled/ai/standards/contract-testing-standards.ai.yaml +62 -0
  10. package/bundled/ai/standards/cost-budget-test.ai.yaml +96 -0
  11. package/bundled/ai/standards/cross-flow-regression.ai.yaml +61 -0
  12. package/bundled/ai/standards/data-contract.ai.yaml +110 -0
  13. package/bundled/ai/standards/data-migration-testing.ai.yaml +96 -0
  14. package/bundled/ai/standards/data-pipeline.ai.yaml +113 -0
  15. package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +89 -0
  16. package/bundled/ai/standards/flaky-test-management.ai.yaml +89 -0
  17. package/bundled/ai/standards/flow-based-testing.ai.yaml +240 -0
  18. package/bundled/ai/standards/full-coverage-testing.ai.yaml +192 -0
  19. package/bundled/ai/standards/iac-design-principles.ai.yaml +83 -0
  20. package/bundled/ai/standards/incident-response.ai.yaml +107 -0
  21. package/bundled/ai/standards/license-compliance.ai.yaml +106 -0
  22. package/bundled/ai/standards/llm-output-validation.ai.yaml +269 -0
  23. package/bundled/ai/standards/mock-boundary.ai.yaml +250 -0
  24. package/bundled/ai/standards/mutation-testing.ai.yaml +192 -0
  25. package/bundled/ai/standards/pii-classification.ai.yaml +109 -0
  26. package/bundled/ai/standards/policy-as-code-testing.ai.yaml +227 -0
  27. package/bundled/ai/standards/prd-standards.ai.yaml +88 -0
  28. package/bundled/ai/standards/product-metrics-standards.ai.yaml +111 -0
  29. package/bundled/ai/standards/prompt-regression.ai.yaml +94 -0
  30. package/bundled/ai/standards/property-based-testing.ai.yaml +105 -0
  31. package/bundled/ai/standards/release-quality-manifest.ai.yaml +135 -0
  32. package/bundled/ai/standards/release-readiness-gate.ai.yaml +77 -0
  33. package/bundled/ai/standards/replay-test.ai.yaml +111 -0
  34. package/bundled/ai/standards/runbook.ai.yaml +104 -0
  35. package/bundled/ai/standards/sast-advanced.ai.yaml +135 -0
  36. package/bundled/ai/standards/schema-evolution.ai.yaml +111 -0
  37. package/bundled/ai/standards/secret-management-standards.ai.yaml +105 -0
  38. package/bundled/ai/standards/secure-op.ai.yaml +365 -0
  39. package/bundled/ai/standards/security-testing.ai.yaml +171 -0
  40. package/bundled/ai/standards/server-ops-security.ai.yaml +274 -0
  41. package/bundled/ai/standards/slo-sli.ai.yaml +97 -0
  42. package/bundled/ai/standards/smoke-test.ai.yaml +87 -0
  43. package/bundled/ai/standards/supply-chain-attestation.ai.yaml +109 -0
  44. package/bundled/ai/standards/test-completeness-dimensions.ai.yaml +52 -5
  45. package/bundled/ai/standards/testing.ai.yaml +20 -13
  46. package/bundled/ai/standards/user-story-mapping.ai.yaml +108 -0
  47. package/bundled/core/accessibility-standards.md +58 -0
  48. package/bundled/core/adversarial-test.md +212 -0
  49. package/bundled/core/branch-completion.md +4 -0
  50. package/bundled/core/browser-compatibility-standards.md +220 -0
  51. package/bundled/core/chaos-injection-tests.md +116 -0
  52. package/bundled/core/checkin-standards.md +1 -0
  53. package/bundled/core/container-security.md +521 -0
  54. package/bundled/core/contract-testing-standards.md +182 -0
  55. package/bundled/core/cost-budget-test.md +69 -0
  56. package/bundled/core/cross-flow-regression.md +190 -0
  57. package/bundled/core/data-migration-testing.md +110 -0
  58. package/bundled/core/disaster-recovery-drill.md +73 -0
  59. package/bundled/core/flaky-test-management.md +73 -0
  60. package/bundled/core/flow-based-testing.md +275 -0
  61. package/bundled/core/full-coverage-testing.md +183 -0
  62. package/bundled/core/llm-output-validation.md +178 -0
  63. package/bundled/core/mock-boundary.md +100 -0
  64. package/bundled/core/mutation-testing.md +97 -0
  65. package/bundled/core/performance-standards.md +65 -0
  66. package/bundled/core/policy-as-code-testing.md +188 -0
  67. package/bundled/core/prompt-regression.md +72 -0
  68. package/bundled/core/property-based-testing.md +73 -0
  69. package/bundled/core/release-quality-manifest.md +193 -0
  70. package/bundled/core/release-readiness-gate.md +184 -0
  71. package/bundled/core/replay-test.md +86 -0
  72. package/bundled/core/sast-advanced.md +300 -0
  73. package/bundled/core/secure-op.md +314 -0
  74. package/bundled/core/security-testing.md +87 -0
  75. package/bundled/core/server-ops-security.md +493 -0
  76. package/bundled/core/smoke-test.md +65 -0
  77. package/bundled/core/supply-chain-attestation.md +117 -0
  78. package/bundled/locales/zh-CN/CHANGELOG.md +3 -3
  79. package/bundled/locales/zh-CN/README.md +1 -1
  80. package/bundled/locales/zh-CN/skills/ai-instruction-standards/SKILL.md +5 -5
  81. package/bundled/locales/zh-TW/CHANGELOG.md +3 -3
  82. package/bundled/locales/zh-TW/README.md +1 -1
  83. package/bundled/locales/zh-TW/core/browser-compatibility-standards.md +11 -0
  84. package/bundled/locales/zh-TW/core/contract-testing-standards.md +11 -0
  85. package/bundled/locales/zh-TW/core/cross-flow-regression.md +11 -0
  86. package/bundled/locales/zh-TW/core/release-readiness-gate.md +11 -0
  87. package/bundled/locales/zh-TW/skills/ai-instruction-standards/SKILL.md +183 -79
  88. package/bundled/skills/README.md +4 -3
  89. package/bundled/skills/SKILL_NAMING.md +94 -0
  90. package/bundled/skills/ai-instruction-standards/SKILL.md +181 -88
  91. package/bundled/skills/atdd-assistant/SKILL.md +8 -0
  92. package/bundled/skills/bdd-assistant/SKILL.md +7 -0
  93. package/bundled/skills/checkin-assistant/SKILL.md +8 -0
  94. package/bundled/skills/code-review-assistant/SKILL.md +7 -0
  95. package/bundled/skills/journey-test-assistant/SKILL.md +203 -0
  96. package/bundled/skills/orchestrate/SKILL.md +167 -0
  97. package/bundled/skills/plan/SKILL.md +234 -0
  98. package/bundled/skills/pr-automation-assistant/SKILL.md +8 -0
  99. package/bundled/skills/push/SKILL.md +49 -2
  100. package/bundled/skills/{process-automation → skill-builder}/SKILL.md +1 -1
  101. package/bundled/skills/{forward-derivation → spec-derivation}/SKILL.md +1 -1
  102. package/bundled/skills/spec-driven-dev/SKILL.md +7 -0
  103. package/bundled/skills/sweep/SKILL.md +145 -0
  104. package/bundled/skills/tdd-assistant/SKILL.md +7 -0
  105. package/package.json +6 -6
  106. package/src/commands/check.js +43 -0
  107. package/src/commands/flow.js +8 -0
  108. package/src/commands/init.js +2 -1
  109. package/src/commands/start.js +14 -0
  110. package/src/commands/sweep.js +8 -0
  111. package/src/commands/update.js +10 -0
  112. package/src/commands/workflow.js +8 -0
  113. package/standards-registry.json +483 -5
  114. package/bundled/locales/zh-CN/skills/ac-coverage-assistant/SKILL.md +0 -190
  115. package/bundled/locales/zh-CN/skills/forward-derivation/SKILL.md +0 -71
  116. package/bundled/locales/zh-CN/skills/forward-derivation/guide.md +0 -130
  117. package/bundled/locales/zh-CN/skills/methodology-system/SKILL.md +0 -88
  118. package/bundled/locales/zh-CN/skills/methodology-system/create-methodology.md +0 -350
  119. package/bundled/locales/zh-CN/skills/methodology-system/guide.md +0 -131
  120. package/bundled/locales/zh-CN/skills/methodology-system/runtime.md +0 -279
  121. package/bundled/locales/zh-CN/skills/process-automation/SKILL.md +0 -143
  122. package/bundled/locales/zh-TW/skills/ac-coverage-assistant/SKILL.md +0 -195
  123. package/bundled/locales/zh-TW/skills/deploy-assistant/SKILL.md +0 -178
  124. package/bundled/locales/zh-TW/skills/forward-derivation/SKILL.md +0 -69
  125. package/bundled/locales/zh-TW/skills/forward-derivation/guide.md +0 -415
  126. package/bundled/locales/zh-TW/skills/methodology-system/SKILL.md +0 -86
  127. package/bundled/locales/zh-TW/skills/methodology-system/create-methodology.md +0 -350
  128. package/bundled/locales/zh-TW/skills/methodology-system/guide.md +0 -131
  129. package/bundled/locales/zh-TW/skills/methodology-system/runtime.md +0 -279
  130. package/bundled/locales/zh-TW/skills/process-automation/SKILL.md +0 -144
  131. /package/bundled/skills/{ac-coverage-assistant → ac-coverage}/SKILL.md +0 -0
  132. /package/bundled/skills/{methodology-system → dev-methodology}/SKILL.md +0 -0
  133. /package/bundled/skills/{methodology-system → dev-methodology}/create-methodology.md +0 -0
  134. /package/bundled/skills/{methodology-system → dev-methodology}/guide.md +0 -0
  135. /package/bundled/skills/{methodology-system → dev-methodology}/integrated-flow.md +0 -0
  136. /package/bundled/skills/{methodology-system → dev-methodology}/prerequisite-check.md +0 -0
  137. /package/bundled/skills/{methodology-system → dev-methodology}/runtime.md +0 -0
  138. /package/bundled/skills/{forward-derivation → spec-derivation}/guide.md +0 -0
@@ -0,0 +1,100 @@
1
+ # Mock Boundary Standards
2
+
3
+ **Version**: 1.0.0
4
+ **Last Updated**: 2026-05-04
5
+ **Applicability**: All software projects with unit and integration tests
6
+ **Scope**: universal
7
+ **Industry Standards**: ISTQB Foundation (Test Doubles), xUnit Patterns (Gerard Meszaros)
8
+ **References**: "Working Effectively with Legacy Code" (Feathers), "Growing Object-Oriented Software" (Freeman & Pryce)
9
+
10
+ [English](.) | [繁體中文](../locales/zh-TW/core/mock-boundary.md)
11
+
12
+ ---
13
+
14
+ ## Purpose
15
+
16
+ This document defines rules for what can and cannot be mocked in tests. Its goal is to prevent **hollow tests** — tests that always pass but fail to detect real bugs because they replace the system's logic with stubs.
17
+
18
+ ---
19
+
20
+ ## The Hollow Test Problem
21
+
22
+ A hollow test mocks so much of the system that the test becomes a specification of mock wiring rather than system behavior. The classic symptom: you can delete the implementation file and the test still passes.
23
+
24
+ **Real example (VibeOps SPEC-002.test.ts)**:
25
+
26
+ ```typescript
27
+ vi.mock('../../src/runner/agent-runner.js') // Core logic replaced
28
+ vi.mock('../../src/runner/guardian-hooks.js') // Core logic replaced
29
+ vi.mock('../../src/runner/prototyper.js') // Core logic replaced
30
+ vi.mock('../../src/runner/iteration-report.js') // Core logic replaced
31
+ vi.mock('../../src/memory/memory-store.js') // Core logic replaced
32
+ vi.mock('node:fs/promises', ...) // I/O replaced
33
+
34
+ // All assertions verify mock call counts — not actual outputs.
35
+ // runPipeline() touches zero real code.
36
+ ```
37
+
38
+ ---
39
+
40
+ ## What You CAN Mock
41
+
42
+ | Category | Examples | Reason |
43
+ |----------|----------|--------|
44
+ | External HTTP services | LLM APIs, payment gateways, email services | Prevents flaky tests; controls response scenarios |
45
+ | Time functions | `Date.now()`, `new Date()`, `setTimeout` | Makes tests deterministic |
46
+ | Environment variables | `process.env.NODE_ENV`, `process.env.LICENSE_KEY` | Enables config variation |
47
+ | File system (unit tests only) | `fs.readFile`, `fs.writeFile` | Avoids I/O in fast unit tests |
48
+ | Cross-module boundaries (with IT counterpart) | Other modules' public APIs | Isolates unit under test |
49
+
50
+ ---
51
+
52
+ ## What You CANNOT Mock
53
+
54
+ | Category | Example Violation | Why Forbidden |
55
+ |----------|-------------------|---------------|
56
+ | Own module's core logic | `vi.mock('./pipeline-runner.js')` in pipeline-runner tests | Makes the test a no-op |
57
+ | Database in IT/flow/E2E tests | `vi.mock('./db/client.js')` in integration tests | Hides query bugs, schema issues |
58
+ | HTTP framework internals | `vi.mock('express')` | Real routing may be broken |
59
+ | Security controls | Always-pass auth middleware stub | Security regressions invisible |
60
+
61
+ ---
62
+
63
+ ## Hollow Test Detection
64
+
65
+ Before submitting a test file, check:
66
+
67
+ 1. **Mock count ≥ import count** → Review: at least one assertion must verify actual output
68
+ 2. **All assertions are `.toHaveBeenCalled()` variants** → Add output-value assertions
69
+ 3. **Mock path matches test subject directory** → Self-referential mock; remove it
70
+ 4. **More mock setup lines than assertion lines** → Likely hollow
71
+
72
+ ---
73
+
74
+ ## Anti-Patterns
75
+
76
+ - **Total Mock Isolation**: Every import mocked; only mock interactions asserted
77
+ - **Mock the World**: External + internal + DB + FS all mocked in one test
78
+ - **Orphan Mock**: Cross-module mock with no integration test counterpart
79
+ - **Security Bypass Mock**: Auth/permission logic replaced with pass-through stub
80
+ - **Database Mock Cascade**: DB returns hardcoded data, hiding real query errors
81
+
82
+ ---
83
+
84
+ ## Rules Summary
85
+
86
+ | Rule | Trigger | Action |
87
+ |------|---------|--------|
88
+ | No self-mock | Test file mocks its own module | Remove mock; let real code run |
89
+ | Real DB in IT/flow | Writing IT or flow test | Use in-memory SQLite or test schema |
90
+ | IT counterpart | Mocking cross-module boundary | Ensure corresponding IT exists |
91
+ | No security mock | Test involves auth/permissions | Use real test user + real token |
92
+ | Hollow review | Mock count ≥ import count | Add output-value assertion |
93
+
94
+ ---
95
+
96
+ ## Relationship to Other Standards
97
+
98
+ - **testing**: Mock boundary rules apply to all test levels in the testing pyramid
99
+ - **test-completeness-dimensions**: Dimension 8 (AI Test Quality) references these rules
100
+ - **flow-based-testing**: Flow tests must follow mock boundary rules
@@ -0,0 +1,97 @@
1
+ # Mutation Testing Standards
2
+
3
+ **Version**: 1.0.0
4
+ **Last Updated**: 2026-05-04
5
+ **Applicability**: All software projects with unit/integration tests
6
+ **Scope**: universal
7
+ **Industry Standards**: ISTQB Foundation Syllabus (test effectiveness metrics)
8
+ **References**: "Introduction to Software Testing" (Ammann & Offutt), Stryker Mutator docs
9
+
10
+ [English](.) | [繁體中文](../locales/zh-TW/core/mutation-testing.md)
11
+
12
+ ---
13
+
14
+ ## Purpose
15
+
16
+ Mutation testing evaluates test suite effectiveness by injecting artificial bugs and checking whether tests detect them. It answers the question that line coverage cannot: **"Do my tests actually verify correct behavior?"**
17
+
18
+ ---
19
+
20
+ ## Key Concept: Mutation Score
21
+
22
+ ```
23
+ Mutation Score = Killed Mutants / (Killed + Survived) × 100%
24
+ ```
25
+
26
+ - **Killed**: Test suite detected the artificial bug (test failed) ✅
27
+ - **Survived**: Test suite missed the bug (tests still pass) ❌
28
+
29
+ A test that asserts only `expect(x).toBeDefined()` can reach 100% line coverage while still letting many mutants survive, because any value other than `undefined` — `null`, `0`, or `"wrong"` — satisfies `.toBeDefined()`.
30
+
31
+ ---
32
+
33
+ ## Tools
34
+
35
+ | Language | Tool | Command |
36
+ |----------|------|---------|
37
+ | TypeScript/JS | Stryker Mutator | `npx stryker run` |
38
+ | Python | mutmut | `mutmut run` |
39
+ | Java | PIT (Pitest) | `mvn pitest:mutationCoverage` |
40
+
41
+ ---
42
+
43
+ ## Thresholds
44
+
45
+ | Module Type | Minimum Score | Enforcement |
46
+ |-------------|--------------|-------------|
47
+ | Auth/License/Payment/Security | 80% | Block release |
48
+ | Standard business logic | 70% | Warning; resolve before next release |
49
+ | AI-generated tests | 50% | Required; reject if below |
50
+ | Overall project | 60% | Track trend; alert on regression |
51
+
52
+ ---
53
+
54
+ ## When to Run
55
+
56
+ | Trigger | Command | Enforcement |
57
+ |---------|---------|-------------|
58
+ | Pre-release gate | `npm run test:mutation` | ≥ 60% overall |
59
+ | Critical module change | `npx stryker run --mutate 'src/auth/**'` | ≥ 80% |
60
+ | AI-generated test review | `npx stryker run` | ≥ 50% |
61
+
62
+ **Never** add mutation testing to commit hooks — it's too slow (10-60 minutes).
63
+
64
+ ---
65
+
66
+ ## Stryker Quick Start (TypeScript + Vitest)
67
+
68
+ ```bash
69
+ npm install --save-dev @stryker-mutator/core @stryker-mutator/vitest-runner
70
+ ```
71
+
72
+ ```json
73
+ // stryker.config.json
74
+ {
75
+ "testRunner": "vitest",
76
+ "coverageAnalysis": "perTest",
77
+ "mutate": ["src/license/**/*.ts", "!src/**/*.test.ts"],
78
+ "thresholds": { "high": 80, "low": 60, "break": 50 }
79
+ }
80
+ ```
81
+
82
+ ---
83
+
84
+ ## Anti-Patterns
85
+
86
+ - Treating line coverage as a proxy for test effectiveness
87
+ - Adding mutation testing to CI for every PR (too slow)
88
+ - Accepting AI-generated tests without mutation score validation
89
+ - Trying to kill surviving mutants by adding weak `toBeDefined()` assertions instead of asserting exact output values
90
+
91
+ ---
92
+
93
+ ## Relationship to Other Standards
94
+
95
+ - `test-completeness-dimensions`: Dimension 8 (AI Test Quality) references mutation score
96
+ - `mock-boundary`: Hollow tests survive many mutations; mock boundary rules prevent hollow tests
97
+ - `testing`: Mutation testing is the quality gate on top of the test pyramid
@@ -323,6 +323,70 @@ Analogous to the SRE Error Budget concept, a Performance Budget defines the tole
323
323
 
324
324
  ---
325
325
 
326
+ ## Per-Release Capacity Sign-off
327
+
328
+ This section defines the **capacity gate** that must be satisfied before production release (Dimension 10 in `release-readiness-gate.md`, Tier-3).
329
+
330
+ ### Capacity Forecast
331
+
332
+ Before each release candidate, produce a capacity forecast based on:
333
+
334
+ 1. **Baseline**: 90-day rolling average of peak TPS and resource utilization (CPU, memory, DB connections, storage growth rate)
335
+ 2. **Release impact estimate**: expected traffic delta from new features (e.g., +15% TPS from new notification flow)
336
+ 3. **Seasonal adjustment**: any known traffic spikes within the next 30 days (marketing campaigns, seasonal peaks)
337
+
338
+ ### Headroom Thresholds
339
+
340
+ | Metric | Target (PASS) | Warn Band | Fail Threshold |
341
+ |--------|--------------|-----------|----------------|
342
+ | CPU headroom at projected peak | ≥ 30% | 20–30% | < 20% |
343
+ | Memory headroom | ≥ 25% | 15–25% | < 15% |
344
+ | DB connection pool headroom | ≥ 40% | 25–40% | < 25% |
345
+ | p99 latency vs baseline | ≤ +5% | +5% to +10% | > +10% regression |
346
+ | Error rate at peak load | < 0.1% | 0.1–0.5% | > 0.5% |
347
+
348
+ ### Load Test Requirement
349
+
350
+ Before finalizing the capacity sign-off, run the load test scenarios defined in the Performance Testing sections above (at minimum, a Soak test and a Spike test):
351
+
352
+ ```bash
353
+ # Example: k6 capacity verification run
354
+ k6 run --vus 500 --duration 20m scripts/perf/soak-test.js
355
+ # Pass criterion: headroom metrics above, p99 within budget
356
+ ```
357
+
358
+ ### Sign-off Evidence
359
+
360
+ The capacity gate requires **two named sign-offs**: one from the Engineering Lead and one from the SRE Lead.
361
+
362
+ ```markdown
363
+ ## Capacity Sign-off — <version>
364
+
365
+ **Projection date**: YYYY-MM-DD
366
+ **Baseline period**: last 90 days
367
+
368
+ | Metric | Baseline peak | Projected peak | Headroom | Status |
369
+ |--------|-------------|---------------|----------|--------|
370
+ | CPU | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
371
+ | Memory | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
372
+ | DB pool | [X]% | [Y]% | [Z]% | PASS/WARN/FAIL |
373
+ | p99 latency | [X]ms | [Y]ms | [±Z]% | PASS/WARN/FAIL |
374
+
375
+ **Load test artifact**: [link to load test report]
376
+
377
+ **Eng Lead sign-off**: _______________ Date: __________
378
+ **SRE Lead sign-off**: _______________ Date: __________
379
+ ```
380
+
381
+ ### When Tier-3 Applies as N/A
382
+
383
+ The capacity sign-off is `N/A` (with documented rationale) when:
384
+ - Project has < 100 DAU and no significant traffic growth expected
385
+ - Internal tooling with fixed user count
386
+ - Static content / documentation site
387
+
388
+ ---
389
+
326
390
  ## Related Standards
327
391
 
328
392
  - [Testing Standards](testing-standards.md) - Performance testing integration
@@ -330,6 +394,7 @@ Analogous to the SRE Error Budget concept, a Performance Budget defines the tole
330
394
  - [Logging Standards](logging-standards.md) - Performance logging
331
395
  - [Code Review Checklist](code-review-checklist.md) - Performance review
332
396
  - [Deployment Standards](deployment-standards.md) - Performance validation pre-deployment
397
+ - [Release Readiness Gate](release-readiness-gate.md) - Dimension 1 (load) and Dimension 10 (capacity)
333
398
 
334
399
  ---
335
400
 
@@ -0,0 +1,188 @@
1
+ # Policy as Code 測試標準
2
+
3
+ > 標準 ID:`policy-as-code-testing`
4
+ > 版本:v1.0.0
5
+ > 最後更新:2026-05-05
6
+
7
+ ---
8
+
9
+ ## 為什麼需要測試 Policy as Code?
10
+
11
+ OPA(Open Policy Agent)的 Rego policy 控制 AI Agent 能否執行生產環境操作。**未測試的 policy = 靜默的安全漏洞。**
12
+
13
+ Policy as Code 的特殊風險:
14
+ 1. **邊界條件難以推理**:`reversible: false` + `target_env: "prod"` 組合是否觸發?
15
+ 2. **型別錯誤只在執行時爆發**:`array.concat()` 用在 set 型別 → 靜默失效
16
+ 3. **Fail-Open 風險**:評估失敗若回傳 `allow: true`,攻擊者可觸發未定義路徑
17
+ 4. **Policy 改動回歸**:新增一條 rule 可能意外放行原本被擋的案例
18
+
19
+ ---
20
+
21
+ ## 一、OPA 測試框架
22
+
23
+ ### 測試規則格式
24
+
25
+ ```rego
26
+ # 檔案命名:<policy_module>_test.rego
27
+ # Package:<policy_package>_test
28
+ package vibeops.guardian.forbidden_patterns_test
29
+
30
+ import future.keywords.if
31
+
32
+ # 正向測試:規則應觸發(assert rule fires)
33
+ test_drop_database_is_forbidden if {
34
+ data.vibeops.guardian.forbidden_patterns.has_forbidden_pattern with input as {
35
+ "plan": [{"command_type": "sql", "command": "DROP DATABASE prod_main", "reversible": false}]
36
+ }
37
+ }
38
+
39
+ # 負向測試:規則不應觸發(assert rule does NOT fire)
40
+ test_safe_select_is_not_forbidden if {
41
+ not data.vibeops.guardian.forbidden_patterns.has_forbidden_pattern with input as {
42
+ "plan": [{"command_type": "sql", "command": "SELECT * FROM users LIMIT 10", "reversible": true}]
43
+ }
44
+ }
45
+ ```
46
+
47
+ ### 執行方式
48
+
49
+ ```bash
50
+ # OPA 已安裝時
51
+ opa test src/guardian/policies/ -v
52
+
53
+ # 透過 Docker(不需安裝 OPA)
54
+ docker run --rm \
55
+ -v "$(pwd)/src/guardian/policies:/policies:ro" \
56
+ openpolicyagent/opa:latest-static \
57
+ test /policies -v
58
+ ```
59
+
60
+ ---
61
+
62
+ ## 二、每個 Policy Module 的最低測試要求
63
+
64
+ | 類型 | 最少案例 | 說明 |
65
+ |------|---------|------|
66
+ | ALLOW cases | 2 | 應該通過的正常操作 |
67
+ | DENY cases | 3 | 應該被攔截的危險操作 |
68
+ | Boundary cases | 1 | 邊界條件(如 reversible=true vs. false)|
69
+ | Integration(main policy)| 2 | 整合 main.rego 的允許 + 拒絕路徑 |
70
+
71
+ ---
72
+
73
+ ## 三、Policy Module 設計原則
74
+
75
+ ### 3.1 Fail-Closed 預設
76
+
77
+ ```rego
78
+ # main.rego 必須包含以下預設
79
+ default allow = false
80
+
81
+ allow if {
82
+ not data.vibeops.guardian.forbidden_patterns.has_forbidden_pattern
83
+ not data.vibeops.guardian.env_policy.prod_violation
84
+ not data.vibeops.guardian.logic_constraints.has_logic_violation
85
+ }
86
+ ```
87
+
88
+ 任何 `undefined` 評估結果都應回傳 DENY,不能回傳 ALLOW。
89
+
90
+ ### 3.2 使用 Set(不要 array.concat)
91
+
92
+ OPA ≥ 0.40 的型別系統嚴格區分 array 和 set。`violations` partial rule 是 set 型別,**不可用 `array.concat()`**。
93
+
94
+ ```rego
95
+ # ✅ 正確:partial set rule 集合 violations
96
+ deny_reasons[r] if { r := data.vibeops.guardian.forbidden_patterns.violations[_] }
97
+ deny_reasons[r] if { r := data.vibeops.guardian.env_policy.violations[_] }
98
+ deny_reasons[r] if { r := data.vibeops.guardian.logic_constraints.violations[_] }
99
+
100
+ # ❌ 錯誤:array.concat 用在 set 上 → rego_type_error
101
+ # deny_reasons := array.concat(violations1, violations2)
102
+ ```
103
+
104
+ ### 3.3 禁止解析自由文字欄位
105
+
106
+ Policy 決策**不得依賴** `intent`、`description`、`annotation` 等使用者可控文字欄位。
107
+
108
+ ```rego
109
+ # ❌ 危險:解析 intent 欄位 → Prompt Injection 攻擊面(OWASP LLM01)
110
+ allow if { contains(input.intent, "EMERGENCY") }
111
+
112
+ # ✅ 安全:只使用結構化欄位
113
+ allow if {
114
+ input.target_env != "prod"
115
+ every step in input.plan { step.reversible == true }
116
+ }
117
+ ```
118
+
119
+ ### 3.4 一個 Module 管一個關注點
120
+
121
+ ```
122
+ policies/
123
+ forbidden_patterns.rego ← 禁止指令模式
124
+ forbidden_patterns_test.rego
125
+ env_policy.rego ← 環境特定規則(prod 保護)
126
+ env_policy_test.rego
127
+ logic_constraints.rego ← 邏輯一致性(stop+start 用 restart)
128
+ logic_constraints_test.rego
129
+ risk_gate.rego ← 風險分數閾值
130
+ risk_gate_test.rego
131
+ main.rego ← 整合所有 module,Fail-Closed
132
+ main_test.rego ← 整合測試
133
+ ```
134
+
135
+ ---
136
+
137
+ ## 四、CI 整合
138
+
139
+ ### GitHub Actions 步驟
140
+
141
+ ```yaml
142
+ - name: Test OPA Rego Policies
143
+ run: |
144
+ docker run --rm \
145
+ -v "${{ github.workspace }}/src/guardian/policies:/policies:ro" \
146
+ openpolicyagent/opa:latest-static \
147
+ test /policies -v
148
+ ```
149
+
150
+ ### npm script
151
+
152
+ ```json
153
+ {
154
+ "test:policy": "docker run --rm -v \"$(pwd)/src/guardian/policies:/policies:ro\" openpolicyagent/opa:latest-static test /policies -v"
155
+ }
156
+ ```
157
+
158
+ ---
159
+
160
+ ## 五、品質閘門
161
+
162
+ | 閘門 | 閾值 | 強制程度 |
163
+ |------|------|---------|
164
+ | OPA 測試通過率(CI) | 100%(所有 test_ rule 通過)| Block merge |
165
+ | Root policy Fail-Closed | `default allow = false` 存在 | Block merge |
166
+ | 每個 policy module 有 _test.rego | 每個 .rego 有對應測試 | Advisory |
167
+
168
+ ---
169
+
170
+ ## 六、反模式(Anti-patterns)
171
+
172
+ | 反模式 | 問題 | 正確做法 |
173
+ |--------|------|---------|
174
+ | `array.concat()` 用在 violations(set 型)| OPA 型別錯誤 | 改用 partial set rule |
175
+ | Root policy 缺少 `default allow = false` | Fail-Open 漏洞 | 加入 default |
176
+ | Intent 欄位參與安全決策 | Prompt Injection 攻擊面 | 只用結構化欄位 |
177
+ | 只測試 DENY(無 ALLOW 測試)| 無法偵測過度限制 | 加入 ALLOW 案例 |
178
+ | _test.rego 只在本機跑,不在 CI | policy 改動無安全網 | CI 加 `opa test` step |
179
+
180
+ ---
181
+
182
+ ## 參考標準
183
+
184
+ - [OPA Testing Guide](https://www.openpolicyagent.org/docs/latest/policy-testing/)
185
+ - NIST SP 800-204B — Attribute-based Access Control for Microservices-based Applications Using a Service Mesh
186
+ - [UDS `secure-op.ai.yaml`](./secure-op.md) — AI Agent 安全操作六大支柱
187
+ - [UDS `adversarial-test.ai.yaml`](./adversarial-test.md) — 對抗性測試(OWASP LLM01)
188
+ - [UDS `container-security.ai.yaml`](./container-security.md) — 容器安全(OPA Sidecar 部署)
@@ -0,0 +1,72 @@
1
+ # Prompt Regression Standards
2
+
3
+ ## Overview
4
+
5
+ AI agent prompts are code. Unintended changes silently degrade agent behaviour without triggering type errors or unit test failures. Prompt regression tests use golden SHA-256 checksums to detect any modification, forcing developers to explicitly acknowledge and document prompt changes.
6
+
7
+ ## Why Checksums
8
+
9
+ - Diffs alone don't block CI — checksums do
10
+ - Prompts are large markdown files; minor edits (whitespace, punctuation) can shift model behaviour
11
+ - Checksum update + comment creates an audit trail of why each prompt changed
12
+
13
+ ## Implementation
14
+
15
+ ### 1. Compute Initial Checksums
16
+
17
+ ```bash
18
+ for f in agents/*/prompt.md; do
19
+ echo -n "$f: "
20
+ sha256sum "$f" | cut -d' ' -f1
21
+ done
22
+ ```
23
+
24
+ ### 2. Golden Checksum Test (Vitest)
25
+
26
+ ```typescript
27
+ // SPDX-License-Identifier: AGPL-3.0-only
28
+ import { createHash } from "crypto"
29
+ import { readFileSync } from "fs"
30
+ import { join } from "path"
31
+ import { describe, it, expect } from "vitest"
32
+
33
+ // Update these values ONLY when prompt changes are intentional.
34
+ // Add a comment on the same line explaining WHY the prompt changed.
35
+ const GOLDEN_CHECKSUMS: Record<string, string> = {
36
+ architect: "98017d39b0e48cda88b796687d21e0f884c810805e534453a23b7ad935e4a5ef",
37
+ builder: "5c2acda3e48dae771c61f55d3a5b0d5ac7383870054ef71e757714e367c50031",
38
+ // ... all agents
39
+ }
40
+
41
+ describe("Agent prompt regression (XSPEC-162)", () => {
42
+ for (const [agent, expected] of Object.entries(GOLDEN_CHECKSUMS)) {
43
+ it(`agents/${agent}/prompt.md checksum matches golden`, () => {
44
+ const filePath = join(__dirname, "..", "..", "agents", agent, "prompt.md")
45
+ const content = readFileSync(filePath)
46
+ const actual = createHash("sha256").update(content).digest("hex")
47
+ expect(actual, `Prompt for '${agent}' changed unexpectedly. If intentional, update GOLDEN_CHECKSUMS with a comment.`).toBe(expected)
48
+ })
49
+ }
50
+ })
51
+ ```
52
+
53
+ ### 3. CI Integration
54
+
55
+ The checksum test runs as part of the standard `npm run test:coverage` gate (already enforced via XSPEC-156), so no additional CI step is needed.
56
+
57
+ ### 4. Updating Checksums
58
+
59
+ When a prompt change is intentional:
60
+
61
+ ```typescript
62
+ // AFTER (checksum updated; the trailing comment records why the prompt changed):
63
+ architect: "98017d39...", // updated 2026-05-05: added Guardian policy XSPEC-160 reference
64
+ ```
65
+
66
+ The comment is mandatory. PRs that update checksums without explanatory comments should be rejected in code review.
67
+
68
+ ## Related Standards
69
+
70
+ - [LLM Output Validation](llm-output-validation.md) — schema-level validation
71
+ - [Adversarial Test](adversarial-test.md) — red-team corpus
72
+ - [Testing Standards](testing-standards.md) — overall testing pyramid
@@ -0,0 +1,73 @@
1
+ # Property-Based Testing Standards
2
+
3
+ ## Overview
4
+
5
+ Example-based tests only verify the cases a developer thought to write. Property-based testing inverts this: you define an invariant ("the score is always between 0 and 100") and the framework generates hundreds of inputs to try to falsify it. When it finds a failing input, it shrinks it to the minimal counterexample.
6
+
7
+ ## When to Use
8
+
9
+ | Use Property Tests | Use Example Tests |
10
+ |-------------------|------------------|
11
+ | Pure math functions | Complex business logic |
12
+ | Parsers / serializers | Integration paths |
13
+ | Score clamping / rounding | UI behaviour |
14
+ | Hash / encoding | Database operations |
15
+ | Security validators | External API calls |
16
+
17
+ ## Tool: fast-check (TypeScript)
18
+
19
+ ```bash
20
+ npm install --save-dev fast-check
21
+ ```
22
+
23
+ ```typescript
24
+ import fc from "fast-check"
25
+ import { describe, it, expect } from "vitest"
26
+ import { classifyTokenZone, TOKEN_BUDGET } from "../types/index.js"
27
+
28
+ describe("classifyTokenZone property: result is always a valid zone", () => {
29
+ it("for any ratio in [0, 2], returns a valid TokenBudgetZone", () => {
30
+ fc.assert(
31
+ fc.property(
32
+ fc.float({ min: 0, max: 2, noNaN: true }),
33
+ (ratio) => {
34
+ const zone = classifyTokenZone(ratio)
35
+ return ["safe", "warning", "danger", "blocking"].includes(zone)
36
+ }
37
+ ),
38
+ { numRuns: 1000 }
39
+ )
40
+ })
41
+ })
42
+ ```
43
+
44
+ ## Guardian scoreReviewable Properties
45
+
46
+ Key invariants to test:
47
+
48
+ | Property | Description |
49
+ |----------|-------------|
50
+ | **Range clamping** | `score` is always within `[0, 100]` |
51
+ | **Determinism** | Same input always produces same score |
52
+ | **Monotonicity** | For the same operation, the score ordering is prod > staging > dev |
53
+ | **Non-negativity** | `breakdown` values are all >= 0 |
54
+
55
+ ## Counterexample Shrinking
56
+
57
+ When fast-check finds a failing case, it automatically shrinks:
58
+
59
+ ```
60
+ Original failure: { target_env: "prod", command: "rm -rf /tmp/xyz123...", ... }
61
+ Shrunk to: { target_env: "prod", command: "rm", ... }
62
+ ```
63
+
64
+ Save the seed from the error message to reproduce:
65
+ ```typescript
66
+ fc.assert(property, { seed: 1234567890 })
67
+ ```
68
+
69
+ ## Related Standards
70
+
71
+ - [Mutation Testing Standards](mutation-testing.md) — complement to PBT
72
+ - [Testing Standards](testing-standards.md) — overall test pyramid
73
+ - [Adversarial Test Standards](adversarial-test.md) — security-focused fuzzing