universal-dev-standards 5.4.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/bundled/ai/options/testing/integration-testing.ai.yaml +2 -2
  2. package/bundled/ai/options/testing/unit-testing.ai.yaml +2 -2
  3. package/bundled/ai/standards/adversarial-test.ai.yaml +277 -0
  4. package/bundled/ai/standards/audit-trail.ai.yaml +113 -0
  5. package/bundled/ai/standards/browser-compatibility-standards.ai.yaml +63 -0
  6. package/bundled/ai/standards/chaos-injection-tests.ai.yaml +91 -0
  7. package/bundled/ai/standards/container-image-standards.ai.yaml +88 -0
  8. package/bundled/ai/standards/container-security.ai.yaml +331 -0
  9. package/bundled/ai/standards/contract-testing-standards.ai.yaml +62 -0
  10. package/bundled/ai/standards/cost-budget-test.ai.yaml +96 -0
  11. package/bundled/ai/standards/cross-flow-regression.ai.yaml +61 -0
  12. package/bundled/ai/standards/data-contract.ai.yaml +110 -0
  13. package/bundled/ai/standards/data-migration-testing.ai.yaml +96 -0
  14. package/bundled/ai/standards/data-pipeline.ai.yaml +113 -0
  15. package/bundled/ai/standards/disaster-recovery-drill.ai.yaml +89 -0
  16. package/bundled/ai/standards/flaky-test-management.ai.yaml +89 -0
  17. package/bundled/ai/standards/flow-based-testing.ai.yaml +240 -0
  18. package/bundled/ai/standards/full-coverage-testing.ai.yaml +192 -0
  19. package/bundled/ai/standards/iac-design-principles.ai.yaml +83 -0
  20. package/bundled/ai/standards/incident-response.ai.yaml +107 -0
  21. package/bundled/ai/standards/license-compliance.ai.yaml +106 -0
  22. package/bundled/ai/standards/llm-output-validation.ai.yaml +269 -0
  23. package/bundled/ai/standards/mock-boundary.ai.yaml +250 -0
  24. package/bundled/ai/standards/mutation-testing.ai.yaml +192 -0
  25. package/bundled/ai/standards/pii-classification.ai.yaml +109 -0
  26. package/bundled/ai/standards/policy-as-code-testing.ai.yaml +227 -0
  27. package/bundled/ai/standards/prd-standards.ai.yaml +88 -0
  28. package/bundled/ai/standards/product-metrics-standards.ai.yaml +111 -0
  29. package/bundled/ai/standards/prompt-regression.ai.yaml +94 -0
  30. package/bundled/ai/standards/property-based-testing.ai.yaml +105 -0
  31. package/bundled/ai/standards/release-quality-manifest.ai.yaml +135 -0
  32. package/bundled/ai/standards/release-readiness-gate.ai.yaml +77 -0
  33. package/bundled/ai/standards/replay-test.ai.yaml +111 -0
  34. package/bundled/ai/standards/runbook.ai.yaml +104 -0
  35. package/bundled/ai/standards/sast-advanced.ai.yaml +135 -0
  36. package/bundled/ai/standards/schema-evolution.ai.yaml +111 -0
  37. package/bundled/ai/standards/secret-management-standards.ai.yaml +105 -0
  38. package/bundled/ai/standards/secure-op.ai.yaml +365 -0
  39. package/bundled/ai/standards/security-testing.ai.yaml +171 -0
  40. package/bundled/ai/standards/server-ops-security.ai.yaml +274 -0
  41. package/bundled/ai/standards/slo-sli.ai.yaml +97 -0
  42. package/bundled/ai/standards/smoke-test.ai.yaml +87 -0
  43. package/bundled/ai/standards/supply-chain-attestation.ai.yaml +109 -0
  44. package/bundled/ai/standards/test-completeness-dimensions.ai.yaml +52 -5
  45. package/bundled/ai/standards/testing.ai.yaml +20 -13
  46. package/bundled/ai/standards/user-story-mapping.ai.yaml +108 -0
  47. package/bundled/core/accessibility-standards.md +58 -0
  48. package/bundled/core/adversarial-test.md +212 -0
  49. package/bundled/core/branch-completion.md +4 -0
  50. package/bundled/core/browser-compatibility-standards.md +220 -0
  51. package/bundled/core/chaos-injection-tests.md +116 -0
  52. package/bundled/core/checkin-standards.md +1 -0
  53. package/bundled/core/container-security.md +521 -0
  54. package/bundled/core/contract-testing-standards.md +182 -0
  55. package/bundled/core/cost-budget-test.md +69 -0
  56. package/bundled/core/cross-flow-regression.md +190 -0
  57. package/bundled/core/data-migration-testing.md +110 -0
  58. package/bundled/core/disaster-recovery-drill.md +73 -0
  59. package/bundled/core/flaky-test-management.md +73 -0
  60. package/bundled/core/flow-based-testing.md +275 -0
  61. package/bundled/core/full-coverage-testing.md +183 -0
  62. package/bundled/core/llm-output-validation.md +178 -0
  63. package/bundled/core/mock-boundary.md +100 -0
  64. package/bundled/core/mutation-testing.md +97 -0
  65. package/bundled/core/performance-standards.md +65 -0
  66. package/bundled/core/policy-as-code-testing.md +188 -0
  67. package/bundled/core/prompt-regression.md +72 -0
  68. package/bundled/core/property-based-testing.md +73 -0
  69. package/bundled/core/release-quality-manifest.md +193 -0
  70. package/bundled/core/release-readiness-gate.md +184 -0
  71. package/bundled/core/replay-test.md +86 -0
  72. package/bundled/core/sast-advanced.md +300 -0
  73. package/bundled/core/secure-op.md +314 -0
  74. package/bundled/core/security-testing.md +87 -0
  75. package/bundled/core/server-ops-security.md +493 -0
  76. package/bundled/core/smoke-test.md +65 -0
  77. package/bundled/core/supply-chain-attestation.md +117 -0
  78. package/bundled/locales/zh-CN/CHANGELOG.md +3 -3
  79. package/bundled/locales/zh-CN/README.md +1 -1
  80. package/bundled/locales/zh-CN/skills/ai-instruction-standards/SKILL.md +5 -5
  81. package/bundled/locales/zh-TW/CHANGELOG.md +3 -3
  82. package/bundled/locales/zh-TW/README.md +1 -1
  83. package/bundled/locales/zh-TW/core/browser-compatibility-standards.md +11 -0
  84. package/bundled/locales/zh-TW/core/contract-testing-standards.md +11 -0
  85. package/bundled/locales/zh-TW/core/cross-flow-regression.md +11 -0
  86. package/bundled/locales/zh-TW/core/release-readiness-gate.md +11 -0
  87. package/bundled/locales/zh-TW/skills/ai-instruction-standards/SKILL.md +183 -79
  88. package/bundled/skills/README.md +4 -3
  89. package/bundled/skills/SKILL_NAMING.md +94 -0
  90. package/bundled/skills/ai-instruction-standards/SKILL.md +181 -88
  91. package/bundled/skills/atdd-assistant/SKILL.md +8 -0
  92. package/bundled/skills/bdd-assistant/SKILL.md +7 -0
  93. package/bundled/skills/checkin-assistant/SKILL.md +8 -0
  94. package/bundled/skills/code-review-assistant/SKILL.md +7 -0
  95. package/bundled/skills/journey-test-assistant/SKILL.md +203 -0
  96. package/bundled/skills/orchestrate/SKILL.md +167 -0
  97. package/bundled/skills/plan/SKILL.md +234 -0
  98. package/bundled/skills/pr-automation-assistant/SKILL.md +8 -0
  99. package/bundled/skills/push/SKILL.md +49 -2
  100. package/bundled/skills/{process-automation → skill-builder}/SKILL.md +1 -1
  101. package/bundled/skills/{forward-derivation → spec-derivation}/SKILL.md +1 -1
  102. package/bundled/skills/spec-driven-dev/SKILL.md +7 -0
  103. package/bundled/skills/sweep/SKILL.md +145 -0
  104. package/bundled/skills/tdd-assistant/SKILL.md +7 -0
  105. package/package.json +6 -6
  106. package/src/commands/check.js +43 -0
  107. package/src/commands/flow.js +8 -0
  108. package/src/commands/init.js +2 -1
  109. package/src/commands/start.js +14 -0
  110. package/src/commands/sweep.js +8 -0
  111. package/src/commands/update.js +10 -0
  112. package/src/commands/workflow.js +8 -0
  113. package/standards-registry.json +483 -5
  114. package/bundled/locales/zh-CN/skills/ac-coverage-assistant/SKILL.md +0 -190
  115. package/bundled/locales/zh-CN/skills/forward-derivation/SKILL.md +0 -71
  116. package/bundled/locales/zh-CN/skills/forward-derivation/guide.md +0 -130
  117. package/bundled/locales/zh-CN/skills/methodology-system/SKILL.md +0 -88
  118. package/bundled/locales/zh-CN/skills/methodology-system/create-methodology.md +0 -350
  119. package/bundled/locales/zh-CN/skills/methodology-system/guide.md +0 -131
  120. package/bundled/locales/zh-CN/skills/methodology-system/runtime.md +0 -279
  121. package/bundled/locales/zh-CN/skills/process-automation/SKILL.md +0 -143
  122. package/bundled/locales/zh-TW/skills/ac-coverage-assistant/SKILL.md +0 -195
  123. package/bundled/locales/zh-TW/skills/deploy-assistant/SKILL.md +0 -178
  124. package/bundled/locales/zh-TW/skills/forward-derivation/SKILL.md +0 -69
  125. package/bundled/locales/zh-TW/skills/forward-derivation/guide.md +0 -415
  126. package/bundled/locales/zh-TW/skills/methodology-system/SKILL.md +0 -86
  127. package/bundled/locales/zh-TW/skills/methodology-system/create-methodology.md +0 -350
  128. package/bundled/locales/zh-TW/skills/methodology-system/guide.md +0 -131
  129. package/bundled/locales/zh-TW/skills/methodology-system/runtime.md +0 -279
  130. package/bundled/locales/zh-TW/skills/process-automation/SKILL.md +0 -144
  131. /package/bundled/skills/{ac-coverage-assistant → ac-coverage}/SKILL.md +0 -0
  132. /package/bundled/skills/{methodology-system → dev-methodology}/SKILL.md +0 -0
  133. /package/bundled/skills/{methodology-system → dev-methodology}/create-methodology.md +0 -0
  134. /package/bundled/skills/{methodology-system → dev-methodology}/guide.md +0 -0
  135. /package/bundled/skills/{methodology-system → dev-methodology}/integrated-flow.md +0 -0
  136. /package/bundled/skills/{methodology-system → dev-methodology}/prerequisite-check.md +0 -0
  137. /package/bundled/skills/{methodology-system → dev-methodology}/runtime.md +0 -0
  138. /package/bundled/skills/{forward-derivation → spec-derivation}/guide.md +0 -0
@@ -0,0 +1,269 @@
1
+ # LLM Output Validation Standards - AI Optimized
2
+ # Source: core/llm-output-validation.md
3
+
4
+ id: llm-output-validation
5
+ meta:
6
+ version: "1.0.0"
7
+ updated: "2026-05-05"
8
+ source: core/llm-output-validation.md
9
+ description: >
10
+ Standards for validating LLM and AI agent outputs to ensure schema
11
+ conformance, detect hallucinations, and assess response groundedness.
12
+ Covers structural validation, semantic validation, and test strategies.
13
+
14
+ # ─────────────────────────────────────────────────────────
15
+ # Core Concepts
16
+ # ─────────────────────────────────────────────────────────
17
+ core_concepts:
18
+ definition: >
19
+ LLM outputs are non-deterministic and may violate expected schema,
20
+ introduce hallucinated facts, or fail to stay grounded in provided context.
21
+ LLM output validation is the practice of systematically testing that
22
+ AI agent outputs meet structural and semantic quality standards.
23
+
24
+ validation_dimensions:
25
+ - dimension: Schema Conformance
26
+ definition: >
27
+ The output matches the declared JSON / type schema exactly —
28
+ required fields present, types correct, enums respected.
29
+ test_type: Contract Test
30
+ automated: true
31
+
32
+ - dimension: Hallucination Detection
33
+ definition: >
34
+ Factual claims in the output can be traced back to source material
35
+ (grounded) or are fabricated (hallucinated).
36
+ test_type: Semantic / NLI probe
37
+ automated: partial
38
+
39
+ - dimension: Grounded Answer Rate
40
+ definition: >
41
+ Ratio of responses that cite or derive from provided context
42
+ vs. responses that introduce unsupported facts.
43
+ metric: "grounded_answers / total_answers"
44
+ target: "≥ 0.95 for RAG-based agents"
45
+ automated: partial
46
+
47
+ - dimension: Refusal Evaluation
48
+ definition: >
49
+ Agent correctly refuses (or escalates) out-of-scope, harmful,
50
+ or ambiguous requests rather than hallucinating a plausible answer.
51
+ test_type: Adversarial probe
52
+ automated: true (via test corpus)
53
+
54
+ - dimension: Determinism / Stability
55
+ definition: >
56
+ With temperature=0, the same prompt yields the same output schema structure
57
+ (even if the content is not identical).
58
+ test_type: Replay / Golden file
59
+ automated: true
60
+
61
+ # ─────────────────────────────────────────────────────────
62
+ # Contract Testing (Schema Conformance)
63
+ # ─────────────────────────────────────────────────────────
64
+ contract_testing:
65
+ definition: >
66
+ Contract tests verify that LLM/agent outputs satisfy a declared schema
67
+ (JSON Schema, Zod, Pydantic, etc.) before being consumed downstream.
68
+ They are the cheapest and most automated form of LLM output validation.
69
+
70
+ patterns:
71
+ - name: Schema-driven contract test
72
+ description: >
73
+ Maintain an output-schema.json per agent. Run fixture outputs through
74
+ a schema validator (Ajv / Pydantic / jsonschema) in unit tests.
75
+ example: |
76
+ // TypeScript with ajv
77
+ import Ajv from "ajv"
78
+ import schema from "./output-schema.json"
79
+ const ajv = new Ajv()
80
+ const validate = ajv.compile(schema)
81
+ const result = validate(agentOutput)
82
+ expect(result).toBe(true)
83
+
84
+ - name: Golden file test
85
+ description: >
86
+ Capture a real LLM response as a golden fixture (JSON).
87
+ Re-validate against the schema on every schema change.
88
+ If the schema is unchanged, diff the fixture to detect regressions.
89
+
90
+ - name: Negative contract test
91
+ description: >
92
+ Provide outputs that violate the schema (missing required fields,
93
+ wrong types). Assert the validator correctly rejects them.
94
+ Proves the schema is strict enough to catch real failures.
95
+
96
+ per_agent_structure:
97
+ - "agents/<agent-name>/output-schema.json — JSON Schema definition"
98
+ - "agents/<agent-name>/__tests__/contract.test.ts — contract test suite"
99
+ - "agents/<agent-name>/__fixtures__/valid.json — valid output fixture"
100
+ - "agents/<agent-name>/__fixtures__/invalid-*.json — invalid fixtures"
101
+
102
+ # ─────────────────────────────────────────────────────────
103
+ # Hallucination Detection
104
+ # ─────────────────────────────────────────────────────────
105
+ hallucination_detection:
106
+ definition: >
107
+ Hallucination occurs when an LLM generates plausible-sounding but
108
+ factually incorrect or unsupported content.
109
+
110
+ detection_strategies:
111
+ - strategy: Grounding check
112
+ description: >
113
+ For RAG-based agents: compare named entities / key facts in response
114
+ against retrieved documents using NLI or embedding similarity.
115
+ tools: [DeepEval, Ragas, custom NLI probe]
116
+ complexity: high
117
+
118
+ - strategy: Schema field validation
119
+ description: >
120
+ Structured outputs (JSON) partially prevent hallucination by requiring
121
+ specific enumerations and reference IDs. If the LLM hallucinates a
122
+ field value, it either violates the schema (caught by contract test)
123
+ or produces a real-looking but wrong value (requires semantic check).
124
+ complexity: low
125
+
126
+ - strategy: Confidence calibration
127
+ description: >
128
+ Agent includes a confidence score or explicitly marks uncertain claims.
129
+ Downstream consumers reject or escalate low-confidence outputs.
130
+ requires: Prompt design to elicit uncertainty markers
131
+ complexity: medium
132
+
133
+ rate_targets:
134
+ structured_output_agents:
135
+ schema_conformance: "≥ 99%"
136
+ factual_hallucination_rate: "≤ 5%"
137
+ rag_agents:
138
+ grounded_answer_rate: "≥ 95%"
139
+ chat_agents:
140
+ refusal_on_out_of_scope: "≥ 90%"
141
+
142
+ # ─────────────────────────────────────────────────────────
143
+ # Prompt Regression & Stability
144
+ # ─────────────────────────────────────────────────────────
145
+ prompt_regression:
146
+ definition: >
147
+ When a prompt is changed, verify that the output schema structure
148
+ is preserved (even if content differs). A prompt change that silently
149
+ breaks downstream field expectations is a regression.
150
+
151
+ test_pattern: |
152
+ # 1. Before prompt change: capture golden output structure
153
+ # 2. Change prompt
154
+ # 3. Re-run with temperature=0 on same input fixture
155
+ # 4. Validate output against schema (contract test)
156
+ # 5. Diff field presence / enum values against golden
157
+
158
+ required_on:
159
+ - Any change to agents/*/prompt.md
160
+ - Model version upgrade (same prompt, new model)
161
+ - New required output field added to schema
162
+
163
+ # ─────────────────────────────────────────────────────────
164
+ # Tools & Libraries
165
+ # ─────────────────────────────────────────────────────────
166
+ tools:
167
+ schema_validation:
168
+ - name: Ajv (TypeScript/JavaScript)
169
+ package: "ajv"
170
+ strengths: [JSON Schema draft-07 support, fast, minimal]
171
+ use_case: Contract tests for agents with JSON output-schema.json
172
+
173
+ - name: Zod (TypeScript)
174
+ package: "zod"
175
+ strengths: [Type-safe, composable, coercion support]
176
+ use_case: Runtime validation when TypeScript types are available
177
+
178
+ - name: Pydantic (Python)
179
+ package: "pydantic"
180
+ strengths: [Automatic validation from type annotations]
181
+ use_case: Python-based agent contract tests
182
+
183
+ hallucination_evaluation:
184
+ - name: DeepEval
185
+ package: "deepeval"
186
+ features: [Hallucination metric, Contextual Precision, Faithfulness]
187
+ note: Requires LLM judge (adds cost)
188
+
189
+ - name: Ragas
190
+ package: "ragas"
191
+ features: [Answer Grounding, Context Recall, Faithfulness]
192
+ note: Designed for RAG pipelines
193
+
194
+ # ─────────────────────────────────────────────────────────
195
+ # Quality Gates
196
+ # ─────────────────────────────────────────────────────────
197
+ quality_gates:
198
+ - gate: Schema conformance (CI)
199
+ threshold: "100% of valid fixtures pass schema validation"
200
+ enforcement: Block merge
201
+ automated: true
202
+ note: Run in every PR via contract.test.ts
203
+
204
+ - gate: Negative schema rejection (CI)
205
+ threshold: "100% of invalid fixtures are rejected by schema validator"
206
+ enforcement: Block merge
207
+ automated: true
208
+ note: Proves schema is strict enough
209
+
210
+ - gate: Hallucination rate (pre-release)
211
+ threshold: "≤ 5% hallucinated facts on benchmark dataset"
212
+ enforcement: Advisory (escalate to human review if exceeded)
213
+ automated: partial
214
+ note: Run monthly or on model upgrade
215
+
216
+ - gate: Prompt regression (on prompt change)
217
+ threshold: "Schema conformance maintained after prompt change"
218
+ enforcement: Block merge
219
+ automated: true (via contract test re-run)
220
+
221
+ # ─────────────────────────────────────────────────────────
222
+ # Rules
223
+ # ─────────────────────────────────────────────────────────
224
+ rules:
225
+ - id: agent-must-have-output-schema
226
+ trigger: defining or modifying an AI agent
227
+ instruction: >
228
+ Every agent MUST have a declared output-schema.json.
229
+ Agents without a schema cannot be safely composed into pipelines.
230
+ priority: required
231
+
232
+ - id: contract-test-per-agent
233
+ trigger: defining or modifying an AI agent
234
+ instruction: >
235
+ Every agent MUST have a contract.test.ts with at least:
236
+ (1) one valid fixture that passes schema validation
237
+ (2) one invalid fixture that fails schema validation
238
+ priority: required
239
+
240
+ - id: prompt-change-triggers-contract-rerun
241
+ trigger: modifying agents/*/prompt.md
242
+ instruction: >
243
+ Re-run the agent's contract tests with temperature=0 after any
244
+ prompt change. A schema violation in the golden fixture signals regression.
245
+ priority: required
246
+
247
+ - id: model-upgrade-triggers-contract-rerun
248
+ trigger: upgrading the LLM model version used by an agent
249
+ instruction: >
250
+ Run all agent contract tests against the new model.
251
+ Compare output structure, field presence, and enum values against golden.
252
+ priority: required
253
+
254
+ anti_patterns:
255
+ - Defining agents without output-schema.json
256
+ - Only validating JSON.parse (syntax) without schema (structural) validation
257
+ - Writing contract tests with only valid fixtures (no negative cases)
258
+ - Accepting model upgrades without re-running contract tests
259
+ - Using high temperature (> 0.3) in contract test fixtures (non-deterministic)
260
+
261
+ quick_reference:
262
+ contract_test_checklist: |
263
+ □ agents/<name>/output-schema.json exists
264
+ □ agents/<name>/__tests__/contract.test.ts exists
265
+ □ Valid fixture: all required fields, correct types, enum values
266
+ □ Invalid fixtures: missing required field, wrong type, invalid enum
267
+ □ Schema validator: Ajv (JSON Schema) or Zod (TypeScript)
268
+ □ CI: contract tests run on every PR
269
+ □ Prompt change: re-run contract test, compare against golden
@@ -0,0 +1,250 @@
1
+ # Mock Boundary Standards - AI Optimized
2
+ # Source: core/mock-boundary.md
3
+
4
+ id: mock-boundary
5
+ meta:
6
+ version: "1.0.0"
7
+ updated: "2026-05-04"
8
+ source: core/mock-boundary.md
9
+ description: >
10
+ Rules defining what can and cannot be mocked to prevent hollow tests —
11
+ tests that pass while the real system is broken.
12
+
13
+ # ─────────────────────────────────────────────────────────
14
+ # Core Problem
15
+ # ─────────────────────────────────────────────────────────
16
+ core_problem:
17
+ name: Hollow Test Anti-Pattern
18
+ description: >
19
+ Over-mocking replaces real business logic with test doubles, making the test suite
20
+ a specification of mock behavior rather than system behavior.
21
+ The tests pass in CI while the real system silently fails.
22
+ real_world_example: |
23
+ // SPEC-002.test.ts (VibeOps) — hollow test example
24
+ vi.mock('../../src/runner/agent-runner.js') // Core dependency mocked
25
+ vi.mock('../../src/runner/guardian-hooks.js') // Core dependency mocked
26
+ vi.mock('../../src/runner/prototyper.js') // Core dependency mocked
27
+ vi.mock('../../src/runner/iteration-report.js') // Core dependency mocked
28
+ vi.mock('../../src/memory/memory-store.js') // Core dependency mocked
29
+ vi.mock('node:fs/promises', ...) // I/O mocked
30
+
31
+ // Result: runPipeline() runs but touches ZERO real code.
32
+ // All 8 agent calls are faked. The test proves nothing about pipeline logic.
33
+
34
+ # ─────────────────────────────────────────────────────────
35
+ # Allowed Mocks
36
+ # ─────────────────────────────────────────────────────────
37
+ allowed:
38
+ - category: External HTTP Services
39
+ description: Third-party APIs, LLM providers, payment gateways, email services
40
+ reason: Prevents flaky tests from external dependencies; enables response scenario control
41
+ examples:
42
+ - OpenAI / Anthropic / Grok API
43
+ - Stripe / payment processors
44
+ - SendGrid / email providers
45
+ - External OAuth providers
46
+ implementation: Mock the HTTP client or provider factory; never mock the internal caller
47
+
48
+ - category: Time Functions
49
+ description: Date.now(), new Date(), setTimeout, setInterval
50
+ reason: Makes tests deterministic and enables time-travel scenarios
51
+ examples:
52
+ - "vi.useFakeTimers()"
53
+ - "vi.setSystemTime(new Date('2026-01-01'))"
54
+ note: Always restore real timers after test with vi.useRealTimers()
55
+
56
+ - category: Environment Variables
57
+ description: process.env values
58
+ reason: Tests need different configurations without changing system state
59
+ implementation: Use vi.stubEnv() or process.env assignment in beforeEach/afterEach
60
+
61
+ - category: File System (unit tests only)
62
+ description: fs.readFile, fs.writeFile, fs.stat in UNIT tests
63
+ reason: Avoids slow I/O in fast unit tests
64
+ constraint: >
65
+ Integration tests, flow tests, and E2E tests MUST use real filesystem
66
+ or in-memory FS (memfs) — never vi.mock('node:fs/promises') at those levels
67
+
68
+ - category: Cross-Module Boundaries (with counterpart)
69
+ description: Calls to OTHER modules when testing THIS module's own logic
70
+ reason: Isolates the unit under test from its collaborators
71
+ constraint: >
72
+ A corresponding integration test MUST exist that exercises the real interaction.
73
+ Mock only when the real collaborator has its own test coverage.
74
+ example: |
75
+ // Mocking the DB layer in a service unit test is OK IF:
76
+ // 1. The DB layer has its own integration tests
77
+ // 2. The service test focuses on service logic (not DB behavior)
78
+
79
+ # ─────────────────────────────────────────────────────────
80
+ # Forbidden Mocks
81
+ # ─────────────────────────────────────────────────────────
82
+ forbidden:
83
+ - category: Own Module Core Logic
84
+ description: Mocking the module's OWN functions in the file that tests it
85
+ example: |
86
+ // ❌ Testing pipeline-runner.ts but mocking pipeline-runner itself
87
+ vi.mock('../../src/runner/pipeline-runner.js')
88
+ import { runPipeline } from '../../src/runner/pipeline-runner.js'
89
+ // runPipeline is now a no-op stub — the test proves nothing
90
+ violation_indicator: >
91
+ The mock import path resolves to the same module the test imports as its subject,
92
+ or the mock replaces the primary export being tested.
93
+ fix: Remove the mock and let the real code run; mock only its external dependencies
94
+
95
+ - category: Database Layer in Integration/Flow Tests
96
+ description: Replacing DB calls with in-memory return values in integration or flow tests
97
+ reason: Masks query bugs, schema constraint violations, index issues, and migration errors
98
+ alternative: >
99
+ Use in-memory SQLite (better-sqlite3 / sql.js), test containers,
100
+ or a dedicated test schema — a real database with controlled data
101
+ example: |
102
+ // ❌ Forbidden in integration/flow tests
103
+ vi.mock('../../src/db/client.js', () => ({ query: vi.fn().mockResolvedValue([]) }))
104
+
105
+ // ✅ Correct: use real in-memory DB
106
+ import Database from 'better-sqlite3'
107
+ const testDb = new Database(':memory:')
108
+
109
+ - category: Core Framework Internals
110
+ description: Express/Fastify routing, ORM core (Drizzle/Prisma internals), auth middleware core
111
+ reason: Tests pass while real routing, query building, or auth enforcement is broken
112
+ example: |
113
+ // ❌ Forbidden
114
+ vi.mock('express', () => ({ Router: () => ({ get: vi.fn(), post: vi.fn() }) }))
115
+
116
+ - category: Security Controls
117
+ description: Auth token validators, permission checks, rate limiters, input sanitizers
118
+ reason: Mocking security controls makes tests useless for security validation
119
+ example: |
120
+ // ❌ Forbidden — this test proves nothing about auth
121
+ vi.mock('../../src/auth/middleware.js', () => ({
122
+ requireAuth: (req, res, next) => next() // always passes
123
+ }))
124
+ fix: Use a real test user with a real valid token; test with real auth logic
125
+
126
+ # ─────────────────────────────────────────────────────────
127
+ # Hollow Test Detection Patterns
128
+ # ─────────────────────────────────────────────────────────
129
+ detection_patterns:
130
+ hollow_test_indicators:
131
+ - name: Mock Count Meets or Exceeds Import Count
132
+ check: "vi.mock() call count >= number of non-type imports in the test file"
133
+ severity: high
134
+ action: Review all assertions; verify at least one assertion is on actual output
135
+
136
+ - name: Assertions Only on Mock Calls
137
+ check: "All expect() statements use .toHaveBeenCalled() or .toHaveBeenCalledWith()"
138
+ severity: high
139
+ action: Add assertions on actual return values and system state changes
140
+
141
+ - name: More Mock Setup Than Assertions
142
+ check: "Lines of mock setup > lines of expect() assertions"
143
+ severity: medium
144
+ action: Consider if the test is testing behavior or just mock wiring
145
+
146
+ - name: Self-Referential Mock
147
+ check: "A vi.mock() path resolves to the same module being imported as the subject under test"
148
+ severity: critical
149
+ action: Remove the self-mock immediately; it makes the test a no-op
150
+
151
+ ai_generation_warning: >
152
+ AI tools (including this assistant) tend to generate hollow tests because:
153
+ 1. Mocking makes tests compile and pass without requiring real infrastructure
154
+ 2. AI cannot know the full dependency graph at generation time
155
+ When reviewing AI-generated tests, always apply the hollow test indicators above.
156
+
157
+ # ─────────────────────────────────────────────────────────
158
+ # Anti-Patterns
159
+ # ─────────────────────────────────────────────────────────
160
+ anti_patterns:
161
+ - name: Total Mock Isolation
162
+ description: Every import is mocked; test verifies only mock interaction counts
163
+ problem: Tests pass regardless of actual logic correctness
164
+ symptom: Deleting the implementation file doesn't break the test
165
+
166
+ - name: Mock the World
167
+ description: External + internal + database + filesystem all mocked in one test
168
+ problem: Test becomes a specification of mock behavior, not system behavior
169
+
170
+ - name: Mock Without Integration Counterpart
171
+ description: Cross-module mock with no corresponding integration test
172
+ problem: The interaction between modules is never actually exercised
173
+
174
+ - name: Security Mock Bypass
175
+ description: Auth/permission middleware replaced with always-pass stub
176
+ problem: Security regression cannot be detected
177
+
178
+ - name: Database Mock Cascade
179
+ description: DB mock returns hardcoded data, hiding query logic errors
180
+ problem: Schema migration errors, wrong predicates, missing joins — all invisible
181
+
182
+ # ─────────────────────────────────────────────────────────
183
+ # Rules
184
+ # ─────────────────────────────────────────────────────────
185
+ rules:
186
+ - id: no-own-module-mock
187
+ trigger: writing any test for a module
188
+ instruction: Never vi.mock() a path that resolves to the module being tested
189
+ priority: required
190
+
191
+ - id: real-db-in-flow-tests
192
+ trigger: writing flow test, integration test, or E2E test
193
+ instruction: >
194
+ Use a real database (in-memory SQLite, test container, or test schema).
195
+ Never mock the DB layer in these test levels.
196
+ priority: required
197
+
198
+ - id: mock-needs-integration-counterpart
199
+ trigger: adding a vi.mock() for a cross-module dependency in a unit test
200
+ instruction: >
201
+ Ensure a corresponding integration test exercises the real interaction.
202
+ Note the counterpart test in a comment: "// integration: see src/__tests__/integration/..."
203
+ priority: required
204
+
205
+ - id: security-no-mock
206
+ trigger: test involves authentication, authorization, or rate limiting
207
+ instruction: >
208
+ Never mock security controls.
209
+ Create a real test user with a real token; exercise the real auth logic.
210
+ priority: required
211
+
212
+ - id: hollow-test-review
213
+ trigger: mock count in test file equals or exceeds non-type import count
214
+ instruction: >
215
+ Apply hollow test indicators checklist before submitting.
216
+ At least one assertion must verify an actual output value (not a mock call).
217
+ priority: required
218
+
219
+ - id: ai-generated-test-mock-review
220
+ trigger: tests are AI-generated
221
+ instruction: >
222
+ AI-generated tests frequently over-mock. Apply all detection_patterns checks.
223
+ If any hollow test indicator triggers, rewrite the mocking strategy.
224
+ priority: required
225
+
226
+ # ─────────────────────────────────────────────────────────
227
+ # Quick Reference
228
+ # ─────────────────────────────────────────────────────────
229
+ quick_reference:
230
+ allowed_mock_summary:
231
+ - "✅ External HTTP/LLM/payment APIs"
232
+ - "✅ Time functions (Date.now, setTimeout)"
233
+ - "✅ Environment variables"
234
+ - "✅ File system — in unit tests only"
235
+ - "✅ Cross-module boundaries — with integration test counterpart"
236
+
237
+ forbidden_mock_summary:
238
+ - "❌ Own module's core logic (self-referential mock)"
239
+ - "❌ Database layer in integration/flow/E2E tests"
240
+ - "❌ Core framework internals (Express/Fastify routing, ORM core, auth middleware core)"
241
+ - "❌ Security controls (auth middleware, permission checks)"
242
+
243
+ checklist: |
244
+ Before submitting test with mocks:
245
+ □ No vi.mock() path matches the module under test
246
+ □ DB layer not mocked in integration/flow/E2E tests
247
+ □ Security controls not mocked
248
+ □ Mock count < import count (or justified with comment)
249
+ □ At least one assertion on actual output value (not mock call)
250
+ □ Integration counterpart exists for each cross-module mock