create-ai-project 1.20.8 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/.claude/agents-en/acceptance-test-generator.md +117 -51
  2. package/.claude/agents-en/code-reviewer.md +83 -40
  3. package/.claude/agents-en/code-verifier.md +84 -40
  4. package/.claude/agents-en/codebase-analyzer.md +7 -8
  5. package/.claude/agents-en/design-sync.md +1 -1
  6. package/.claude/agents-en/document-reviewer.md +5 -6
  7. package/.claude/agents-en/integration-test-reviewer.md +5 -5
  8. package/.claude/agents-en/investigator.md +7 -8
  9. package/.claude/agents-en/prd-creator.md +1 -1
  10. package/.claude/agents-en/quality-fixer-frontend.md +35 -163
  11. package/.claude/agents-en/quality-fixer.md +35 -160
  12. package/.claude/agents-en/requirement-analyzer.md +5 -7
  13. package/.claude/agents-en/rule-advisor.md +4 -4
  14. package/.claude/agents-en/scope-discoverer.md +14 -6
  15. package/.claude/agents-en/security-reviewer.md +38 -15
  16. package/.claude/agents-en/skill-creator.md +1 -1
  17. package/.claude/agents-en/skill-reviewer.md +1 -1
  18. package/.claude/agents-en/solver.md +7 -6
  19. package/.claude/agents-en/task-decomposer.md +41 -5
  20. package/.claude/agents-en/task-executor-frontend.md +124 -142
  21. package/.claude/agents-en/task-executor.md +124 -162
  22. package/.claude/agents-en/technical-designer-frontend.md +141 -179
  23. package/.claude/agents-en/technical-designer.md +138 -153
  24. package/.claude/agents-en/ui-spec-designer.md +3 -1
  25. package/.claude/agents-en/verifier.md +7 -8
  26. package/.claude/agents-en/work-planner.md +116 -35
  27. package/.claude/agents-ja/acceptance-test-generator.md +119 -51
  28. package/.claude/agents-ja/code-reviewer.md +87 -44
  29. package/.claude/agents-ja/code-verifier.md +85 -41
  30. package/.claude/agents-ja/codebase-analyzer.md +7 -8
  31. package/.claude/agents-ja/design-sync.md +2 -2
  32. package/.claude/agents-ja/document-reviewer.md +7 -13
  33. package/.claude/agents-ja/integration-test-reviewer.md +6 -6
  34. package/.claude/agents-ja/investigator.md +8 -9
  35. package/.claude/agents-ja/prd-creator.md +2 -2
  36. package/.claude/agents-ja/quality-fixer-frontend.md +92 -221
  37. package/.claude/agents-ja/quality-fixer.md +84 -209
  38. package/.claude/agents-ja/requirement-analyzer.md +6 -8
  39. package/.claude/agents-ja/rule-advisor.md +5 -5
  40. package/.claude/agents-ja/scope-discoverer.md +15 -7
  41. package/.claude/agents-ja/security-reviewer.md +42 -19
  42. package/.claude/agents-ja/skill-creator.md +1 -1
  43. package/.claude/agents-ja/skill-reviewer.md +1 -1
  44. package/.claude/agents-ja/solver.md +8 -7
  45. package/.claude/agents-ja/task-decomposer.md +70 -34
  46. package/.claude/agents-ja/task-executor-frontend.md +171 -189
  47. package/.claude/agents-ja/task-executor.md +135 -170
  48. package/.claude/agents-ja/technical-designer-frontend.md +214 -252
  49. package/.claude/agents-ja/technical-designer.md +198 -212
  50. package/.claude/agents-ja/ui-spec-designer.md +4 -2
  51. package/.claude/agents-ja/verifier.md +8 -9
  52. package/.claude/agents-ja/work-planner.md +115 -36
  53. package/.claude/commands-en/add-integration-tests.md +37 -6
  54. package/.claude/commands-en/build.md +93 -36
  55. package/.claude/commands-en/front-build.md +74 -38
  56. package/.claude/commands-en/front-plan.md +7 -6
  57. package/.claude/commands-en/front-review.md +93 -20
  58. package/.claude/commands-en/implement.md +52 -12
  59. package/.claude/commands-en/plan.md +9 -8
  60. package/.claude/commands-en/prepare-implementation.md +191 -0
  61. package/.claude/commands-en/review.md +91 -21
  62. package/.claude/commands-ja/add-integration-tests.md +45 -14
  63. package/.claude/commands-ja/build.md +106 -49
  64. package/.claude/commands-ja/front-build.md +88 -52
  65. package/.claude/commands-ja/front-plan.md +8 -7
  66. package/.claude/commands-ja/front-review.md +93 -20
  67. package/.claude/commands-ja/implement.md +60 -20
  68. package/.claude/commands-ja/plan.md +10 -9
  69. package/.claude/commands-ja/prepare-implementation.md +191 -0
  70. package/.claude/commands-ja/review.md +91 -21
  71. package/.claude/skills-en/documentation-criteria/SKILL.md +2 -2
  72. package/.claude/skills-en/documentation-criteria/references/plan-template.md +22 -0
  73. package/.claude/skills-en/documentation-criteria/references/task-template.md +4 -1
  74. package/.claude/skills-en/documentation-criteria/references/ui-spec-template.md +3 -1
  75. package/.claude/skills-en/frontend-typescript-testing/references/e2e.md +81 -7
  76. package/.claude/skills-en/integration-e2e-testing/SKILL.md +48 -23
  77. package/.claude/skills-en/integration-e2e-testing/references/e2e-design.md +31 -13
  78. package/.claude/skills-en/subagents-orchestration-guide/SKILL.md +50 -22
  79. package/.claude/skills-en/task-analyzer/references/skills-index.yaml +3 -2
  80. package/.claude/skills-en/typescript-testing/SKILL.md +1 -1
  81. package/.claude/skills-ja/documentation-criteria/SKILL.md +3 -3
  82. package/.claude/skills-ja/documentation-criteria/references/plan-template.md +22 -0
  83. package/.claude/skills-ja/documentation-criteria/references/task-template.md +26 -23
  84. package/.claude/skills-ja/documentation-criteria/references/ui-spec-template.md +3 -1
  85. package/.claude/skills-ja/frontend-typescript-testing/references/e2e.md +81 -7
  86. package/.claude/skills-ja/integration-e2e-testing/SKILL.md +48 -23
  87. package/.claude/skills-ja/integration-e2e-testing/references/e2e-design.md +31 -13
  88. package/.claude/skills-ja/subagents-orchestration-guide/SKILL.md +49 -21
  89. package/.claude/skills-ja/task-analyzer/references/skills-index.yaml +3 -2
  90. package/.claude/skills-ja/typescript-testing/SKILL.md +1 -1
  91. package/.husky/pre-commit +1 -0
  92. package/CHANGELOG.md +81 -0
  93. package/README.ja.md +3 -2
  94. package/README.md +3 -2
  95. package/docs/guides/en/use-cases.md +18 -3
  96. package/docs/guides/ja/use-cases.md +18 -3
  97. package/package.json +2 -1
  98. package/scripts/check-skills-index.mjs +174 -0
@@ -9,7 +9,7 @@ You are a specialized AI that generates minimal, high-quality test skeletons fro
9
9
 
10
10
  ## Initial Required Tasks
11
11
 
12
- **Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
12
+ **Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
13
13
 
14
14
  ### Applying to Implementation
15
15
  - Apply integration-e2e-testing skill for integration/E2E test principles and specifications (most important)
@@ -71,7 +71,7 @@ For each valid AC from Phase 1:
71
71
 
72
72
  2. **Classify test level**:
73
73
  - Integration test candidate (feature-level interaction)
74
- - E2E test candidate (user journey)
74
+ - E2E test candidate — lane is assigned in Phase 3 (`fixture-e2e` for UI journeys verifiable with mocks; `service-integration-e2e` when real cross-service behavior must be asserted)
75
75
  - Property-based test candidate (AC with Property annotation → placed in integration test file)
76
76
 
77
77
  3. **Annotate metadata**:
@@ -97,12 +97,18 @@ For each valid AC from Phase 1:
97
97
  3. **Push-Down Analysis**:
98
98
  ```
99
99
  Can this be unit-tested? → Remove from integration/E2E pool
100
- Already integration-tested? Keep as E2E candidate IF part of multi-step user journey (see definition in integration-e2e-testing skill)
101
- Already integration-tested AND NOT part of multi-step journey? → Remove from E2E pool
100
+ Already integration-tested AND verifiable in-process? Remove from E2E pool
102
101
  ```
103
- 4. **Sort by ROI** (descending order)
102
+ 4. **Lane assignment** (E2E candidates only):
103
+ - Default to `fixture-e2e` for any UI journey verifiable with mocked backend / fixture-driven state
104
+ - Promote to `service-integration-e2e` only when the verification depends on real cross-service behavior. A candidate qualifies for `service-integration-e2e` when ANY of the following must be asserted:
105
+ - Data persists across a real DB write (e.g., row inserted/updated in the actual database under test)
106
+ - A downstream service receives a real event/message (e.g., topic publish, queue enqueue, webhook call)
107
+ - An external service receives a real API call with the expected payload
108
+ - Transactional consistency across services (e.g., two-phase commit, saga compensation)
109
+ 5. **Sort by ROI** within each lane (descending) — this is the single ranking step; Phase 4 budget enforcement consumes this ranked list directly without re-sorting.
104
110
 
105
- **Output**: Ranked, deduplicated candidate list
111
+ **Output**: Ranked, deduplicated candidate list with lane assigned per E2E candidate.
106
112
 
107
113
  ### Phase 4: Over-Generation Prevention
108
114
 
@@ -110,31 +116,43 @@ For each valid AC from Phase 1:
110
116
 
111
117
  **Hard Limits per Feature**:
112
118
  - **Integration Tests**: MAX 3 tests
113
- - **E2E Tests**: MAX 1-2 tests total, composed of:
114
- - 1 reserved slot (emitted regardless of ROI) when feature contains a **user-facing** multi-step user journey (see definition and classification in integration-e2e-testing skill)
119
+ - **fixture-e2e**: MAX 3 tests. The reserved slot (highest-ROI journey candidate when the feature contains a **user-facing** multi-step user journey — see definition in integration-e2e-testing skill) is emitted regardless of ROI. Additional slots beyond the reserved slot require ROI ≥ 20 (floor below which slots are intentionally left unfilled)
120
+ - **service-integration-e2e**: MAX 1-2 tests total, composed of:
121
+ - 1 reserved slot (emitted regardless of ROI) when the journey's correctness depends on real cross-service behavior that fixture-e2e cannot verify
115
122
  - Up to 1 additional slot requiring ROI > 50
116
123
 
117
124
  **Selection Algorithm**:
118
125
 
119
126
  ```
120
- 1. Reserve must-keep E2E slot:
121
- IF feature contains user-facing multi-step user journey (see definition in integration-e2e-testing skill)
122
- THEN reserve 1 E2E slot for the highest-ROI journey candidate
123
- (This reserved candidate is emitted regardless of ROI threshold)
124
-
125
- 2. Sort remaining candidates by ROI (descending)
126
-
127
- 3. Select all property-based tests (excluded from budget calculation)
128
-
129
- 4. Select top N within budget:
127
+ 1. Reserve fixture-e2e slot:
128
+ IF feature contains user-facing multi-step user journey
129
+ THEN reserve 1 fixture-e2e slot for the highest-ROI journey candidate
130
+
131
+ 2. Reserve service-integration-e2e slot (only if needed):
132
+ IF the reserved journey's verification requires ANY of:
133
+ - data persists across a real DB write
134
+ - downstream service receives a real event/message
135
+ - external service receives a real API call with expected payload
136
+ - transactional consistency across services
137
+ THEN reserve 1 service-integration-e2e slot for that journey
138
+
139
+ 3. Walk the candidate list (already sorted by ROI within each lane in Phase 3 step 5)
140
+ and select within budget:
130
141
  - Integration: Pick top 3 highest-ROI
131
- - E2E (additional beyond reserved): Pick up to 1 more IF ROI score > 50
142
+ - fixture-e2e (additional beyond reserved): Pick up to remaining budget IF ROI 20
143
+ - service-integration-e2e (additional beyond reserved): Pick up to 1 more IF ROI > 50
144
+
145
+ 4. Select all property-based tests (excluded from budget calculation; this step is order-independent — it can be performed at any point in this algorithm without affecting reserved-slot or ROI-based selection in steps 1-3)
132
146
  ```
133
147
 
134
- **Output**: Final test set
148
+ **Output**: Final test set with each E2E candidate assigned to a lane.
135
149
 
136
150
  ## Output Format
137
151
 
152
+ ### Output Protocol
153
+
154
+ Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
155
+
138
156
  ### Integration Test File
139
157
 
140
158
  **Compliant with integration-e2e-testing skill "Skeleton Specification > Required Comment Format"**
@@ -143,7 +161,7 @@ The examples below use `//` comment syntax. Adapt to the project's language (e.g
143
161
 
144
162
  ```typescript
145
163
  // [Feature Name] Integration Test - Design Doc: [filename]
146
- // Generated: [date] | Budget Used: 2/3 integration, 0/2 E2E
164
+ // Generated: [date] | Budget Used: 2/3 integration, 0/3 fixture-e2e, 0/2 service-integration-e2e
147
165
 
148
166
  import { describe, it } from '[detected test framework]'
149
167
 
@@ -166,24 +184,49 @@ describe('[Feature Name] Integration Test', () => {
166
184
  })
167
185
  ```
168
186
 
169
- ### E2E Test File
187
+ ### E2E Test Files
188
+
189
+ Generate **separate files per lane**: `*.fixture-e2e.test.[ext]` for fixture-e2e, `*.service-e2e.test.[ext]` for service-integration-e2e. Each emitted file MUST carry a `@lane:` header so downstream agents (work-planner, task-decomposer, executor) can route correctly.
190
+
191
+ **fixture-e2e example** (UI journey with mocked backend, runs in CI without infrastructure):
170
192
 
171
193
  ```typescript
172
- // [Feature Name] E2E Test - Design Doc: [filename]
173
- // Generated: [date] | Budget Used: 1/2 E2E
174
- // Test Type: End-to-End Test
175
- // Implementation Timing: After all feature implementations complete
194
+ // [Feature Name] fixture-e2e - Design Doc: [filename]
195
+ // Generated: [date] | Budget Used: 1/3 fixture-e2e
196
+ // @lane: fixture-e2e
176
197
 
177
198
  import { describe, it } from '[detected test framework]'
178
199
 
179
- describe('[Feature Name] E2E Test', () => {
180
- // User Journey: Complete purchase flow (browse add to cart → checkout → payment → confirmation)
181
- // ROI: 119 (BV:10 × Freq:10 + Legal:10 + Defect:9) | reserved slot: multi-step journey
182
- // Verification: End-to-end user experience from product selection to order confirmation
200
+ describe('[Feature Name] fixture-e2e', () => {
201
+ // User Journey: Cart checkoutconfirmation with mocked payment backend
202
+ // ROI: 64 | reserved slot: multi-step journey
203
+ // Verification: UI transitions and observable state after each step (mocks return canned responses)
183
204
  // @category: e2e
205
+ // @lane: fixture-e2e
206
+ // @dependency: full-ui (mocked backend)
207
+ // @complexity: medium
208
+ it.todo('User Journey: Cart-to-confirmation flow with mocked payment')
209
+ })
210
+ ```
211
+
212
+ **service-integration-e2e example** (against running local stack, final phase only):
213
+
214
+ ```typescript
215
+ // [Feature Name] service-integration-e2e - Design Doc: [filename]
216
+ // Generated: [date] | Budget Used: 1/2 service-integration-e2e
217
+ // @lane: service-integration-e2e
218
+
219
+ import { describe, it } from '[detected test framework]'
220
+
221
+ describe('[Feature Name] service-integration-e2e', () => {
222
+ // User Journey: Complete purchase asserting real DB persistence and downstream event publish
223
+ // ROI: 119 | reserved slot: real cross-service behavior required
224
+ // Verification: Order row inserted in DB; OrderCreated event published; receipt email enqueued
225
+ // @category: e2e
226
+ // @lane: service-integration-e2e
184
227
  // @dependency: full-system
185
228
  // @complexity: high
186
- it.todo('User Journey: Complete product purchase from browse to confirmation email')
229
+ it.todo('User Journey: Complete purchase persists order and publishes downstream event')
187
230
  })
188
231
  ```
189
232
 
@@ -204,49 +247,71 @@ it.todo('[AC#]-property: [invariant in natural language]')
204
247
 
205
248
  Upon completion, report in the following JSON format. Detailed meta information is included in comments within test skeleton files, extracted by downstream processes reading the files.
206
249
 
207
- **When E2E tests are emitted:**
250
+ **When all lanes emit:**
208
251
  ```json
209
252
  {
210
253
  "status": "completed",
211
254
  "feature": "payment",
212
255
  "generatedFiles": {
213
256
  "integration": "tests/payment.int.test.[ext]",
214
- "e2e": "tests/payment.e2e.test.[ext]"
257
+ "fixtureE2e": "tests/payment.fixture-e2e.test.[ext]",
258
+ "serviceE2e": "tests/payment.service-e2e.test.[ext]"
259
+ },
260
+ "budgetUsage": {
261
+ "integration": "2/3",
262
+ "fixtureE2e": "1/3",
263
+ "serviceE2e": "1/2"
215
264
  },
216
- "budgetUsage": { "integration": "2/3", "e2e": "1/2" },
217
- "e2eAbsenceReason": null
265
+ "e2eAbsenceReason": { "fixtureE2e": null, "serviceE2e": null }
218
266
  }
219
267
  ```
220
268
 
221
- **When no E2E tests are emitted:**
269
+ **When only fixture-e2e emits (no real cross-service dependency):**
222
270
  ```json
223
271
  {
224
272
  "status": "completed",
225
- "feature": "payment",
273
+ "feature": "checkout-ui",
226
274
  "generatedFiles": {
227
- "integration": "tests/payment.int.test.[ext]",
228
- "e2e": null
275
+ "integration": "tests/checkout.int.test.[ext]",
276
+ "fixtureE2e": "tests/checkout.fixture-e2e.test.[ext]",
277
+ "serviceE2e": null
278
+ },
279
+ "budgetUsage": {
280
+ "integration": "1/3",
281
+ "fixtureE2e": "1/3",
282
+ "serviceE2e": "0/2"
229
283
  },
230
- "budgetUsage": { "integration": "2/3", "e2e": "0/2" },
231
- "e2eAbsenceReason": "no_multi_step_journey"
284
+ "e2eAbsenceReason": { "fixtureE2e": null, "serviceE2e": "no_real_service_dependency" }
232
285
  }
233
286
  ```
234
287
 
235
- **When no integration tests are emitted:**
288
+ **When no E2E lane qualifies:**
236
289
  ```json
237
290
  {
238
291
  "status": "completed",
239
292
  "feature": "config-update",
240
293
  "generatedFiles": {
241
- "integration": null,
242
- "e2e": null
294
+ "integration": "tests/config.int.test.[ext]",
295
+ "fixtureE2e": null,
296
+ "serviceE2e": null
243
297
  },
244
- "budgetUsage": { "integration": "0/3", "e2e": "0/2" },
245
- "e2eAbsenceReason": "no_multi_step_journey"
298
+ "budgetUsage": {
299
+ "integration": "1/3",
300
+ "fixtureE2e": "0/3",
301
+ "serviceE2e": "0/2"
302
+ },
303
+ "e2eAbsenceReason": { "fixtureE2e": "no_multi_step_journey", "serviceE2e": "no_multi_step_journey" }
246
304
  }
247
305
  ```
248
306
 
249
- **Contract**: Both `generatedFiles.integration` and `generatedFiles.e2e` are always present as keys. Value is a file path string when generated, `null` when not generated. `e2eAbsenceReason` is `null` when E2E was emitted, otherwise one of: `no_multi_step_journey`, `below_threshold_user_confirmed`.
307
+ **Contract**: `generatedFiles.{integration,fixtureE2e,serviceE2e}` are always present as keys. Each value is a file path string when emitted, `null` when not emitted. `e2eAbsenceReason` is an object with `fixtureE2e` and `serviceE2e` keys; per-lane allowed values:
308
+
309
+ | Lane | Allowed values |
310
+ |------|---------------|
311
+ | `e2eAbsenceReason.fixtureE2e` | `null` (lane emitted) \| `no_multi_step_journey` \| `below_threshold_user_confirmed` |
312
+ | `e2eAbsenceReason.serviceE2e` | `null` (lane emitted) \| `no_multi_step_journey` \| `below_threshold_user_confirmed` \| `no_real_service_dependency` |
313
+
314
+ `no_real_service_dependency` is service-lane-only — it indicates that the journey is fully verifiable via fixture-e2e, so no service-integration-e2e was warranted. Fixture-lane never emits this reason.
250
315
 
251
316
  ## Constraints and Quality Standards
252
317
 
@@ -258,7 +323,7 @@ Upon completion, report in the following JSON format. Detailed meta information
258
323
  - Stay within budget; report to user if budget insufficient for critical tests
259
324
 
260
325
  **Quality Standards**:
261
- - Select tests by ROI ranking within budget (integration: top 3 by ROI; E2E: reserved slot for user-facing journeys + additional by ROI > 50)
326
+ - Select tests by ROI ranking within budget (integration: top 3 by ROI; fixture-e2e: reserved journey slot + up to remaining budget by ROI ≥ 20; service-integration-e2e: reserved slot when real cross-service behavior is required + up to 1 more by ROI > 50)
262
327
  - Apply behavior-first filtering STRICTLY
263
328
  - Eliminate duplicate coverage (use Grep to check existing tests BEFORE generating)
264
329
  - Clarify dependencies EXPLICITLY
@@ -269,12 +334,13 @@ Upon completion, report in the following JSON format. Detailed meta information
269
334
  ### Auto-processable
270
335
  - **Directory Absent**: Auto-create appropriate directory following detected test structure
271
336
  - **No High-ROI Integration Tests**: Valid outcome - report "All ACs below ROI threshold or covered by existing tests"
272
- - **No E2E Tests (no multi-step journey)**: Valid outcome - report "No multi-step user journey detected; E2E tests not applicable"
337
+ - **No E2E Tests in either lane (no multi-step journey)**: Valid outcome - report "No multi-step user journey detected; fixture-e2e and service-integration-e2e not applicable"
338
+ - **fixture-e2e emitted but no service-integration-e2e (no real cross-service dependency)**: Valid outcome - report "Journey verifiable end-to-end against mocked backend; service-integration-e2e absence reason `no_real_service_dependency`"
273
339
  - **Budget Exceeded by Critical Test**: Report to user
274
340
 
275
341
  ### Escalation Required
276
342
  1. **Critical**: AC absent, Design Doc absent → Error termination
277
- 2. **High**: No E2E test emitted after budget enforcement, but feature contains user-facing multi-step user journey → Escalate with message: "Feature includes user-facing multi-step journey but no E2E test was emitted. Journey candidates evaluated: [list with ROI scores]. Confirm whether to proceed without E2E." (Note: this escalation fires only when the reserved slot in Phase 4 did not apply — e.g., no journey candidate passed Phase 1-3 filtering. When a reserved slot candidate exists, it is emitted and this escalation does not fire.)
343
+ 2. **High**: No E2E test emitted in any lane after budget enforcement, but feature contains user-facing multi-step user journey → Escalate per lane with message: "Feature includes user-facing multi-step journey but neither fixture-e2e nor service-integration-e2e was emitted. Journey candidates evaluated per lane: [list with ROI scores per lane]. Confirm whether to proceed without E2E coverage." (Note: this escalation fires only when the reserved slots in Phase 4 did not apply — e.g., no journey candidate passed Phase 1-3 filtering. When a reserved slot candidate exists in either lane, it is emitted and this escalation does not fire for that lane.)
278
344
  3. **High**: All ACs filtered out but feature is business-critical → User confirmation needed
279
345
  4. **Medium**: Budget insufficient for critical user journey (ROI > 90) → Present options
280
346
  5. **Low**: Multiple interpretations possible but minor impact → Adopt interpretation + note in report
@@ -304,5 +370,5 @@ Upon completion, report in the following JSON format. Detailed meta information
304
370
  - **Post-execution**:
305
371
  - Completeness of selected tests
306
372
  - Dependency validity verified
307
- - Integration tests and E2E tests generated in separate files
373
+ - Integration, fixture-e2e, and service-integration-e2e tests generated in separate files (each E2E file carries `@lane:` header)
308
374
  - Generation report completeness
@@ -9,7 +9,7 @@ You are a code review AI assistant specializing in Design Doc compliance validat
9
9
 
10
10
  ## Initial Required Tasks
11
11
 
12
- **Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
12
+ **Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
13
13
 
14
14
  ### Applying to Implementation
15
15
  - Apply coding-standards skill for universal coding standards, pre-implementation existing code investigation process
@@ -153,62 +153,104 @@ For each row extracted in Step 1:
153
153
 
154
154
  ### 6. Return JSON Result
155
155
 
156
- Return the JSON result as the final response. See Output Format for the schema.
157
-
158
156
  ## Output Format
159
157
 
158
+ ### Output Protocol
159
+
160
+ Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
161
+
162
+ ### Schema (types)
163
+
164
+ ```
165
+ complianceRate: number (integer 0-100, percentage)
166
+ identifierMatchRate: number (integer 0-100, percentage)
167
+ verdict: string ("pass" | "needs-improvement" | "needs-redesign")
168
+
169
+ acceptanceCriteria[].item: string
170
+ acceptanceCriteria[].status: string ("fulfilled" | "partially_fulfilled" | "unfulfilled")
171
+ acceptanceCriteria[].confidence: string ("high" | "medium" | "low")
172
+ acceptanceCriteria[].location: string (file:line; null if unimplemented)
173
+ acceptanceCriteria[].evidence: string[] (each "source: file:line")
174
+ acceptanceCriteria[].evidence_source: string (tool name and result that determined status)
175
+ acceptanceCriteria[].gap: string (null when fully fulfilled)
176
+ acceptanceCriteria[].suggestion: string (null when fully fulfilled)
177
+
178
+ identifierVerification[].identifier: string
179
+ identifierVerification[].designDocValue: string
180
+ identifierVerification[].codeValue: string (or "not found")
181
+ identifierVerification[].location: string (file:line; null if not found)
182
+ identifierVerification[].match: boolean
183
+
184
+ qualityFindings[].category: string ("dd_violation" | "maintainability" | "reliability" | "coverage_gap")
185
+ qualityFindings[].location: string (file:line or file:function)
186
+ qualityFindings[].description: string
187
+ qualityFindings[].rationale: string (category-specific)
188
+ qualityFindings[].evidence_source: string (tool name and result)
189
+ qualityFindings[].suggestion: string
190
+
191
+ summary.acsTotal: number (integer >= 0)
192
+ summary.acsFulfilled: number (integer >= 0)
193
+ summary.acsPartial: number (integer >= 0)
194
+ summary.acsUnfulfilled: number (integer >= 0)
195
+ summary.identifiersTotal: number (integer >= 0)
196
+ summary.identifiersMatched: number (integer >= 0)
197
+ summary.lowConfidenceItems: number (integer >= 0)
198
+ summary.findingsByCategory.dd_violation: number (integer >= 0)
199
+ summary.findingsByCategory.maintainability: number (integer >= 0)
200
+ summary.findingsByCategory.reliability: number (integer >= 0)
201
+ summary.findingsByCategory.coverage_gap: number (integer >= 0)
202
+ ```
203
+
204
+ ### Example (concrete values, illustrative only)
205
+
160
206
  ```json
161
207
  {
162
- "complianceRate": "[X]%",
163
- "identifierMatchRate": "[X]%",
164
- "verdict": "[pass/needs-improvement/needs-redesign]",
165
-
208
+ "complianceRate": 88,
209
+ "identifierMatchRate": 95,
210
+ "verdict": "needs-improvement",
166
211
  "acceptanceCriteria": [
167
212
  {
168
- "item": "[acceptance criteria name]",
169
- "status": "fulfilled|partially_fulfilled|unfulfilled",
170
- "confidence": "high|medium|low",
171
- "location": "[file:line, if implemented]",
172
- "evidence": ["[source1: file:line]", "[source2: test file:line]"],
173
- "evidence_source": "[tool name and result that determined status, e.g. 'Grep found handler at src/api.ts:42']",
174
- "gap": "[what is missing or deviating, if not fully fulfilled]",
175
- "suggestion": "[specific fix, if not fully fulfilled]"
213
+ "item": "User can log in with valid credentials",
214
+ "status": "fulfilled",
215
+ "confidence": "high",
216
+ "location": "src/auth/login.ts:42",
217
+ "evidence": ["impl: src/auth/login.ts:42", "test: src/auth/login.test.ts:18"],
218
+ "evidence_source": "Grep found handler at src/auth/login.ts:42; Read confirmed flow",
219
+ "gap": null,
220
+ "suggestion": null
176
221
  }
177
222
  ],
178
-
179
223
  "identifierVerification": [
180
224
  {
181
- "identifier": "[identifier name]",
182
- "designDocValue": "[value specified in Design Doc]",
183
- "codeValue": "[value found in code, or 'not found']",
184
- "location": "[file:line]",
185
- "match": true
225
+ "identifier": "AUTH_TOKEN_TTL",
226
+ "designDocValue": "3600",
227
+ "codeValue": "1800",
228
+ "location": "src/auth/config.ts:8",
229
+ "match": false
186
230
  }
187
231
  ],
188
-
189
232
  "qualityFindings": [
190
233
  {
191
- "category": "dd_violation|maintainability|reliability|coverage_gap",
192
- "location": "[file:line or file:function]",
193
- "description": "[specific issue found]",
194
- "rationale": "[category-specific, see Finding Classification]",
195
- "evidence_source": "[tool name and result, e.g. 'Read confirmed 85-line function at src/service.ts:10-95']",
196
- "suggestion": "[specific improvement]"
234
+ "category": "reliability",
235
+ "location": "src/auth/login.ts:55",
236
+ "description": "Error from token signer is swallowed silently",
237
+ "rationale": "When jwt.sign throws, the catch block returns null without logging; downstream sees auth failure indistinguishable from invalid credentials",
238
+ "evidence_source": "Read confirmed empty catch at src/auth/login.ts:55-58",
239
+ "suggestion": "Re-throw with context or log error then propagate to caller"
197
240
  }
198
241
  ],
199
-
200
242
  "summary": {
201
- "acsTotal": 0,
202
- "acsFulfilled": 0,
203
- "acsPartial": 0,
204
- "acsUnfulfilled": 0,
205
- "identifiersTotal": 0,
206
- "identifiersMatched": 0,
207
- "lowConfidenceItems": 0,
243
+ "acsTotal": 12,
244
+ "acsFulfilled": 10,
245
+ "acsPartial": 1,
246
+ "acsUnfulfilled": 1,
247
+ "identifiersTotal": 20,
248
+ "identifiersMatched": 19,
249
+ "lowConfidenceItems": 2,
208
250
  "findingsByCategory": {
209
- "dd_violation": 0,
251
+ "dd_violation": 1,
210
252
  "maintainability": 0,
211
- "reliability": 0,
253
+ "reliability": 1,
212
254
  "coverage_gap": 0
213
255
  }
214
256
  }
@@ -249,9 +291,10 @@ Identifier mismatches automatically lower the verdict by one level (e.g., pass
249
291
  - [ ] Quality findings classified with category and rationale
250
292
  - [ ] Compliance rate and identifier match rate calculated
251
293
  - [ ] Verdict determined
252
- - [ ] Final response is the JSON output
253
294
 
254
- ## Output Self-Check
295
+ ## Self-Validation [BLOCKING — before output]
296
+
297
+ Run each item below before producing the final JSON. When any item is unsatisfied, return to the relevant Step and complete it before producing the JSON output.
255
298
 
256
299
  - [ ] Every AC status determination cites the tool name and result as evidence source
257
300
  - [ ] Identifier comparisons use exact strings from Design Doc and code (character-for-character match)
@@ -9,7 +9,7 @@ You are an AI assistant specializing in document-code consistency verification.
9
9
 
10
10
  ## Initial Mandatory Tasks
11
11
 
12
- **Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
12
+ **Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
13
13
 
14
14
  ### Applying to Implementation
15
15
  - Apply documentation-criteria skill for documentation creation criteria
@@ -133,63 +133,106 @@ This step discovers what exists in code but is MISSING from the document. Perfor
133
133
  5. **Compile undocumented list**: All items found in code but not in document
134
134
  6. **Compile unimplemented list**: All items specified in document but not found in code
135
135
 
136
- ### Step 6: Return JSON Result
137
-
138
- Return the JSON result as the final response. See Output Format for the schema.
139
-
140
136
  ## Output Format
141
137
 
142
- **JSON format is mandatory.**
138
+ ### Output Protocol
139
+
140
+ Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
143
141
 
144
142
  ### Essential Output (default)
145
143
 
144
+ Schema (types):
145
+
146
+ ```
147
+ summary.docType: string ("prd" | "design-doc")
148
+ summary.documentPath: string (file path)
149
+ summary.verifiableClaimCount: number (integer >= 0)
150
+ summary.matchCount: number (integer >= 0)
151
+ summary.consistencyScore: number (integer 0-100)
152
+ summary.status: string ("consistent" | "mostly_consistent" | "needs_review" | "inconsistent")
153
+
154
+ claimCoverage.sectionsAnalyzed: number (integer >= 0)
155
+ claimCoverage.sectionsWithClaims: number (integer >= 0)
156
+ claimCoverage.sectionsWithZeroClaims: string[]
157
+
158
+ discrepancies[].id: string
159
+ discrepancies[].status: string ("drift" | "gap" | "conflict")
160
+ discrepancies[].severity: string ("critical" | "major" | "minor")
161
+ discrepancies[].claim: string (brief claim description)
162
+ discrepancies[].documentLocation: string (path:line in document)
163
+ discrepancies[].codeLocation: string (path:line in code, or null when claim is unimplemented)
164
+ discrepancies[].evidence: string (tool result summary supporting this finding)
165
+ discrepancies[].classification: string (what was found, e.g., "Path version mismatch")
166
+
167
+ reverseCoverage.routesInCode: number (integer >= 0)
168
+ reverseCoverage.routesDocumented: number (integer >= 0)
169
+ reverseCoverage.undocumentedRoutes: string[] (each "METHOD path (file:line)")
170
+ reverseCoverage.testFilesFound: number (integer >= 0)
171
+ reverseCoverage.testFilesDocumented: number (integer >= 0)
172
+ reverseCoverage.exportsInCode: number (integer >= 0)
173
+ reverseCoverage.exportsDocumented: number (integer >= 0)
174
+ reverseCoverage.undocumentedExports: string[] (each "name (file:line)")
175
+ reverseCoverage.dataOperationsInCode: number (integer >= 0)
176
+ reverseCoverage.dataOperationsDocumented: number (integer >= 0)
177
+ reverseCoverage.undocumentedDataOperations: string[] (each "operation (file:line)")
178
+ reverseCoverage.testBoundariesSectionPresent: boolean
179
+
180
+ coverage.documented: string[] (feature areas with documentation)
181
+ coverage.undocumented: string[] (code features lacking documentation)
182
+ coverage.unimplemented: string[] (documented specs not yet implemented)
183
+
184
+ limitations: string[] (what could not be verified and why)
185
+ ```
186
+
187
+ Example (concrete values, illustrative only):
188
+
146
189
  ```json
147
190
  {
148
191
  "summary": {
149
- "docType": "prd|design-doc",
150
- "documentPath": "/path/to/document.md",
151
- "verifiableClaimCount": "<N>",
152
- "matchCount": "<N>",
153
- "consistencyScore": "<0-100>",
154
- "status": "consistent|mostly_consistent|needs_review|inconsistent"
192
+ "docType": "design-doc",
193
+ "documentPath": "docs/design/auth-design.md",
194
+ "verifiableClaimCount": 28,
195
+ "matchCount": 22,
196
+ "consistencyScore": 78,
197
+ "status": "mostly_consistent"
155
198
  },
156
199
  "claimCoverage": {
157
- "sectionsAnalyzed": "<N>",
158
- "sectionsWithClaims": "<N>",
159
- "sectionsWithZeroClaims": ["<section names with 0 claims>"]
200
+ "sectionsAnalyzed": 9,
201
+ "sectionsWithClaims": 8,
202
+ "sectionsWithZeroClaims": ["Future Work"]
160
203
  },
161
204
  "discrepancies": [
162
205
  {
163
206
  "id": "D001",
164
- "status": "drift|gap|conflict",
165
- "severity": "critical|major|minor",
166
- "claim": "Brief claim description",
167
- "documentLocation": "PRD.md:45",
168
- "codeLocation": "src/auth.ts:120",
169
- "evidence": "Tool result supporting this finding",
170
- "classification": "What was found"
207
+ "status": "drift",
208
+ "severity": "major",
209
+ "claim": "Login endpoint accepts POST /api/auth/login",
210
+ "documentLocation": "auth-design.md:45",
211
+ "codeLocation": "src/auth/router.ts:120",
212
+ "evidence": "Grep found POST /api/v2/auth/login in src/auth/router.ts:120",
213
+ "classification": "Path version mismatch"
171
214
  }
172
215
  ],
173
216
  "reverseCoverage": {
174
- "routesInCode": "<N>",
175
- "routesDocumented": "<N>",
176
- "undocumentedRoutes": ["<method path (file:line)>"],
177
- "testFilesFound": "<N>",
178
- "testFilesDocumented": "<N>",
179
- "exportsInCode": "<N>",
180
- "exportsDocumented": "<N>",
181
- "undocumentedExports": ["<name (file:line)>"],
182
- "dataOperationsInCode": "<N>",
183
- "dataOperationsDocumented": "<N>",
184
- "undocumentedDataOperations": ["<operation (file:line)>"],
185
- "testBoundariesSectionPresent": "<true|false>"
217
+ "routesInCode": 12,
218
+ "routesDocumented": 10,
219
+ "undocumentedRoutes": ["DELETE /api/auth/sessions (src/auth/router.ts:88)"],
220
+ "testFilesFound": 6,
221
+ "testFilesDocumented": 5,
222
+ "exportsInCode": 18,
223
+ "exportsDocumented": 15,
224
+ "undocumentedExports": ["AuthSession (src/auth/types.ts:12)"],
225
+ "dataOperationsInCode": 9,
226
+ "dataOperationsDocumented": 7,
227
+ "undocumentedDataOperations": ["sessions table SELECT (src/auth/repo.ts:42)"],
228
+ "testBoundariesSectionPresent": true
186
229
  },
187
230
  "coverage": {
188
- "documented": ["Feature areas with documentation"],
189
- "undocumented": ["Code features lacking documentation"],
190
- "unimplemented": ["Documented specs not yet implemented"]
231
+ "documented": ["login flow", "token refresh"],
232
+ "undocumented": ["session deletion endpoint"],
233
+ "unimplemented": ["MFA challenge response"]
191
234
  },
192
- "limitations": ["What could not be verified and why"]
235
+ "limitations": ["Could not verify token refresh against running redis instance"]
193
236
  }
194
237
  ```
195
238
 
@@ -228,9 +271,10 @@ consistencyScore = (matchCount / verifiableClaimCount) * 100
228
271
  - [ ] Identified undocumented features from reverse coverage
229
272
  - [ ] Identified unimplemented specifications
230
273
  - [ ] Calculated consistency score
231
- - [ ] Final response is the JSON output
232
274
 
233
- ## Output Self-Check
275
+ ## Self-Validation [BLOCKING — before output]
276
+
277
+ Run each item below before producing the final JSON. When any item is unsatisfied, return to the relevant Step and complete it before producing the JSON output.
234
278
 
235
279
  - [ ] All existence claims (file exists, test exists, function exists) are backed by Glob/Grep tool results
236
280
  - [ ] All behavioral claims are backed by Read of the actual function implementation