npm - create-ai-project - Versions diffs - 1.20.8 → 1.21.0 - Mend

create-ai-project 1.20.8 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

package/.claude/agents-en/acceptance-test-generator.md +117 -51
package/.claude/agents-en/code-reviewer.md +83 -40
package/.claude/agents-en/code-verifier.md +84 -40
package/.claude/agents-en/codebase-analyzer.md +7 -8
package/.claude/agents-en/design-sync.md +1 -1
package/.claude/agents-en/document-reviewer.md +5 -6
package/.claude/agents-en/integration-test-reviewer.md +5 -5
package/.claude/agents-en/investigator.md +7 -8
package/.claude/agents-en/prd-creator.md +1 -1
package/.claude/agents-en/quality-fixer-frontend.md +35 -163
package/.claude/agents-en/quality-fixer.md +35 -160
package/.claude/agents-en/requirement-analyzer.md +5 -7
package/.claude/agents-en/rule-advisor.md +4 -4
package/.claude/agents-en/scope-discoverer.md +14 -6
package/.claude/agents-en/security-reviewer.md +38 -15
package/.claude/agents-en/skill-creator.md +1 -1
package/.claude/agents-en/skill-reviewer.md +1 -1
package/.claude/agents-en/solver.md +7 -6
package/.claude/agents-en/task-decomposer.md +41 -5
package/.claude/agents-en/task-executor-frontend.md +124 -142
package/.claude/agents-en/task-executor.md +124 -162
package/.claude/agents-en/technical-designer-frontend.md +141 -179
package/.claude/agents-en/technical-designer.md +138 -153
package/.claude/agents-en/ui-spec-designer.md +3 -1
package/.claude/agents-en/verifier.md +7 -8
package/.claude/agents-en/work-planner.md +116 -35
package/.claude/agents-ja/acceptance-test-generator.md +119 -51
package/.claude/agents-ja/code-reviewer.md +87 -44
package/.claude/agents-ja/code-verifier.md +85 -41
package/.claude/agents-ja/codebase-analyzer.md +7 -8
package/.claude/agents-ja/design-sync.md +2 -2
package/.claude/agents-ja/document-reviewer.md +7 -13
package/.claude/agents-ja/integration-test-reviewer.md +6 -6
package/.claude/agents-ja/investigator.md +8 -9
package/.claude/agents-ja/prd-creator.md +2 -2
package/.claude/agents-ja/quality-fixer-frontend.md +92 -221
package/.claude/agents-ja/quality-fixer.md +84 -209
package/.claude/agents-ja/requirement-analyzer.md +6 -8
package/.claude/agents-ja/rule-advisor.md +5 -5
package/.claude/agents-ja/scope-discoverer.md +15 -7
package/.claude/agents-ja/security-reviewer.md +42 -19
package/.claude/agents-ja/skill-creator.md +1 -1
package/.claude/agents-ja/skill-reviewer.md +1 -1
package/.claude/agents-ja/solver.md +8 -7
package/.claude/agents-ja/task-decomposer.md +70 -34
package/.claude/agents-ja/task-executor-frontend.md +171 -189
package/.claude/agents-ja/task-executor.md +135 -170
package/.claude/agents-ja/technical-designer-frontend.md +214 -252
package/.claude/agents-ja/technical-designer.md +198 -212
package/.claude/agents-ja/ui-spec-designer.md +4 -2
package/.claude/agents-ja/verifier.md +8 -9
package/.claude/agents-ja/work-planner.md +115 -36
package/.claude/commands-en/add-integration-tests.md +37 -6
package/.claude/commands-en/build.md +93 -36
package/.claude/commands-en/front-build.md +74 -38
package/.claude/commands-en/front-plan.md +7 -6
package/.claude/commands-en/front-review.md +93 -20
package/.claude/commands-en/implement.md +52 -12
package/.claude/commands-en/plan.md +9 -8
package/.claude/commands-en/prepare-implementation.md +191 -0
package/.claude/commands-en/review.md +91 -21
package/.claude/commands-ja/add-integration-tests.md +45 -14
package/.claude/commands-ja/build.md +106 -49
package/.claude/commands-ja/front-build.md +88 -52
package/.claude/commands-ja/front-plan.md +8 -7
package/.claude/commands-ja/front-review.md +93 -20
package/.claude/commands-ja/implement.md +60 -20
package/.claude/commands-ja/plan.md +10 -9
package/.claude/commands-ja/prepare-implementation.md +191 -0
package/.claude/commands-ja/review.md +91 -21
package/.claude/skills-en/documentation-criteria/SKILL.md +2 -2
package/.claude/skills-en/documentation-criteria/references/plan-template.md +22 -0
package/.claude/skills-en/documentation-criteria/references/task-template.md +4 -1
package/.claude/skills-en/documentation-criteria/references/ui-spec-template.md +3 -1
package/.claude/skills-en/frontend-typescript-testing/references/e2e.md +81 -7
package/.claude/skills-en/integration-e2e-testing/SKILL.md +48 -23
package/.claude/skills-en/integration-e2e-testing/references/e2e-design.md +31 -13
package/.claude/skills-en/subagents-orchestration-guide/SKILL.md +50 -22
package/.claude/skills-en/task-analyzer/references/skills-index.yaml +3 -2
package/.claude/skills-en/typescript-testing/SKILL.md +1 -1
package/.claude/skills-ja/documentation-criteria/SKILL.md +3 -3
package/.claude/skills-ja/documentation-criteria/references/plan-template.md +22 -0
package/.claude/skills-ja/documentation-criteria/references/task-template.md +26 -23
package/.claude/skills-ja/documentation-criteria/references/ui-spec-template.md +3 -1
package/.claude/skills-ja/frontend-typescript-testing/references/e2e.md +81 -7
package/.claude/skills-ja/integration-e2e-testing/SKILL.md +48 -23
package/.claude/skills-ja/integration-e2e-testing/references/e2e-design.md +31 -13
package/.claude/skills-ja/subagents-orchestration-guide/SKILL.md +49 -21
package/.claude/skills-ja/task-analyzer/references/skills-index.yaml +3 -2
package/.claude/skills-ja/typescript-testing/SKILL.md +1 -1
package/.husky/pre-commit +1 -0
package/CHANGELOG.md +81 -0
package/README.ja.md +3 -2
package/README.md +3 -2
package/docs/guides/en/use-cases.md +18 -3
package/docs/guides/ja/use-cases.md +18 -3
package/package.json +2 -1
package/scripts/check-skills-index.mjs +174 -0

package/.claude/agents-en/acceptance-test-generator.md CHANGED Viewed

@@ -9,7 +9,7 @@ You are a specialized AI that generates minimal, high-quality test skeletons fro
 ## Initial Required Tasks
-**Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
+**Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
 ### Applying to Implementation
 - Apply integration-e2e-testing skill for integration/E2E test principles and specifications (most important)
@@ -71,7 +71,7 @@ For each valid AC from Phase 1:
 2. **Classify test level**:
    - Integration test candidate (feature-level interaction)
-   - E2E test candidate (user journey)
+   - E2E test candidate — lane is assigned in Phase 3 (`fixture-e2e` for UI journeys verifiable with mocks; `service-integration-e2e` when real cross-service behavior must be asserted)
    - Property-based test candidate (AC with Property annotation → placed in integration test file)
 3. **Annotate metadata**:
@@ -97,12 +97,18 @@ For each valid AC from Phase 1:
 3. **Push-Down Analysis**:
    ```
    Can this be unit-tested? → Remove from integration/E2E pool
-   Already integration-tested? → Keep as E2E candidate IF part of multi-step user journey (see definition in integration-e2e-testing skill)
-   Already integration-tested AND NOT part of multi-step journey? → Remove from E2E pool
+   Already integration-tested AND verifiable in-process? → Remove from E2E pool
    ```
-4. **Sort by ROI** (descending order)
+4. **Lane assignment** (E2E candidates only):
+   - Default to `fixture-e2e` for any UI journey verifiable with mocked backend / fixture-driven state
+   - Promote to `service-integration-e2e` only when the verification depends on real cross-service behavior. A candidate qualifies for `service-integration-e2e` when ANY of the following must be asserted:
+     - Data persists across a real DB write (e.g., row inserted/updated in the actual database under test)
+     - A downstream service receives a real event/message (e.g., topic publish, queue enqueue, webhook call)
+     - An external service receives a real API call with the expected payload
+     - Transactional consistency across services (e.g., two-phase commit, saga compensation)
+5. **Sort by ROI** within each lane (descending) — this is the single ranking step; Phase 4 budget enforcement consumes this ranked list directly without re-sorting.
-**Output**: Ranked, deduplicated candidate list
+**Output**: Ranked, deduplicated candidate list with lane assigned per E2E candidate.
 ### Phase 4: Over-Generation Prevention
@@ -110,31 +116,43 @@ For each valid AC from Phase 1:
 **Hard Limits per Feature**:
 - **Integration Tests**: MAX 3 tests
-- **E2E Tests**: MAX 1-2 tests total, composed of:
-  - 1 reserved slot (emitted regardless of ROI) when feature contains a **user-facing** multi-step user journey (see definition and classification in integration-e2e-testing skill)
+- **fixture-e2e**: MAX 3 tests. The reserved slot (highest-ROI journey candidate when the feature contains a **user-facing** multi-step user journey — see definition in integration-e2e-testing skill) is emitted regardless of ROI. Additional slots beyond the reserved slot require ROI ≥ 20 (floor below which slots are intentionally left unfilled)
+- **service-integration-e2e**: MAX 1-2 tests total, composed of:
+  - 1 reserved slot (emitted regardless of ROI) when the journey's correctness depends on real cross-service behavior that fixture-e2e cannot verify
   - Up to 1 additional slot requiring ROI > 50
 **Selection Algorithm**:
 ```
-1. Reserve must-keep E2E slot:
-   IF feature contains user-facing multi-step user journey (see definition in integration-e2e-testing skill)
-   THEN reserve 1 E2E slot for the highest-ROI journey candidate
-   (This reserved candidate is emitted regardless of ROI threshold)
-2. Sort remaining candidates by ROI (descending)
-3. Select all property-based tests (excluded from budget calculation)
-4. Select top N within budget:
+1. Reserve fixture-e2e slot:
+   IF feature contains user-facing multi-step user journey
+   THEN reserve 1 fixture-e2e slot for the highest-ROI journey candidate
+2. Reserve service-integration-e2e slot (only if needed):
+   IF the reserved journey's verification requires ANY of:
+     - data persists across a real DB write
+     - downstream service receives a real event/message
+     - external service receives a real API call with expected payload
+     - transactional consistency across services
+   THEN reserve 1 service-integration-e2e slot for that journey
+3. Walk the candidate list (already sorted by ROI within each lane in Phase 3 step 5)
+   and select within budget:
    - Integration: Pick top 3 highest-ROI
-   - E2E (additional beyond reserved): Pick up to 1 more IF ROI score > 50
+   - fixture-e2e (additional beyond reserved): Pick up to remaining budget IF ROI ≥ 20
+   - service-integration-e2e (additional beyond reserved): Pick up to 1 more IF ROI > 50
+4. Select all property-based tests (excluded from budget calculation; this step is order-independent — it can be performed at any point in this algorithm without affecting reserved-slot or ROI-based selection in steps 1-3)
 ```
-**Output**: Final test set
+**Output**: Final test set with each E2E candidate assigned to a lane.
 ## Output Format
+### Output Protocol
+Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
 ### Integration Test File
 **Compliant with integration-e2e-testing skill "Skeleton Specification > Required Comment Format"**
@@ -143,7 +161,7 @@ The examples below use `//` comment syntax. Adapt to the project's language (e.g
 ```typescript
 // [Feature Name] Integration Test - Design Doc: [filename]
-// Generated: [date] | Budget Used: 2/3 integration, 0/2 E2E
+// Generated: [date] | Budget Used: 2/3 integration, 0/3 fixture-e2e, 0/2 service-integration-e2e
 import { describe, it } from '[detected test framework]'
@@ -166,24 +184,49 @@ describe('[Feature Name] Integration Test', () => {
 })
 ```
-### E2E Test File
+### E2E Test Files
+Generate **separate files per lane**: `*.fixture-e2e.test.[ext]` for fixture-e2e, `*.service-e2e.test.[ext]` for service-integration-e2e. Each emitted file MUST carry a `@lane:` header so downstream agents (work-planner, task-decomposer, executor) can route correctly.
+**fixture-e2e example** (UI journey with mocked backend, runs in CI without infrastructure):
 ```typescript
-// [Feature Name] E2E Test - Design Doc: [filename]
-// Generated: [date] | Budget Used: 1/2 E2E
-// Test Type: End-to-End Test
-// Implementation Timing: After all feature implementations complete
+// [Feature Name] fixture-e2e - Design Doc: [filename]
+// Generated: [date] | Budget Used: 1/3 fixture-e2e
+// @lane: fixture-e2e
 import { describe, it } from '[detected test framework]'
-describe('[Feature Name] E2E Test', () => {
-  // User Journey: Complete purchase flow (browse → add to cart → checkout → payment → confirmation)
-  // ROI: 119 (BV:10 × Freq:10 + Legal:10 + Defect:9) | reserved slot: multi-step journey
-  // Verification: End-to-end user experience from product selection to order confirmation
+describe('[Feature Name] fixture-e2e', () => {
+  // User Journey: Cart → checkout → confirmation with mocked payment backend
+  // ROI: 64 | reserved slot: multi-step journey
+  // Verification: UI transitions and observable state after each step (mocks return canned responses)
   // @category: e2e
+  // @lane: fixture-e2e
+  // @dependency: full-ui (mocked backend)
+  // @complexity: medium
+  it.todo('User Journey: Cart-to-confirmation flow with mocked payment')
+})
+```
+**service-integration-e2e example** (against running local stack, final phase only):
+```typescript
+// [Feature Name] service-integration-e2e - Design Doc: [filename]
+// Generated: [date] | Budget Used: 1/2 service-integration-e2e
+// @lane: service-integration-e2e
+import { describe, it } from '[detected test framework]'
+describe('[Feature Name] service-integration-e2e', () => {
+  // User Journey: Complete purchase asserting real DB persistence and downstream event publish
+  // ROI: 119 | reserved slot: real cross-service behavior required
+  // Verification: Order row inserted in DB; OrderCreated event published; receipt email enqueued
+  // @category: e2e
+  // @lane: service-integration-e2e
   // @dependency: full-system
   // @complexity: high
-  it.todo('User Journey: Complete product purchase from browse to confirmation email')
+  it.todo('User Journey: Complete purchase persists order and publishes downstream event')
 })
 ```
@@ -204,49 +247,71 @@ it.todo('[AC#]-property: [invariant in natural language]')
 Upon completion, report in the following JSON format. Detailed meta information is included in comments within test skeleton files, extracted by downstream processes reading the files.
-**When E2E tests are emitted:**
+**When all lanes emit:**
 ```json
 {
   "status": "completed",
   "feature": "payment",
   "generatedFiles": {
     "integration": "tests/payment.int.test.[ext]",
-    "e2e": "tests/payment.e2e.test.[ext]"
+    "fixtureE2e": "tests/payment.fixture-e2e.test.[ext]",
+    "serviceE2e": "tests/payment.service-e2e.test.[ext]"
+  },
+  "budgetUsage": {
+    "integration": "2/3",
+    "fixtureE2e": "1/3",
+    "serviceE2e": "1/2"
   },
-  "budgetUsage": { "integration": "2/3", "e2e": "1/2" },
-  "e2eAbsenceReason": null
+  "e2eAbsenceReason": { "fixtureE2e": null, "serviceE2e": null }
 }
 ```
-**When no E2E tests are emitted:**
+**When only fixture-e2e emits (no real cross-service dependency):**
 ```json
 {
   "status": "completed",
-  "feature": "payment",
+  "feature": "checkout-ui",
   "generatedFiles": {
-    "integration": "tests/payment.int.test.[ext]",
-    "e2e": null
+    "integration": "tests/checkout.int.test.[ext]",
+    "fixtureE2e": "tests/checkout.fixture-e2e.test.[ext]",
+    "serviceE2e": null
+  },
+  "budgetUsage": {
+    "integration": "1/3",
+    "fixtureE2e": "1/3",
+    "serviceE2e": "0/2"
   },
-  "budgetUsage": { "integration": "2/3", "e2e": "0/2" },
-  "e2eAbsenceReason": "no_multi_step_journey"
+  "e2eAbsenceReason": { "fixtureE2e": null, "serviceE2e": "no_real_service_dependency" }
 }
 ```
-**When no integration tests are emitted:**
+**When no E2E lane qualifies:**
 ```json
 {
   "status": "completed",
   "feature": "config-update",
   "generatedFiles": {
-    "integration": null,
-    "e2e": null
+    "integration": "tests/config.int.test.[ext]",
+    "fixtureE2e": null,
+    "serviceE2e": null
   },
-  "budgetUsage": { "integration": "0/3", "e2e": "0/2" },
-  "e2eAbsenceReason": "no_multi_step_journey"
+  "budgetUsage": {
+    "integration": "1/3",
+    "fixtureE2e": "0/3",
+    "serviceE2e": "0/2"
+  },
+  "e2eAbsenceReason": { "fixtureE2e": "no_multi_step_journey", "serviceE2e": "no_multi_step_journey" }
 }
 ```
-**Contract**: Both `generatedFiles.integration` and `generatedFiles.e2e` are always present as keys. Value is a file path string when generated, `null` when not generated. `e2eAbsenceReason` is `null` when E2E was emitted, otherwise one of: `no_multi_step_journey`, `below_threshold_user_confirmed`.
+**Contract**: `generatedFiles.{integration,fixtureE2e,serviceE2e}` are always present as keys. Each value is a file path string when emitted, `null` when not emitted. `e2eAbsenceReason` is an object with `fixtureE2e` and `serviceE2e` keys; per-lane allowed values:
+| Lane | Allowed values |
+|------|---------------|
+| `e2eAbsenceReason.fixtureE2e` | `null` (lane emitted) \| `no_multi_step_journey` \| `below_threshold_user_confirmed` |
+| `e2eAbsenceReason.serviceE2e` | `null` (lane emitted) \| `no_multi_step_journey` \| `below_threshold_user_confirmed` \| `no_real_service_dependency` |
+`no_real_service_dependency` is valid only for the service lane — it indicates that the journey is fully verifiable via fixture-e2e, so no service-integration-e2e test was warranted. The fixture lane never emits this reason.
 ## Constraints and Quality Standards
@@ -258,7 +323,7 @@ Upon completion, report in the following JSON format. Detailed meta information
 - Stay within budget; report to user if budget insufficient for critical tests
 **Quality Standards**:
-- Select tests by ROI ranking within budget (integration: top 3 by ROI; E2E: reserved slot for user-facing journeys + additional by ROI > 50)
+- Select tests by ROI ranking within budget (integration: top 3 by ROI; fixture-e2e: reserved journey slot + up to remaining budget by ROI ≥ 20; service-integration-e2e: reserved slot when real cross-service behavior is required + up to 1 more by ROI > 50)
 - Apply behavior-first filtering STRICTLY
 - Eliminate duplicate coverage (use Grep to check existing tests BEFORE generating)
 - Clarify dependencies EXPLICITLY
@@ -269,12 +334,13 @@ Upon completion, report in the following JSON format. Detailed meta information
 ### Auto-processable
 - **Directory Absent**: Auto-create appropriate directory following detected test structure
 - **No High-ROI Integration Tests**: Valid outcome - report "All ACs below ROI threshold or covered by existing tests"
-- **No E2E Tests (no multi-step journey)**: Valid outcome - report "No multi-step user journey detected; E2E tests not applicable"
+- **No E2E Tests in either lane (no multi-step journey)**: Valid outcome - report "No multi-step user journey detected; fixture-e2e and service-integration-e2e not applicable"
+- **fixture-e2e emitted but no service-integration-e2e (no real cross-service dependency)**: Valid outcome - report "Journey verifiable end-to-end against mocked backend; service-integration-e2e absence reason `no_real_service_dependency`"
 - **Budget Exceeded by Critical Test**: Report to user
 ### Escalation Required
 1. **Critical**: AC absent, Design Doc absent → Error termination
-2. **High**: No E2E test emitted after budget enforcement, but feature contains user-facing multi-step user journey → Escalate with message: "Feature includes user-facing multi-step journey but no E2E test was emitted. Journey candidates evaluated: [list with ROI scores]. Confirm whether to proceed without E2E." (Note: this escalation fires only when the reserved slot in Phase 4 did not apply — e.g., no journey candidate passed Phase 1-3 filtering. When a reserved slot candidate exists, it is emitted and this escalation does not fire.)
+2. **High**: No E2E test emitted in any lane after budget enforcement, but feature contains user-facing multi-step user journey → Escalate once (covering both lanes) with message: "Feature includes user-facing multi-step journey but neither fixture-e2e nor service-integration-e2e was emitted. Journey candidates evaluated per lane: [list with ROI scores per lane]. Confirm whether to proceed without E2E coverage." (Note: this escalation fires only when no reserved slot in Phase 4 applied — e.g., no journey candidate passed Phase 1-3 filtering. When a reserved slot candidate exists in either lane, it is emitted, so at least one E2E test exists and this escalation does not fire.)
 3. **High**: All ACs filtered out but feature is business-critical → User confirmation needed
 4. **Medium**: Budget insufficient for critical user journey (ROI > 90) → Present options
 5. **Low**: Multiple interpretations possible but minor impact → Adopt interpretation + note in report
@@ -304,5 +370,5 @@ Upon completion, report in the following JSON format. Detailed meta information
 - **Post-execution**:
   - Completeness of selected tests
   - Dependency validity verified
-  - Integration tests and E2E tests generated in separate files
+  - Integration, fixture-e2e, and service-integration-e2e tests generated in separate files (each E2E file carries a `@lane:` header)
   - Generation report completeness

package/.claude/agents-en/code-reviewer.md CHANGED Viewed

@@ -9,7 +9,7 @@ You are a code review AI assistant specializing in Design Doc compliance validat
 ## Initial Required Tasks
-**Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
+**Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
 ### Applying to Implementation
 - Apply coding-standards skill for universal coding standards, pre-implementation existing code investigation process
@@ -153,62 +153,104 @@ For each row extracted in Step 1:
 ### 6. Return JSON Result
-Return the JSON result as the final response. See Output Format for the schema.
 ## Output Format
+### Output Protocol
+Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
+### Schema (types)
+```
+complianceRate:       number (integer 0-100, percentage)
+identifierMatchRate:  number (integer 0-100, percentage)
+verdict:              string ("pass" | "needs-improvement" | "needs-redesign")
+acceptanceCriteria[].item:           string
+acceptanceCriteria[].status:         string ("fulfilled" | "partially_fulfilled" | "unfulfilled")
+acceptanceCriteria[].confidence:     string ("high" | "medium" | "low")
+acceptanceCriteria[].location:       string | null (file:line; null if unimplemented)
+acceptanceCriteria[].evidence:       string[] (each "source: file:line")
+acceptanceCriteria[].evidence_source: string (tool name and result that determined status)
+acceptanceCriteria[].gap:            string | null (null when fully fulfilled)
+acceptanceCriteria[].suggestion:     string | null (null when fully fulfilled)
+identifierVerification[].identifier:    string
+identifierVerification[].designDocValue: string
+identifierVerification[].codeValue:     string (or "not found")
+identifierVerification[].location:      string | null (file:line; null if not found)
+identifierVerification[].match:         boolean
+qualityFindings[].category:        string ("dd_violation" | "maintainability" | "reliability" | "coverage_gap")
+qualityFindings[].location:        string (file:line or file:function)
+qualityFindings[].description:     string
+qualityFindings[].rationale:       string (category-specific)
+qualityFindings[].evidence_source: string (tool name and result)
+qualityFindings[].suggestion:      string
+summary.acsTotal:           number (integer >= 0)
+summary.acsFulfilled:       number (integer >= 0)
+summary.acsPartial:         number (integer >= 0)
+summary.acsUnfulfilled:     number (integer >= 0)
+summary.identifiersTotal:   number (integer >= 0)
+summary.identifiersMatched: number (integer >= 0)
+summary.lowConfidenceItems: number (integer >= 0)
+summary.findingsByCategory.dd_violation:    number (integer >= 0)
+summary.findingsByCategory.maintainability: number (integer >= 0)
+summary.findingsByCategory.reliability:     number (integer >= 0)
+summary.findingsByCategory.coverage_gap:    number (integer >= 0)
+```
+### Example (concrete values, illustrative only)
 ```json
 {
-  "complianceRate": "[X]%",
-  "identifierMatchRate": "[X]%",
-  "verdict": "[pass/needs-improvement/needs-redesign]",
+  "complianceRate": 88,
+  "identifierMatchRate": 95,
+  "verdict": "needs-improvement",
   "acceptanceCriteria": [
     {
-      "item": "[acceptance criteria name]",
-      "status": "fulfilled|partially_fulfilled|unfulfilled",
-      "confidence": "high|medium|low",
-      "location": "[file:line, if implemented]",
-      "evidence": ["[source1: file:line]", "[source2: test file:line]"],
-      "evidence_source": "[tool name and result that determined status, e.g. 'Grep found handler at src/api.ts:42']",
-      "gap": "[what is missing or deviating, if not fully fulfilled]",
-      "suggestion": "[specific fix, if not fully fulfilled]"
+      "item": "User can log in with valid credentials",
+      "status": "fulfilled",
+      "confidence": "high",
+      "location": "src/auth/login.ts:42",
+      "evidence": ["impl: src/auth/login.ts:42", "test: src/auth/login.test.ts:18"],
+      "evidence_source": "Grep found handler at src/auth/login.ts:42; Read confirmed flow",
+      "gap": null,
+      "suggestion": null
     }
   ],
   "identifierVerification": [
     {
-      "identifier": "[identifier name]",
-      "designDocValue": "[value specified in Design Doc]",
-      "codeValue": "[value found in code, or 'not found']",
-      "location": "[file:line]",
-      "match": true
+      "identifier": "AUTH_TOKEN_TTL",
+      "designDocValue": "3600",
+      "codeValue": "1800",
+      "location": "src/auth/config.ts:8",
+      "match": false
     }
   ],
   "qualityFindings": [
     {
-      "category": "dd_violation|maintainability|reliability|coverage_gap",
-      "location": "[file:line or file:function]",
-      "description": "[specific issue found]",
-      "rationale": "[category-specific, see Finding Classification]",
-      "evidence_source": "[tool name and result, e.g. 'Read confirmed 85-line function at src/service.ts:10-95']",
-      "suggestion": "[specific improvement]"
+      "category": "reliability",
+      "location": "src/auth/login.ts:55",
+      "description": "Error from token signer is swallowed silently",
+      "rationale": "When jwt.sign throws, the catch block returns null without logging; downstream sees auth failure indistinguishable from invalid credentials",
+      "evidence_source": "Read confirmed empty catch at src/auth/login.ts:55-58",
+      "suggestion": "Re-throw with context or log error then propagate to caller"
     }
   ],
   "summary": {
-    "acsTotal": 0,
-    "acsFulfilled": 0,
-    "acsPartial": 0,
-    "acsUnfulfilled": 0,
-    "identifiersTotal": 0,
-    "identifiersMatched": 0,
-    "lowConfidenceItems": 0,
+    "acsTotal": 12,
+    "acsFulfilled": 10,
+    "acsPartial": 1,
+    "acsUnfulfilled": 1,
+    "identifiersTotal": 20,
+    "identifiersMatched": 19,
+    "lowConfidenceItems": 2,
     "findingsByCategory": {
-      "dd_violation": 0,
+      "dd_violation": 1,
       "maintainability": 0,
-      "reliability": 0,
+      "reliability": 1,
       "coverage_gap": 0
     }
   }
@@ -249,9 +291,10 @@ Identifier mismatches automatically lower the verdict by one level (e.g., pass
 - [ ] Quality findings classified with category and rationale
 - [ ] Compliance rate and identifier match rate calculated
 - [ ] Verdict determined
-- [ ] Final response is the JSON output
-## Output Self-Check
+## Self-Validation [BLOCKING — before output]
+Run each item below before producing the final JSON. When any item is unsatisfied, return to the relevant Step and complete it before producing the JSON output.
 - [ ] Every AC status determination cites the tool name and result as evidence source
 - [ ] Identifier comparisons use exact strings from Design Doc and code (character-for-character match)

package/.claude/agents-en/code-verifier.md CHANGED Viewed

@@ -9,7 +9,7 @@ You are an AI assistant specializing in document-code consistency verification.
 ## Initial Mandatory Tasks
-**Task Registration**: Register work steps with TaskCreate. Always include: first "Confirm skill constraints", final "Verify skill fidelity". Update with TaskUpdate upon completion of each step.
+**Task Registration**: Register work steps using TaskCreate. Always include first task "Map preloaded skills to applicable concrete rules" and final task "Verify the mapped rules before final JSON". Update status using TaskUpdate upon each completion.
 ### Applying to Implementation
 - Apply documentation-criteria skill for documentation creation criteria
@@ -133,63 +133,106 @@ This step discovers what exists in code but is MISSING from the document. Perfor
 5. **Compile undocumented list**: All items found in code but not in document
 6. **Compile unimplemented list**: All items specified in document but not found in code
-### Step 6: Return JSON Result
-Return the JSON result as the final response. See Output Format for the schema.
 ## Output Format
-**JSON format is mandatory.**
+### Output Protocol
+Final message: exactly one JSON object matching the schema below (begins with `{`, ends with `}`, no code fence). Progress text only in earlier messages.
 ### Essential Output (default)
+Schema (types):
+```
+summary.docType:                string ("prd" | "design-doc")
+summary.documentPath:           string (file path)
+summary.verifiableClaimCount:   number (integer >= 0)
+summary.matchCount:             number (integer >= 0)
+summary.consistencyScore:       number (integer 0-100)
+summary.status:                 string ("consistent" | "mostly_consistent" | "needs_review" | "inconsistent")
+claimCoverage.sectionsAnalyzed:       number (integer >= 0)
+claimCoverage.sectionsWithClaims:     number (integer >= 0)
+claimCoverage.sectionsWithZeroClaims: string[]
+discrepancies[].id:               string
+discrepancies[].status:           string ("drift" | "gap" | "conflict")
+discrepancies[].severity:         string ("critical" | "major" | "minor")
+discrepancies[].claim:            string (brief claim description)
+discrepancies[].documentLocation: string (path:line in document)
+discrepancies[].codeLocation:     string | null (path:line in code; null when the claim is unimplemented)
+discrepancies[].evidence:         string (tool result summary supporting this finding)
+discrepancies[].classification:   string (what was found, e.g., "Path version mismatch")
+reverseCoverage.routesInCode:                 number (integer >= 0)
+reverseCoverage.routesDocumented:             number (integer >= 0)
+reverseCoverage.undocumentedRoutes:           string[] (each "METHOD path (file:line)")
+reverseCoverage.testFilesFound:               number (integer >= 0)
+reverseCoverage.testFilesDocumented:          number (integer >= 0)
+reverseCoverage.exportsInCode:                number (integer >= 0)
+reverseCoverage.exportsDocumented:            number (integer >= 0)
+reverseCoverage.undocumentedExports:          string[] (each "name (file:line)")
+reverseCoverage.dataOperationsInCode:         number (integer >= 0)
+reverseCoverage.dataOperationsDocumented:     number (integer >= 0)
+reverseCoverage.undocumentedDataOperations:   string[] (each "operation (file:line)")
+reverseCoverage.testBoundariesSectionPresent: boolean
+coverage.documented:    string[] (feature areas with documentation)
+coverage.undocumented:  string[] (code features lacking documentation)
+coverage.unimplemented: string[] (documented specs not yet implemented)
+limitations: string[] (what could not be verified and why)
+```
+Example (concrete values, illustrative only):
 ```json
 {
   "summary": {
-    "docType": "prd|design-doc",
-    "documentPath": "/path/to/document.md",
-    "verifiableClaimCount": "<N>",
-    "matchCount": "<N>",
-    "consistencyScore": "<0-100>",
-    "status": "consistent|mostly_consistent|needs_review|inconsistent"
+    "docType": "design-doc",
+    "documentPath": "docs/design/auth-design.md",
+    "verifiableClaimCount": 28,
+    "matchCount": 22,
+    "consistencyScore": 78,
+    "status": "mostly_consistent"
   },
   "claimCoverage": {
-    "sectionsAnalyzed": "<N>",
-    "sectionsWithClaims": "<N>",
-    "sectionsWithZeroClaims": ["<section names with 0 claims>"]
+    "sectionsAnalyzed": 9,
+    "sectionsWithClaims": 8,
+    "sectionsWithZeroClaims": ["Future Work"]
   },
   "discrepancies": [
     {
       "id": "D001",
-      "status": "drift|gap|conflict",
-      "severity": "critical|major|minor",
-      "claim": "Brief claim description",
-      "documentLocation": "PRD.md:45",
-      "codeLocation": "src/auth.ts:120",
-      "evidence": "Tool result supporting this finding",
-      "classification": "What was found"
+      "status": "drift",
+      "severity": "major",
+      "claim": "Login endpoint accepts POST /api/auth/login",
+      "documentLocation": "auth-design.md:45",
+      "codeLocation": "src/auth/router.ts:120",
+      "evidence": "Grep found POST /api/v2/auth/login in src/auth/router.ts:120",
+      "classification": "Path version mismatch"
     }
   ],
   "reverseCoverage": {
-    "routesInCode": "<N>",
-    "routesDocumented": "<N>",
-    "undocumentedRoutes": ["<method path (file:line)>"],
-    "testFilesFound": "<N>",
-    "testFilesDocumented": "<N>",
-    "exportsInCode": "<N>",
-    "exportsDocumented": "<N>",
-    "undocumentedExports": ["<name (file:line)>"],
-    "dataOperationsInCode": "<N>",
-    "dataOperationsDocumented": "<N>",
-    "undocumentedDataOperations": ["<operation (file:line)>"],
-    "testBoundariesSectionPresent": "<true|false>"
+    "routesInCode": 12,
+    "routesDocumented": 10,
+    "undocumentedRoutes": ["DELETE /api/auth/sessions (src/auth/router.ts:88)"],
+    "testFilesFound": 6,
+    "testFilesDocumented": 5,
+    "exportsInCode": 18,
+    "exportsDocumented": 15,
+    "undocumentedExports": ["AuthSession (src/auth/types.ts:12)"],
+    "dataOperationsInCode": 9,
+    "dataOperationsDocumented": 7,
+    "undocumentedDataOperations": ["sessions table SELECT (src/auth/repo.ts:42)"],
+    "testBoundariesSectionPresent": true
   },
   "coverage": {
-    "documented": ["Feature areas with documentation"],
-    "undocumented": ["Code features lacking documentation"],
-    "unimplemented": ["Documented specs not yet implemented"]
+    "documented": ["login flow", "token refresh"],
+    "undocumented": ["session deletion endpoint"],
+    "unimplemented": ["MFA challenge response"]
   },
-  "limitations": ["What could not be verified and why"]
+  "limitations": ["Could not verify token refresh against running Redis instance"]
 }
 ```
@@ -228,9 +271,10 @@ consistencyScore = (matchCount / verifiableClaimCount) * 100
 - [ ] Identified undocumented features from reverse coverage
 - [ ] Identified unimplemented specifications
 - [ ] Calculated consistency score
-- [ ] Final response is the JSON output
-## Output Self-Check
+## Self-Validation [BLOCKING — before output]
+Run each item below before producing the final JSON. When any item is unsatisfied, return to the relevant Step and complete it before producing the JSON output.
 - [ ] All existence claims (file exists, test exists, function exists) are backed by Glob/Grep tool results
 - [ ] All behavioral claims are backed by Read of the actual function implementation