npm - @kiwidata/grimoire - Versions diffs - 0.1.1 - Mend

@kiwidata/grimoire 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215) hide show

package/.claude-plugin/plugin.json +8 -0
package/AGENTS.md +217 -0
package/README.md +748 -0
package/bin/grimoire.js +2 -0
package/dist/cli/index.d.ts +2 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +42 -0
package/dist/cli/index.js.map +1 -0
package/dist/commands/archive.d.ts +3 -0
package/dist/commands/archive.d.ts.map +1 -0
package/dist/commands/archive.js +22 -0
package/dist/commands/archive.js.map +1 -0
package/dist/commands/branch-check.d.ts +3 -0
package/dist/commands/branch-check.d.ts.map +1 -0
package/dist/commands/branch-check.js +16 -0
package/dist/commands/branch-check.js.map +1 -0
package/dist/commands/check.d.ts +3 -0
package/dist/commands/check.d.ts.map +1 -0
package/dist/commands/check.js +22 -0
package/dist/commands/check.js.map +1 -0
package/dist/commands/ci.d.ts +3 -0
package/dist/commands/ci.d.ts.map +1 -0
package/dist/commands/ci.js +18 -0
package/dist/commands/ci.js.map +1 -0
package/dist/commands/diff.d.ts +3 -0
package/dist/commands/diff.d.ts.map +1 -0
package/dist/commands/diff.js +10 -0
package/dist/commands/diff.js.map +1 -0
package/dist/commands/docs.d.ts +3 -0
package/dist/commands/docs.d.ts.map +1 -0
package/dist/commands/docs.js +11 -0
package/dist/commands/docs.js.map +1 -0
package/dist/commands/health.d.ts +3 -0
package/dist/commands/health.d.ts.map +1 -0
package/dist/commands/health.js +13 -0
package/dist/commands/health.js.map +1 -0
package/dist/commands/init.d.ts +3 -0
package/dist/commands/init.d.ts.map +1 -0
package/dist/commands/init.js +21 -0
package/dist/commands/init.js.map +1 -0
package/dist/commands/list.d.ts +3 -0
package/dist/commands/list.d.ts.map +1 -0
package/dist/commands/list.js +22 -0
package/dist/commands/list.js.map +1 -0
package/dist/commands/log.d.ts +3 -0
package/dist/commands/log.d.ts.map +1 -0
package/dist/commands/log.js +15 -0
package/dist/commands/log.js.map +1 -0
package/dist/commands/map.d.ts +3 -0
package/dist/commands/map.d.ts.map +1 -0
package/dist/commands/map.js +17 -0
package/dist/commands/map.js.map +1 -0
package/dist/commands/pr.d.ts +3 -0
package/dist/commands/pr.d.ts.map +1 -0
package/dist/commands/pr.js +17 -0
package/dist/commands/pr.js.map +1 -0
package/dist/commands/status.d.ts +3 -0
package/dist/commands/status.d.ts.map +1 -0
package/dist/commands/status.js +12 -0
package/dist/commands/status.js.map +1 -0
package/dist/commands/test-quality.d.ts +3 -0
package/dist/commands/test-quality.d.ts.map +1 -0
package/dist/commands/test-quality.js +37 -0
package/dist/commands/test-quality.js.map +1 -0
package/dist/commands/trace.d.ts +3 -0
package/dist/commands/trace.d.ts.map +1 -0
package/dist/commands/trace.js +12 -0
package/dist/commands/trace.js.map +1 -0
package/dist/commands/update.d.ts +3 -0
package/dist/commands/update.d.ts.map +1 -0
package/dist/commands/update.js +22 -0
package/dist/commands/update.js.map +1 -0
package/dist/commands/validate.d.ts +3 -0
package/dist/commands/validate.d.ts.map +1 -0
package/dist/commands/validate.js +17 -0
package/dist/commands/validate.js.map +1 -0
package/dist/core/archive.d.ts +9 -0
package/dist/core/archive.d.ts.map +1 -0
package/dist/core/archive.js +92 -0
package/dist/core/archive.js.map +1 -0
package/dist/core/branch-check.d.ts +27 -0
package/dist/core/branch-check.d.ts.map +1 -0
package/dist/core/branch-check.js +205 -0
package/dist/core/branch-check.js.map +1 -0
package/dist/core/check.d.ts +24 -0
package/dist/core/check.d.ts.map +1 -0
package/dist/core/check.js +372 -0
package/dist/core/check.js.map +1 -0
package/dist/core/ci.d.ts +24 -0
package/dist/core/ci.d.ts.map +1 -0
package/dist/core/ci.js +162 -0
package/dist/core/ci.js.map +1 -0
package/dist/core/detect.d.ts +10 -0
package/dist/core/detect.d.ts.map +1 -0
package/dist/core/detect.js +368 -0
package/dist/core/detect.js.map +1 -0
package/dist/core/diff.d.ts +29 -0
package/dist/core/diff.d.ts.map +1 -0
package/dist/core/diff.js +197 -0
package/dist/core/diff.js.map +1 -0
package/dist/core/doc-style.d.ts +16 -0
package/dist/core/doc-style.d.ts.map +1 -0
package/dist/core/doc-style.js +192 -0
package/dist/core/doc-style.js.map +1 -0
package/dist/core/docs.d.ts +6 -0
package/dist/core/docs.d.ts.map +1 -0
package/dist/core/docs.js +478 -0
package/dist/core/docs.js.map +1 -0
package/dist/core/health.d.ts +7 -0
package/dist/core/health.d.ts.map +1 -0
package/dist/core/health.js +489 -0
package/dist/core/health.js.map +1 -0
package/dist/core/hooks.d.ts +5 -0
package/dist/core/hooks.d.ts.map +1 -0
package/dist/core/hooks.js +168 -0
package/dist/core/hooks.js.map +1 -0
package/dist/core/init.d.ts +9 -0
package/dist/core/init.d.ts.map +1 -0
package/dist/core/init.js +563 -0
package/dist/core/init.js.map +1 -0
package/dist/core/list.d.ts +4 -0
package/dist/core/list.d.ts.map +1 -0
package/dist/core/list.js +170 -0
package/dist/core/list.js.map +1 -0
package/dist/core/log.d.ts +8 -0
package/dist/core/log.d.ts.map +1 -0
package/dist/core/log.js +150 -0
package/dist/core/log.js.map +1 -0
package/dist/core/map.d.ts +9 -0
package/dist/core/map.d.ts.map +1 -0
package/dist/core/map.js +302 -0
package/dist/core/map.js.map +1 -0
package/dist/core/pr.d.ts +9 -0
package/dist/core/pr.d.ts.map +1 -0
package/dist/core/pr.js +273 -0
package/dist/core/pr.js.map +1 -0
package/dist/core/shared-setup.d.ts +52 -0
package/dist/core/shared-setup.d.ts.map +1 -0
package/dist/core/shared-setup.js +221 -0
package/dist/core/shared-setup.js.map +1 -0
package/dist/core/status.d.ts +6 -0
package/dist/core/status.d.ts.map +1 -0
package/dist/core/status.js +114 -0
package/dist/core/status.js.map +1 -0
package/dist/core/test-quality.d.ts +33 -0
package/dist/core/test-quality.d.ts.map +1 -0
package/dist/core/test-quality.js +378 -0
package/dist/core/test-quality.js.map +1 -0
package/dist/core/trace.d.ts +6 -0
package/dist/core/trace.d.ts.map +1 -0
package/dist/core/trace.js +211 -0
package/dist/core/trace.js.map +1 -0
package/dist/core/update.d.ts +10 -0
package/dist/core/update.d.ts.map +1 -0
package/dist/core/update.js +149 -0
package/dist/core/update.js.map +1 -0
package/dist/core/validate.d.ts +20 -0
package/dist/core/validate.d.ts.map +1 -0
package/dist/core/validate.js +275 -0
package/dist/core/validate.js.map +1 -0
package/dist/index.d.ts +19 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +20 -0
package/dist/index.js.map +1 -0
package/dist/utils/config.d.ts +61 -0
package/dist/utils/config.d.ts.map +1 -0
package/dist/utils/config.js +172 -0
package/dist/utils/config.js.map +1 -0
package/dist/utils/fs.d.ts +17 -0
package/dist/utils/fs.d.ts.map +1 -0
package/dist/utils/fs.js +38 -0
package/dist/utils/fs.js.map +1 -0
package/dist/utils/paths.d.ts +10 -0
package/dist/utils/paths.d.ts.map +1 -0
package/dist/utils/paths.js +35 -0
package/dist/utils/paths.js.map +1 -0
package/dist/utils/spawn.d.ts +5 -0
package/dist/utils/spawn.d.ts.map +1 -0
package/dist/utils/spawn.js +34 -0
package/dist/utils/spawn.js.map +1 -0
package/package.json +68 -0
package/skills/grimoire-apply/SKILL.md +274 -0
package/skills/grimoire-audit/SKILL.md +129 -0
package/skills/grimoire-branch-guard/SKILL.md +111 -0
package/skills/grimoire-bug/SKILL.md +160 -0
package/skills/grimoire-bug-explore/SKILL.md +242 -0
package/skills/grimoire-bug-report/SKILL.md +237 -0
package/skills/grimoire-bug-session/SKILL.md +222 -0
package/skills/grimoire-bug-triage/SKILL.md +274 -0
package/skills/grimoire-commit/SKILL.md +150 -0
package/skills/grimoire-discover/SKILL.md +297 -0
package/skills/grimoire-draft/SKILL.md +202 -0
package/skills/grimoire-plan/SKILL.md +329 -0
package/skills/grimoire-pr/SKILL.md +134 -0
package/skills/grimoire-pr-review/SKILL.md +240 -0
package/skills/grimoire-refactor/SKILL.md +251 -0
package/skills/grimoire-remove/SKILL.md +112 -0
package/skills/grimoire-review/SKILL.md +247 -0
package/skills/grimoire-verify/SKILL.md +223 -0
package/skills/references/bug-classification.md +154 -0
package/skills/references/build-vs-buy.md +77 -0
package/skills/references/elicitation-personas.md +118 -0
package/skills/references/refactor-register-format.md +88 -0
package/skills/references/refactor-scan-categories.md +102 -0
package/skills/references/schema-format.md +68 -0
package/skills/references/security-compliance.md +110 -0
package/skills/references/testing-contracts.md +93 -0
package/templates/context.yml +110 -0
package/templates/debt-exceptions.yml +61 -0
package/templates/decision.md +50 -0
package/templates/dupignore +93 -0
package/templates/example.feature +24 -0
package/templates/manifest.md +29 -0
package/templates/mapignore +58 -0
package/templates/mapkeys +65 -0

package/skills/grimoire-review/SKILL.md ADDED Viewed

@@ -0,0 +1,247 @@
+---
+name: grimoire-review
+description: Multi-perspective design review before coding begins. Expert personas validate the change for completeness, feasibility, security, and data integrity. Use after draft/plan, before apply.
+compatibility: Designed for Claude Code (or similar products)
+metadata:
+  author: kiwi-data
+  version: "0.1"
+---
+# grimoire-review
+Multi-perspective LLM review of a completed design before coding begins. Expert personas validate the change for completeness, feasibility, security, and data integrity.
+## Triggers
+- User has a grimoire change with approved features, decisions, and tasks
+- User asks to review a design before implementing
+- Automatically suggested after `grimoire-plan` completes
+- Loose match: "review", "design review", "ready to code", "before we start"
+## Routing
+- No tasks.md exists → `grimoire-plan` first
+- Level 1 change → skip review entirely, proceed to `grimoire-apply`
+- User says "skip review" → proceed to `grimoire-apply`
+- Post-implementation review → `grimoire-pr` (has optional post-impl review)
+## Prerequisites
+- A change exists in `.grimoire/changes/<change-id>/` with:
+  - `manifest.md` (approved)
+  - At least one `.feature` file or decision record
+  - `tasks.md` (generated by grimoire-plan)
+## Skipping
+This step is optional. The user can skip it by saying "skip review" or "go straight to apply". Not every change needs a full review — small or low-risk changes can go directly from plan to apply.
+## Complexity-Gated Review
+Read `complexity` from `manifest.md` frontmatter to determine review depth:
+| Complexity | Review Depth |
+|------------|-------------|
+| **1 (Trivial)** | Skip review entirely — suggest proceeding to apply |
+| **2 (Simple)** | Senior Engineer only. Skip other personas unless the change touches security or data. |
+| **3 (Moderate)** | All relevant personas (skip Data Engineer if no data changes, skip QA if no user-facing behavior) |
+| **4 (Complex)** | All personas mandatory. No skipping. |
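+The user can always override these defaults explicitly: "run full review" on a level 2, or "just senior engineer" on a level 4. The "No skipping" rule for level 4 means the reviewer must not drop personas on its own initiative; an explicit user request still takes precedence.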
+## Workflow
+### 1. Select Change
+- List active changes in `.grimoire/changes/`
+- If multiple, ask user which one to review
+- If only one, confirm it
+### 2. Gather Context
+Read all artifacts for the change:
+- `manifest.md` — change summary, scope, **and Prior Art section** (build-vs-buy rationale)
+- All `.feature` files — behavioral specifications
+- All decision records — architectural choices
+- `tasks.md` — implementation plan
+- `data.yml` — proposed schema changes (if present)
+- Read `.grimoire/config.yaml` for project context (language, tools, conventions)
+- Read `.grimoire/docs/data/schema.yml` for current data baseline (if it exists)
+- Read `.grimoire/docs/context.yml` for deployment environment, related services, and infrastructure (if it exists) — this informs security review (cross-service auth), engineering review (deployment constraints), and data review (infrastructure availability)
+- Read relevant `.grimoire/docs/` area docs if they exist
+- Skim the areas of the codebase the tasks reference
+### 3. Product Manager Review
+Adopt the perspective of a **product manager** focused on completeness and user value.
+Evaluate:
+- **Outcome**: Does the manifest's Why clearly state the problem being solved and how success is measured? If it describes a mechanism ("add an endpoint") instead of an outcome ("users can reset passwords"), flag it — the team will argue about scope later.
+- **Coverage**: Do the feature scenarios cover all user-facing behaviors? Are there missing edge cases, error states, or alternate flows that a user would encounter?
+- **Clarity**: Are the feature descriptions and user stories clear enough that a non-technical stakeholder could validate them? Would QA know exactly what to test?
+- **Scope**: Is the change well-bounded? Are there implicit requirements hiding in the scenarios that aren't spelled out? Do any scenarios or tasks stray into the manifest's Non-goals? Scope creep into non-goals is a **blocker**.
+- **Acceptance**: Could you ship this and confidently say the feature is "done"? What would a user complain about?
+Output a short list of findings — flag issues as **blocker** (must fix before coding) or **suggestion** (nice to have).
+### 4. Senior Engineer Review
+Adopt the perspective of a **senior software engineer** reviewing the technical design.
+Evaluate:
+- **Build vs Buy**: Was the prior art research thorough? Check the manifest's Prior Art section. If the change builds custom code, is the justification for not adopting an existing library convincing? Do a quick sanity check — search for obvious libraries the research may have missed. If a well-maintained library exists that the manifest doesn't mention, flag it as a **blocker**. If the research was done but the build decision is debatable, flag as **suggestion** with the alternative.
+- **Simplicity**: Is this the simplest design that solves the problem? Could any task be done with less code, fewer files, or fewer moving parts? Flag anything that looks over-engineered — new abstractions without justification, premature generalization, unnecessary indirection layers, config-driven behavior where a direct call would do.
+- **Architecture**: Do the decisions make sense for this codebase? Are there simpler alternatives? Will this paint us into a corner?
+- **Task quality**: Are the tasks specific enough to execute without re-planning? Do they reference real files, real patterns, real conventions from the codebase?
+- **Dependencies**: Are tasks ordered correctly? Are there missing dependencies or implicit assumptions between tasks?
+- **Integration**: How does this change interact with existing code? Are there areas that will break or need updating that the tasks don't cover?
+- **Contract compatibility**: Does this change alter the request/response shape for any external API documented in `schema.yml`? If fields are added, removed, renamed, or re-typed in `data.yml`, flag it — the client code and any downstream consumers need contract tests updated. A contract change without updated contract tests is a **blocker**.
+- **Reuse**: Are there existing utilities, patterns, or modules that should be used instead of writing new code? Check `.grimoire/docs/` area docs if available. The goal is less new code, not more.
+- **Surface area**: Does the change introduce new public APIs, exports, or interfaces beyond what's needed? Fewer public functions with fewer parameters is better.
+- **Quality attributes**: If decision records have a Quality Attributes table, are the targets measurable and realistic? For performance-sensitive changes (new endpoints, data pipelines, search), flag blank targets as a **blocker** — you can't verify what you haven't defined. For non-performance-sensitive changes, blank targets are fine.
+- **Testing**: Is the test strategy sound? Are there gaps between what the features describe and what the step definitions will actually verify?
+Output a short list of findings — flag issues as **blocker** or **suggestion**.
+### 5. Security Engineer Review
+Adopt the perspective of a **security engineer** reviewing the design for vulnerabilities.
+#### 5a. STRIDE Threat Analysis
+For each new endpoint, data flow, or trust boundary the change introduces, evaluate using STRIDE:
+| Threat             | Question                                                                                     |
+|--------------------|----------------------------------------------------------------------------------------------|
+| **S**poofing       | Can an attacker impersonate a user or service? Are auth checks present at every entry point?  |
+| **T**ampering      | Can input or data in transit be modified? Is integrity validated (checksums, signatures, CSRF)?|
+| **R**epudiation    | Are security-relevant actions logged? Could an attacker act without leaving a trace?          |
+| **I**nfo Disclosure| Could error messages, logs, or responses leak sensitive data (stack traces, PII, tokens)?     |
+| **D**enial of Service| Are there unbounded operations (large uploads, expensive queries, no rate limits)?          |
+| **E**levation of Privilege| Can a user escalate to admin? Are role/permission checks at the right layer?           |
+Skip STRIDE categories that don't apply to the change. Don't manufacture threats.
+#### 5b. Detailed Security Evaluation
+- **Input validation**: Do the features involve user input? Are there scenarios covering malicious or malformed input?
+- **Authentication/authorization**: Does the change touch auth boundaries? Are there missing access control checks?
+- **Data handling**: Does the change introduce new data storage, transmission, or processing? Are there privacy or compliance concerns?
+- **Dependencies**: Do the tasks introduce new dependencies? Are there known vulnerability concerns? Check that package names are real and correctly spelled — hallucinated or typosquatted package names are a supply chain attack vector.
+- **Vulnerable packages**: If the tasks add or upgrade dependencies, check for known vulnerabilities. Cross-reference against the project's dependency audit tool (configured in `.grimoire/config.yaml` under `dep_audit`). Flag any package without a clear provenance or with a very low download count.
+- **Attack surface**: Does this change expose new endpoints, APIs, or interfaces? What could an attacker target?
+- **Cross-service security**: If `context.yml` lists related services, does the change properly authenticate when calling them? Are service-to-service auth boundaries maintained? Is data from sibling services validated at the boundary?
+- **Secrets**: Are there hardcoded credentials, tokens, or keys in the design? Check that API keys, database credentials, and tokens are loaded from environment variables or secret stores, never inline.
+If the change has no security-relevant surface (e.g., a pure UI text change), say so briefly and move on. Not every change needs a deep security review.
+#### 5c. Compliance Review
+Check `.grimoire/config.yaml` under `project.compliance`. If configured, evaluate per `../references/security-compliance.md` (section "Compliance Framework Verification"). Missing compliance coverage on a tagged scenario is a **blocker**. If no compliance frameworks configured, skip.
+#### 5d. OWASP / CWE Classification
+Tag every security finding with:
+- **OWASP Top 10 (2021)** category — e.g., `A01:2021-Broken Access Control`, `A03:2021-Injection`
+- **CWE ID** — e.g., `CWE-89` (SQL Injection), `CWE-79` (XSS), `CWE-798` (Hardcoded Credentials)
+This makes findings actionable, searchable, and traceable to compliance frameworks.
+See `../references/security-compliance.md` for the CWE quick reference table when choosing tags.
+Output format:
+```markdown
+## Security Engineer
+### STRIDE Summary
+- **Spoofing**: [relevant finding or "N/A"]
+- **Tampering**: [relevant finding or "N/A"]
+- ... (only categories that apply)
+### Findings
+- **[blocker]** [A03:2021 / CWE-89] User search query is concatenated into SQL string in tasks — must use parameterized query
+- **[suggestion]** [A07:2021 / CWE-307] Add rate limiting scenario for login endpoint
+- No other security concerns for this change.
+```
+### 6. QA Engineer Review (Optional)
+**Skip this review if the change is purely internal (no user-facing behavior, no new inputs, no observable state changes).**
+If the change has user-facing behavior, adopt the perspective of a **QA engineer** focused on testability and real-world failure modes.
+Evaluate:
+- **Testability**: Can every scenario be verified automatically? Are there behaviors that require manual testing — and if so, is that documented? Are the Given/When/Then steps specific enough to implement as real tests?
+- **Edge cases**: What inputs, states, or timing conditions are not covered by the current scenarios? Think about empty states, concurrent users, interruptions, and boundary values.
+- **Negative scenarios**: For every happy path, is there at least one scenario covering what happens when things go wrong? Missing error scenarios are the #1 source of bug reports.
+- **Observability**: When this feature breaks in production, how will anyone know? Are there logs, metrics, or alerts? Can a tester distinguish between "feature is broken" and "feature is slow"?
+- **Regression risk**: What existing behavior could this change break? Are there integration points with other features that need cross-feature testing?
+- **Accessibility**: Does the change introduce new UI? If so, are there scenarios covering keyboard navigation, screen readers, or contrast requirements?
+Output a short list of findings — flag issues as **blocker** or **suggestion**.
+### 7. Data Engineer Review (Optional)
+**Skip this review if the change has no `data.yml` and doesn't touch data models, schemas, migrations, or external API integrations.**
+If the change touches data, adopt the perspective of a **data engineer** reviewing the schema design.
+Read:
+- `.grimoire/changes/<change-id>/data.yml` — proposed schema changes
+- `.grimoire/docs/data/schema.yml` — current schema baseline (if it exists)
+Evaluate:
+- **Schema design**: Are field types appropriate? Are there missing constraints (not_null, unique, indexes) that will cause problems at scale? Are enums used where they should be?
+- **Migrations**: Will the proposed changes require a data migration? Is it safe to run on a live database (e.g., adding a nullable column is safe, renaming a column is not)?
+- **Relationships**: Are foreign keys and references correct? Are there missing indexes on foreign keys? Could any relationships create N+1 query problems?
+- **Naming**: Do new fields/models follow the existing naming conventions in schema.yml?
+- **Backwards compatibility**: Will the schema change break existing API consumers, queries, or reports? Are there downstream dependencies?
+- **External APIs**: If adding a new external API dependency, is the `schema_ref` pointing to a stable spec? Is there a fallback if the API is unavailable? Is the client wrapper in the right place?
+- **Contract breaking changes**: Compare `data.yml` against `schema.yml` for any external API with `action: modify`. If the change removes a required response field, changes a field type, renames a field, or adds a new required request field — it's a **breaking contract change**. Flag as **blocker** unless the change documents a migration path (versioned endpoint, fallback handling, or coordinated deployment). Adding optional response fields is safe. Adding optional request fields is safe if the API has a default.
+- **Data integrity**: Are there edge cases where data could end up in an inconsistent state? Should any changes be wrapped in a transaction?
+Output a short list of findings — flag issues as **blocker** or **suggestion**.
+### 8. Present Findings
+Compile all reviews into a single summary:
+```markdown
+# Design Review: <change-id>
+## Product Manager
+- **[blocker]** Missing error scenario for invalid email format in registration feature
+- **[suggestion]** Add a scenario for password strength feedback
+## Senior Engineer
+- **[blocker]** Task 2.3 references `auth/views.py` but the project uses `accounts/views.py`
+- **[suggestion]** Reuse `validate_email()` from `utils/validators.py` instead of writing a new one
+## Security Engineer
+- **[suggestion]** [A07:2021 / CWE-307] Add rate limiting scenario for login attempts
+- No other security concerns for this change.
+## QA Engineer
+- **[blocker]** No negative scenario for expired TOTP codes — testers can't verify error handling
+- **[suggestion]** Add scenario for what happens when 2FA service is unreachable
+(or: "No user-facing behavior changes — skipped.")
+## Data Engineer
+- **[blocker]** Missing index on `profiles.user_id` — will cause full table scans on join queries
+- **[suggestion]** `avatar_url` should have a max_length constraint
+(or: "No data changes in this design — skipped.")
+## Summary
+- **4 blockers** — must be addressed before coding
+- **5 suggestions** — consider addressing
+Recommendation: Fix blockers, then proceed to apply.
+```
+### 9. Iterate
+- If there are **blockers**, tell the user which artifacts need updating (features, decisions, or tasks) and offer to help fix them
+- If only **suggestions**, present them and let the user decide which to address
+- If **no issues**, confirm the design is ready and suggest proceeding to `grimoire-apply`
+- Do NOT proceed to apply without user approval
+## Important
+- This is a design review, not a code review. Focus on the specifications and plan, not hypothetical implementation details.
+- Be direct. Don't pad findings with praise or soften blockers. The goal is to catch problems before code is written, when they're cheap to fix.
+- A blocker means "if we code this as-is, we'll have to come back and redo work." A suggestion means "this would improve the design but isn't blocking."
+- Keep each persona's review focused and short. Three bullet points that matter are better than ten that don't.
+- If the change is trivial (e.g., rename a field, fix a typo in a feature), say so and don't manufacture issues.
+## Done
+When findings are presented and blockers resolved (or accepted), the review is complete. Suggest proceeding to `grimoire-apply`.

package/skills/grimoire-verify/SKILL.md ADDED Viewed

@@ -0,0 +1,223 @@
+---
+name: grimoire-verify
+description: Verify that implementation matches feature specs and decision records. Use after apply is complete, before archiving the change.
+compatibility: Designed for Claude Code (or similar products)
+metadata:
+  author: kiwi-data
+  version: "0.1"
+---
+# grimoire-verify
+Verify that implementation matches the feature specs and decision records. Run after apply, before archive.
+## Triggers
+- User wants to verify a grimoire change is correctly implemented
+- User asks to check, verify, or review a change before archiving
+- Loose match: "verify", "check", "review" with a change reference
+## Routing
+- Change not yet applied → `grimoire-apply` first
+- Want a pre-implementation design review → `grimoire-review`
+- Found issues that need fixing → user decides: fix directly or route to `grimoire-apply` / `grimoire-draft`
+## Prerequisites
+- A change exists in `.grimoire/changes/<change-id>/` with completed tasks
+- Or: user wants to verify baseline features against the codebase (no active change required)
+## Workflow
+### 1. Select Scope
+Two modes:
+**Change verification** (default when a change exists):
+- Select an active change with completed tasks
+- Verify the implementation matches that specific change's features and decisions
+**Baseline verification** (when user asks to verify the whole project):
+- Verify all features in `features/` against the codebase
+- Check all decisions in `.grimoire/decisions/` are still accurate
+### 2. Load Artifacts
+For change verification:
+- Read `manifest.md`, proposed `.feature` files, decision records, `tasks.md`
+For baseline verification:
+- Read all `features/**/*.feature` and `.grimoire/decisions/*.md`
+### 3. Verify in Three Dimensions
+**A. Completeness — are all tasks done?**
+- Parse `tasks.md` and check all items are `- [x]`
+- If any are `- [ ]`, list them as CRITICAL issues
+- This is objective — checkboxes don't lie
+**B. Correctness — does the code match the specs?**
+For each scenario in the feature files:
+1. Search the codebase for the production code that implements this behavior
+2. Search for the step definition that tests this scenario
+3. Verify the step definition makes real assertions (not empty, not `assert True`, not `pass`)
+4. If possible, confirm the test actually runs (check test output, CI results)
+Flag issues:
+- Scenario with no corresponding step definition → CRITICAL
+- Step definition with empty/trivial body → CRITICAL
+- Step definition that doesn't match the scenario's intent → WARNING
+- Production code not found for a scenario → WARNING (may be indirect)
+**C. Coherence — does the implementation follow the decisions?**
+For each decision record:
+1. Read the chosen option and consequences
+2. Search the codebase for evidence the decision was followed
+3. Check the Confirmation section — have the criteria been met?
+Flag issues:
+- Decision says "use PostgreSQL" but code uses SQLite → CRITICAL
+- Decision's Confirmation criteria not verifiable → WARNING
+- Decision consequences not addressed → WARNING
+### 3.D Test Quality Intelligence
+Go beyond "does a step definition exist?" to "would this test catch a real bug?"
+For each step definition:
+1. **Assertion strength:** Classify each assertion:
+   - **Strong:** `assert result == "expected_value"`, `expect(status).toBe(302)`, `assertEqual(user.email, "test@example.com")`
+   - **Weak:** `assert result is not None`, `expect(result).toBeDefined()`, `assert len(items) > 0`
+   - **Trivial:** `assert True`, `pass`, empty body, `expect(true).toBe(true)`
+2. **Null implementation test:** Could this test pass if the function under test returned `None`, `[]`, `{}`, or `0`? If yes, the test is too weak.
+3. **Common anti-patterns to flag:**
+   - Step definition body is just `pass` or `...` → CRITICAL
+   - Assertion only checks `is not None` or `toBeDefined()` → WARNING
+   - Assertion checks type only (`isinstance()`) without checking value → WARNING
+   - Test creates a mock and then asserts against the mock's return value (circular) → CRITICAL
+   - Try/except that swallows assertion errors → CRITICAL
+   - Step definition has no `assert`/`expect` at all → CRITICAL (for Then steps)
+   - Test mocks the client wrapper instead of the HTTP boundary → WARNING (tests wiring, not contract compliance)
+   - Test mocks internal code that lives in the same repo → WARNING (hides integration bugs)
+   - Contract test uses a fixture that doesn't match `schema.yml` → CRITICAL (fictional contract)
+   - Test mocks so aggressively that removing production code still passes → CRITICAL
+4. **Report format:** Include test quality findings alongside correctness findings:
+   ```
+   - **[critical]** `test_auth.py:42` — step "Then I should be redirected" has no assertion (empty body)
+   - **[warning]** `test_auth.py:58` — step "Then user should exist" only asserts `is not None` — check the actual user properties
+   ```
+If `grimoire test-quality` CLI command is available, suggest running it for a comprehensive analysis.
+To run tests directly: use `config.tools.bdd_test` for BDD and `config.tools.unit_test` for unit tests.
+### 4. Security Compliance Verification
+Verify that security guidance from plan and review stages was followed in implementation. Read `../references/security-compliance.md` for the full checklist.
+**A. Check plan-stage security patterns:**
+Confirm the implementation uses proven patterns: framework auth (not custom), bcrypt/argon2 (not MD5/SHA), parameterized queries (not string concatenation), CSRF protection, input validation at boundary, no hardcoded secrets.
+**B. Check review findings were addressed:**
+If a `grimoire-review` was run, list each **blocker** from the Security Engineer review. Search the implementation for evidence each was fixed. Unaddressed blockers → CRITICAL.
+**C. OWASP Top 10 surface scan:**
+Scan changed files against the OWASP table in `../references/security-compliance.md`. Tag findings with OWASP category and CWE ID.
+**D. Verify security-tagged scenarios:**
+Check feature files for security tags. For each security-tagged scenario, verify it against the rules in `../references/security-compliance.md`. A security-tagged scenario with no security verification in tests → CRITICAL.
+If no security tags exist and the change has no security surface, state so briefly and move on.
+### 5. Contract Test Coverage
+Verify that every external API integration has contract tests that match the documented contract.
+**A. Inventory external APIs:**
+Read `.grimoire/docs/data/schema.yml` and list every entry with `type: external_api`. For each:
+1. **Contract documented?** Check that the entry has `endpoints` with `request`, `response`, and `error_response` shapes. Missing contract documentation → WARNING (the contract is implicit and untested)
+2. **Contract test exists?** Search the test suite for tests that validate the client against the documented response shape. Look for:
+   - Tests that assert specific response fields match expected types/values
+   - Tests that use fixture/recorded responses matching the `schema.yml` shape
+   - Tests that verify error handling matches the documented `error_response`
+   - Missing contract test for a documented API → CRITICAL
+3. **Contract test matches schema?** Compare the fixture/recorded response used in tests against the `schema.yml` contract:
+   - Fixture has fields not in `schema.yml` → WARNING (undocumented dependency)
+   - `schema.yml` has `required: true` fields not asserted in tests → WARNING (untested contract guarantee)
+   - Client reads fields not in `schema.yml` → CRITICAL (invisible contract dependency)
+4. **Contract drift?** If this is a change verification (not baseline), compare `data.yml` against `schema.yml`:
+   - Any field changes on external APIs without corresponding test updates → CRITICAL
+   - New endpoints without contract tests → CRITICAL
+**Report format:**
+```markdown
+## Contract Coverage
+- [x] `stripe_api` — 3 endpoints, all with contract tests in `tests/integrations/test_stripe.py`
+- [ ] **[critical]** `github_api.get_user` — no contract test found for response shape
+- [ ] **[warning]** `sendgrid_api` — contract documented but `error_response` shape missing
+- [ ] **[critical]** `payments_api` — client reads `transaction.metadata.source` not in schema.yml (undocumented field dependency)
+```
+If no external APIs exist in `schema.yml`, skip this section.
+### 6. Dead Feature Detection
+Check for features that exist in specs but may no longer be implemented:
+- Feature files with no corresponding step definitions anywhere
+- Step definitions that import modules/functions that no longer exist
+- Step definitions with `pass` or `NotImplementedError` bodies
+- Features tagged `@skip` or `@wip` that have been in that state for a long time
+### 7. Generate Report
+Produce a structured report:
+```markdown
+# Verification Report: <change-id or "baseline">
+## Summary
+- Scenarios verified: X
+- Decisions verified: X
+- Security checks: X passed, X failed
+- Issues found: X critical, X warnings, X suggestions
+## Critical Issues
+- [ ] <issue description> — `file:line`
+## Security Compliance
+- [x] Verified: <security pattern confirmed> — `file:line`
+- [ ] **[critical]** [OWASP/CWE tag] <violation> — `file:line`
+- [ ] **[warning]** [OWASP/CWE tag] <concern> — `file:line`
+## Warnings
+- [ ] <issue description> — `file:line`
+## Suggestions
+- [ ] <suggestion> — `file:line`
+## Verified Scenarios
+- [x] "Scenario name" in `feature/file.feature` — step def in `test_file.py:42`
+- [x] ...
+```
+### 8. Recommend Next Steps
+Based on the report:
+- **All clear** → recommend archiving the change
+- **Critical issues** → must fix before archiving
+- **Warnings only** → user decides whether to fix or accept
+- **Dead features found** → suggest a removal change or updating the features
+## Important
+- Verify is read-only. Do NOT fix issues — only report them. The user decides what to do.
+- Be specific: reference file paths and line numbers for every issue.
+- A scenario without a step definition is always CRITICAL — the spec is not tested.
+- A `Then` step definition with no assertions is always CRITICAL — it passes without verifying anything (a false positive).
+- Don't verify implementation details — only verify that the behavior described in the scenario is covered.
+- For baseline verification, this may take a while on large codebases. Present results incrementally by capability.
+## Done
+When the verification report is presented, the workflow is complete. Suggest next steps based on findings:
+- **All clear** → `grimoire archive <change-id>` or `grimoire-pr`
+- **Critical issues** → must fix before archiving
+- **Warnings only** → user decides whether to fix or accept

package/skills/references/bug-classification.md ADDED Viewed

@@ -0,0 +1,154 @@
+# Bug Classification Taxonomy
+8-way root cause classification for bug triage. Used by bug-triage (full classification) and bug (light classification).
+## Categories
+### CODE — Application defect
+The code doesn't match the spec, or the behavior is clearly wrong due to a bug in the application logic.
+**Signals:**
+- The bug reproduces in tests
+- The code path has an obvious logic error, missing edge case, or regression
+- `git log` shows a recent change that introduced the issue
+- The spec is clear and the implementation diverges from it
+### INFRASTRUCTURE — Platform or deployment issue
+The application code is correct, but the environment it runs in is broken or misconfigured.
+**Signals:**
+- Works locally or in other environments, fails in a specific one
+- Related to resources (memory, CPU, disk, network timeouts)
+- Deploy pipeline, container, or orchestration issue
+- Database server, cache, or queue is degraded
+- DNS, load balancer, or certificate problem
+**Examples:** staging database overloaded, k8s pod OOM-killed, CDN serving stale assets, Redis connection pool exhausted.
+### CONFIGURATION — Environment or feature config issue
+The code is correct and infrastructure is healthy, but the environment is configured wrong.
+**Signals:**
+- Feature flag is off when it should be on (or vice versa)
+- Environment variable is missing, wrong, or pointing to the wrong resource
+- Permissions or CORS settings differ between environments
+- A migration ran in one environment but not another
+**Examples:** `STRIPE_API_KEY` pointing to test mode in production, feature flag `enable-2fa` disabled on staging, missing database migration on QA.
+### DATA — Data integrity or content issue
+The code and config are correct, but the data is bad, missing, or in an unexpected state.
+**Signals:**
+- Only affects specific records, accounts, or tenants
+- Data doesn't match expected schema or constraints
+- Related to a recent data migration, import, or manual edit
+- Null/missing where a value is expected
+**Examples:** user record has null email from a botched migration, product has negative price from a CSV import, orphaned foreign key from a deleted parent.
+### THIRD-PARTY — External service or dependency issue
+The issue originates outside the application boundary — in a vendor API, library, or upstream service.
+**Signals:**
+- Third-party status page shows an incident
+- API responses from the vendor changed format or started returning errors
+- Library behavior changed after an update
+- Issue only occurs when the external service is involved
+**Examples:** Stripe webhook format changed, SendGrid rate-limiting, a library upgrade introduced a breaking change, OAuth provider returning new error codes.
+### SECURITY — Vulnerability or security defect
+The issue has security implications — unauthorized access, data exposure, injection, privilege escalation, or other vulnerabilities. May overlap with CODE, CONFIGURATION, or INFRASTRUCTURE but the security dimension changes how it's handled.
+Check the report's `security: true` flag — the bug-report skill auto-screens for security signals. Also evaluate the issue for security impact during investigation, even if the flag wasn't set.
+**Signals:**
+- Authentication or authorization bypass — accessing resources without proper credentials or acting as another user
+- Data exposure — PII, credentials, or internal data visible to unauthorized parties (in responses, logs, error messages, URLs)
+- Injection — SQL, XSS, command injection, template injection, SSRF
+- Privilege escalation — performing actions above the user's role
+- Credential/secret leakage — API keys, tokens, or passwords in source code, logs, client-side bundles, or error responses
+- Broken access control — IDOR (insecure direct object references), missing ownership checks, horizontal privilege escalation
+- Cryptographic issues — weak hashing, plaintext storage, broken TLS configuration
+- Denial of service — unbounded queries, resource exhaustion, regex DoS
+**Severity uses a security-specific scale:**
+- **critical** — active exploitation possible, data breach risk, auth bypass on production
+- **high** — exploitable vulnerability but requires specific conditions or authenticated access
+- **medium** — security weakness that increases risk but isn't directly exploitable (e.g., missing rate limiting, verbose error messages leaking internals)
+- **low** — defense-in-depth improvement, hardening recommendation (e.g., missing security headers, overly permissive CORS in dev)
+**Examples:** user can view other users' invoices by changing the ID in the URL (IDOR), admin API endpoint has no auth check, SQL injection in search query, JWT secret is hardcoded in source, error pages expose stack traces and DB connection strings.
+### DOCUMENTATION — Correct behavior, wrong expectations
+The application works as designed, but the user's expectation doesn't match reality because documentation, training, or UX is misleading.
+**Signals:**
+- Feature spec clearly describes the reported behavior as correct
+- The reporter's expectation is reasonable but doesn't match the design
+- Help text, tooltips, or docs describe different behavior than what's implemented
+- Onboarding or training missed this workflow
+**Examples:** user expects instant unlock but spec says 30-minute cooldown, docs say "click Save" but the button is labeled "Apply", reported "bug" is actually an undocumented limitation.
+### NOT A BUG — Cannot reproduce or invalid
+After thorough investigation, the reported issue is not reproducible or the report is invalid.
+**This still requires evidence.** Never dismiss with "works for me." Document:
+- Exactly what you tried
+- In what environment, with what data
+- Why you believe the issue is not valid
+- What follow-up questions might clarify
+## Triage Decision Outcomes
+After classification, triage ends in one of four outcomes:
+### VALIDATE + ROUTE
+The issue is real. Classify it AND route it:
+| Classification | Route to | Next action |
+|---|---|---|
+| **Code** | Developer (this team) | → `grimoire-bug` for repro test + fix |
+| **Infrastructure** | Infra/DevOps/SRE | Create or update ticket for the infra team with evidence |
+| **Configuration** | DevOps or config owner for the affected environment | Describe the specific misconfiguration and expected correct value |
+| **Data** | Developer or DBA depending on scope | Describe affected records and whether a migration/script is needed |
+| **Third-party** | Developer (workaround) + vendor (upstream fix) | Document the vendor issue, check for workarounds, file upstream if possible |
+| **Security** | Security lead + developer (see special handling below) | Confidential fix, may trigger incident response |
+### REJECT — Not a bug
+The reported behavior is correct and the expectations are wrong, or the issue cannot be reproduced.
+Rejection **requires evidence**. Provide one of:
+- **By design** — cite the specific feature scenario or decision record. Quote the spec.
+- **Cannot reproduce** — document exactly what you tried, in what environment, with what data.
+- **Duplicate** — reference the existing bug report or fix.
+### REDIRECT — Documentation/training issue
+The behavior is correct but the user's confusion is valid. The fix is better docs, UX copy, or training — not a code change.
+1. Update status to `redirected`
+2. Explain why the behavior is correct (cite specs)
+3. Recommend specific documentation or UX improvements
+4. Offer to file a separate improvement ticket for the docs/UX fix
+### NEEDS INFO — Can't decide yet
+The report is incomplete or ambiguous. Generate specific follow-up questions — not "can you provide more details?" but:
+- "Does this happen with all user roles or just admin?"
+- "Which environment — dev, staging, or production?"
+- "Can you share the exact error message or a screenshot?"
+- "Is this specific to certain records/accounts, or does it affect everyone?"