npm - zenkit - Versions diffs - 0.5.0 - Mend

zenkit 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

package/CONTRIBUTING.md +63 -0
package/LICENSE +21 -0
package/README.md +242 -0
package/agents/backend-architect.md +19 -0
package/agents/frontend-architect.md +19 -0
package/agents/implementation-auditor.md +19 -0
package/agents/product-manager.md +19 -0
package/agents/qa-test-engineer.md +19 -0
package/agents/security-specialist.md +19 -0
package/agents/system-architect.md +19 -0
package/agents/technical-writer.md +19 -0
package/agents/ux-engineer.md +19 -0
package/benchmark/feature-specs/cli-tool.json +58 -0
package/benchmark/feature-specs/handoff-system.json +69 -0
package/benchmark/feature-specs/protocol-completeness.json +85 -0
package/benchmark/feature-specs/schema-validator-baseline.json +93 -0
package/benchmark/feature-specs/schema-validator-playground.json +92 -0
package/benchmark/feature-specs/self-audit.json +76 -0
package/benchmark/fixtures/valid-handoff.json +13 -0
package/benchmark/scripts/compare.ts +172 -0
package/benchmark/scripts/report.ts +102 -0
package/benchmark/scripts/run-all.ts +125 -0
package/benchmark/scripts/run.ts +595 -0
package/benchmark/scripts/visualize.ts +120 -0
package/bin/zenkit.js +24 -0
package/commands/audit.md +28 -0
package/commands/build.md +26 -0
package/commands/checkpoint.md +28 -0
package/commands/handoff.md +28 -0
package/commands/plan.md +27 -0
package/commands/refactor.md +27 -0
package/commands/ship.md +28 -0
package/commands/spec.md +26 -0
package/dist/cli.d.ts +2 -0
package/dist/cli.d.ts.map +1 -0
package/dist/cli.js +174 -0
package/dist/cli.js.map +1 -0
package/dist/index.d.ts +765 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +121 -0
package/dist/index.js.map +1 -0
package/dist/schemas/audit.schema.json +63 -0
package/dist/schemas/benchmark.schema.json +118 -0
package/dist/schemas/checkpoint.schema.json +64 -0
package/dist/schemas/feature-spec.schema.json +76 -0
package/dist/schemas/handoff.schema.json +78 -0
package/dist/schemas/schemas/audit.schema.json +63 -0
package/dist/schemas/schemas/benchmark.schema.json +118 -0
package/dist/schemas/schemas/checkpoint.schema.json +64 -0
package/dist/schemas/schemas/feature-spec.schema.json +76 -0
package/dist/schemas/schemas/handoff.schema.json +78 -0
package/dist/schemas/schemas/task.schema.json +69 -0
package/dist/schemas/task.schema.json +69 -0
package/docs/agent-contract.md +36 -0
package/docs/architecture.md +88 -0
package/docs/benchmarking.md +51 -0
package/docs/command-model.md +43 -0
package/docs/philosophy.md +35 -0
package/docs/roadmap.md +43 -0
package/docs/self-audit.md +29 -0
package/hooks/post-change.md +30 -0
package/hooks/pre-change.md +27 -0
package/hooks/pre-ship.md +30 -0
package/package.json +92 -0
package/rubrics/architectural-alignment.md +26 -0
package/rubrics/execution-quality.md +26 -0
package/rubrics/verbosity-score.md +26 -0
package/schemas/audit.schema.json +63 -0
package/schemas/benchmark.schema.json +118 -0
package/schemas/checkpoint.schema.json +64 -0
package/schemas/feature-spec.schema.json +76 -0
package/schemas/handoff.schema.json +78 -0
package/schemas/task.schema.json +69 -0
package/skills/architecture-review.md +17 -0
package/skills/backend-change.md +17 -0
package/skills/bug-triage.md +17 -0
package/skills/frontend-change.md +17 -0
package/skills/prompt-pruning.md +17 -0
package/skills/release-check.md +17 -0
package/skills/security-review.md +17 -0
package/templates/agent.template.md +18 -0
package/templates/command.template.md +21 -0
package/templates/skill.template.md +15 -0
package/templates/task.template.md +19 -0

package/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,63 @@
+# Contributing to ZenKit
+## Setup
+```bash
+git clone https://github.com/carl0zen/zenkit.git
+cd zenkit
+npm install
+npx playwright install chromium  # for E2E tests
+```
+## Verify everything works
+```bash
+npm test                     # 54 unit tests
+npm run lint                 # ESLint
+npm run validate:schemas     # 6 JSON schemas
+npm run benchmark:all        # 5 feature specs, 131 checks
+npm run build                # Next.js production build
+npm run test:e2e             # 13 Playwright browser tests
+```
+## Adding protocol artifacts
+**New command:** Copy `templates/command.template.md` to `commands/`. Follow the compressed format used by existing commands.
+**New skill:** Copy `templates/skill.template.md` to `skills/`.
+**New agent:** Copy `templates/agent.template.md` to `agents/`.
+**New schema:** Add `schemas/your-name.schema.json`, register in `src/lib/schemas.ts`, add example data in `src/lib/playground-examples.ts`. Run `npm run validate:schemas` to verify.
+## Adding benchmark specs
+1. Create `benchmark/feature-specs/your-feature.json` following the `feature-spec.schema.json` format.
+2. Include at least one `limitations` entry — specs must be honest about what they don't verify.
+3. Run `npm run benchmark your-feature.json` to test it.
+4. Commit the spec. Live results are gitignored — they regenerate on each run.
+### Verification types available
+| Type | What it checks |
+|------|---------------|
+| `file_exists` | File is present |
+| `file_contains` | File contains a string pattern |
+| `schema_count` | Expected number of schemas compile |
+| `examples_valid` | Fixture data validates against schemas |
+| `schemas_consistent` | All schemas use the same draft |
+| `test_passes` | Shell command exits with code 0 |
+| `json_path_equals` | JSON file path equals expected value |
+## PR expectations
+- Tests pass (`npm test`)
+- Lint clean (`npm run lint`)
+- Benchmarks pass (`npm run benchmark:all`)
+- Build succeeds (`npm run build`)
+- No fabricated telemetry or claims — estimated data must be labeled
+- Uncertainty and limitations declared where applicable
+## Design principles
+Keep it thin. If your change adds a major subsystem, a runtime dependency, or theatrical agent language, reconsider. See [docs/philosophy.md](docs/philosophy.md).

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 ZenKit Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,242 @@
+# ZenKit
+**Disciplined workflows for coding agents.**
+ZenKit is a lightweight open-source protocol layer for AI-assisted software building. Commands, schemas, hooks, checkpoints, and handoffs — without framework bloat.
+## Install
+```bash
+npm install zenkit
+```
+### As a library
+```typescript
+import { validate, getSchemaNames, createHandoff, loadFeatureSpec } from 'zenkit'
+// Validate data against any ZenKit schema
+const result = validate('handoff', myHandoffData)
+if (!result.valid) {
+  console.error(result.errors) // [{ path: '/deliverable', message: '...', keyword: '...' }]
+}
+// Create a validated handoff
+const handoff = createHandoff({
+  context: 'Completed auth module',
+  assumptions: ['Redis available'],
+  decision: 'JWT with refresh tokens',
+  deliverable: { type: 'code', description: 'Auth module' },
+  next_agent: 'frontend-architect',
+})
+// Load and validate a feature spec
+const spec = loadFeatureSpec('benchmark/feature-specs/my-feature.json')
+```
+### As a CLI
+```bash
+npx zenkit validate handoff data.json    # Validate JSON against schema
+npx zenkit benchmark:all                 # Run all benchmark specs
+npx zenkit audit                         # Full audit with report
+npx zenkit status                        # Project health check
+npx zenkit init                          # Scaffold ZenKit into a project
+```
+## Problem
+Most AI-assisted development workflows share structural failures unrelated to model capability:
+- **Drift** — Agents wander from the plan. Each step compounds divergence.
+- **Verbosity** — Workflows burn tokens on narration instead of producing artifacts.
+- **Hidden uncertainty** — Agents report success without distinguishing validated from assumed.
+- **Lost context** — Handoffs between agents lose assumptions, constraints, and decisions.
+ZenKit adds structure around your agent runtime. It does not replace it.
+## Architecture
+Six categories of plain-file artifacts:
+| Primitive | Purpose | Format |
+|-----------|---------|--------|
+| **Commands** | 8 workflow verbs: spec, plan, build, audit, refactor, handoff, checkpoint, ship | Markdown |
+| **Schemas** | Machine-validatable contracts for handoffs, tasks, audits, checkpoints, benchmarks, feature specs | JSON Schema |
+| **Skills** | Reusable capabilities: architecture review, security review, bug triage, prompt pruning | Markdown |
+| **Hooks** | Automatic validation at workflow boundaries | Markdown |
+| **Checkpoints** | State snapshots with gate conditions — validated facts vs. assumptions | JSON Schema |
+| **Rubrics** | Evaluation criteria scored 0-10 | Markdown |
+### Standard output contract
+Every command produces output aligned to:
+```
+context, assumptions, constraints, decision,
+deliverable, risks, open_questions, next_agent
+```
+## Quickstart
+```bash
+npm install
+# CLI
+npm run zenkit status           # Project health check
+npm run zenkit validate handoff data.json  # Validate against schema
+npm run zenkit benchmark:all    # Run all 5 benchmark specs
+# Development
+npm run dev              # Landing page at localhost:3000, playground at /playground
+npm test                 # 54 unit tests
+npm run test:e2e         # 13 Playwright E2E browser tests
+npm run lint             # ESLint
+npm run build            # Production build
+# Benchmarking
+npm run benchmark        # Single spec (schema validator playground)
+npm run benchmark:all    # All 5 specs (131 checks, 44 criteria)
+npm run benchmark:report # Markdown report from latest result
+npm run benchmark:compare # ZenKit vs baseline comparison
+npm run benchmark:visualize -- --summary  # Mermaid workflow diagram
+```
+## Workflow
+```
+/spec → /plan → /build → /audit → /checkpoint → /ship
+                  ↑         |
+                  └─────────┘  (audit loop)
+```
+Lateral: `/refactor` (behavior-preserving improvement), `/handoff` (agent-to-agent context transfer).
+## Benchmarking
+ZenKit benchmarks verify acceptance criteria against the actual implementation — not just file existence, and not narrative claims.
+### Current coverage
+5 feature specs with 44 acceptance criteria and 131 total checks:
+| Spec | Criteria | Checks |
+|------|----------|--------|
+| Schema Validator Playground | 8 | 25 |
+| Handoff Contract System | 9 | 24 |
+| Protocol Completeness | 10 | 37 |
+| Self-Audit | 10 | 25 |
+| CLI Tool | 7 | 20 |
+### Verification types
+- `file_exists` — File is present
+- `file_contains` — File contains a specific string pattern
+- `schema_count` — Expected number of schemas compile
+- `examples_valid` — Fixture data validates against schemas
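+- `schemas_consistent` — All schemas use the same JSON Schema draft
+- `test_passes` — Shell command exits with code 0
+- `json_path_equals` — JSON file path equals expected value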
+### Telemetry honesty
+- **Estimated** data includes a `basis` field explaining the heuristic.
+- **Actual** telemetry is `null` when no API instrumentation is available. Never fabricated.
+- Every result includes `uncertainty` and `limitations` arrays.
+### Baseline comparison
+ZenKit supports `zenkit` and `baseline` modes. Current comparison data is **illustrative** — both modes verify the same codebase. A meaningful comparison requires A/B workflow execution.
+### Self-audit
+ZenKit uses its own benchmark system to audit itself. This is structured introspection, not self-certification. See [docs/self-audit.md](docs/self-audit.md).
+### Workflow visualization
+```bash
+npm run benchmark:visualize -- --summary  # Mermaid diagram of all specs
+npm run benchmark:visualize               # Mermaid diagram of single result
+```
+## API Reference
+### `validate(schemaName, data)`
+Validate data against a ZenKit schema. Returns `{ valid, errors, schemaName }`.
+### `getSchemaNames()`
+Returns an array of all schema names: `handoff`, `task`, `audit`, `checkpoint`, `benchmark`, `feature-spec`.
+### `getSchema(name)`
+Returns the raw JSON Schema object for a named schema.
+### `createHandoff(data)`
+Create and validate a handoff object. Returns the handoff if valid, throws if invalid.
+### `loadFeatureSpec(path)`
+Load a feature spec from a JSON file. Validates against `feature-spec.schema.json`. Returns the spec if valid, throws if invalid.
+## Schema Validator Playground
+Interactive tool at `/playground` for validating JSON against ZenKit schemas. Client-side validation with Ajv, pre-loaded examples, detailed error paths.
+## CLI
+```bash
+npm run zenkit help                         # All commands
+npm run zenkit status                       # Project health
+npm run zenkit validate <schema> <file>     # Validate JSON
+npm run zenkit validate:all                 # Check all schemas compile
+npm run zenkit benchmark [spec]             # Run single benchmark
+npm run zenkit benchmark:all                # Run all benchmarks
+npm run zenkit audit                        # Run all benchmarks + produce audit report
+npm run zenkit init [dir]                   # Scaffold ZenKit into a project
+```
+## Test coverage
+| Layer | Tests | What it covers |
+|-------|-------|----------------|
+| Unit (Vitest) | 54 | Schema validation, example data, edge cases, benchmark results, CLI commands, public API, handoff creation, feature spec loading |
+| E2E (Playwright) | 13 | Playground UI, schema selection, validation flows, format button, landing page sections, navigation |
+| Benchmarks | 131 checks | Code structure, schema compilation, test execution, JSON values, documentation, self-audit, CLI |
+## Extending
+```
+templates/command.template.md  → commands/
+templates/skill.template.md   → skills/
+templates/agent.template.md   → agents/
+```
+Custom schemas: add to `schemas/`, register in `src/lib/schemas.ts`, add example data in `src/lib/playground-examples.ts`.
+## Design Principles
+1. **Thin over grand** — Smallest architecture that works.
+2. **Protocol over persona** — Schemas and contracts, not theatrical agent identities.
+3. **Bounded autonomy** — Assumptions explicit. Uncertainty recorded. Claims bounded.
+4. **Validation over narration** — Tests, schemas, and artifacts over prose.
+5. **Low drift** — Commands and handoffs force consistency.
+6. **Benchmarkable** — Acceptance criteria, not file existence.
+## CI
+GitHub Actions runs on push/PR to main: lint, unit tests, schema validation, all benchmarks, build, E2E tests. Benchmark results uploaded as artifacts.
+## Documentation
+- [Philosophy](docs/philosophy.md) — Design principles.
+- [Architecture](docs/architecture.md) — Primitives and workflow composition.
+- [Command Model](docs/command-model.md) — The 8 commands and output contract.
+- [Agent Contract](docs/agent-contract.md) — Agent definitions and handoff chains.
+- [Benchmarking](docs/benchmarking.md) — The benchmark system.
+- [Self-Audit](docs/self-audit.md) — Self-verification and its limits.
+- [Roadmap](docs/roadmap.md) — What's done and what's next.
+## License
+MIT — see [LICENSE](LICENSE).

package/agents/backend-architect.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Backend Architect
+> Designs and implements backend systems, APIs, and data models.
+**Owns:** Backend implementation. Translates system architecture into working backend code including APIs, data models, business logic, and infrastructure configuration. Ensures the backend is performant, secure by default, and well-tested.
+**Receives from:** `system-architect` (backend component specs, API contracts, data flow requirements)
+**Hands off to:** `qa-test-engineer`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Modify frontend code or UI components
+- Change API contracts without coordinating with system-architect
+- Skip writing tests for new endpoints or business logic
+**Quality bar:**
+- All API endpoints have request/response validation with consistent error format
+- Data models include migrations and rollback paths
+- Unit tests cover core business logic; no raw SQL or unparameterized queries

package/agents/frontend-architect.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Frontend Architect
+> Designs and implements frontend components, pages, and interactions.
+**Owns:** Frontend implementation. Translates system architecture and UX requirements into working frontend code including components, pages, state management, and API integrations. Ensures the frontend is responsive, accessible, and follows the design system.
+**Receives from:** `system-architect` (frontend component specs, API contracts)
+**Hands off to:** `ux-engineer`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Modify backend code or API implementations
+- Invent new design patterns that conflict with the existing design system
+- Skip accessibility attributes on interactive elements
+**Quality bar:**
+- Components are reusable and follow project conventions; all interactive elements have ARIA attributes
+- State management is predictable with no unnecessary global state
+- Component tests cover rendering, interactions, and edge cases; no hardcoded user-facing strings

package/agents/implementation-auditor.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Implementation Auditor
+> Final quality review across all dimensions before shipping.
+**Owns:** The final quality gate. Performs comprehensive review of the complete implementation against requirements, architecture, code quality, test coverage, and security findings. Decides ship or return for corrections.
+**Receives from:** Build agents, `qa-test-engineer` (test results/coverage), `security-specialist` (audit report), `product-manager` (acceptance criteria)
+**Hands off to:** `technical-writer` (if shipping) or back to the relevant build agent (if corrections needed)
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Implement fixes directly (return work to the responsible agent)
+- Relax quality standards without explicit stakeholder approval
+- Block shipment for cosmetic issues when quality thresholds are met
+**Quality bar:**
+- Every acceptance criterion is verified as met or explicitly flagged
+- Rubric score meets project minimum (default 7/10); all critical/high security findings resolved
+- Test coverage meets thresholds; audit report is complete with no dimensions left unreviewed

package/agents/product-manager.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Product Manager
+> Defines requirements, priorities, and acceptance criteria for every task.
+**Owns:** The "what" and "why" of every piece of work. Translates user needs and business goals into clear, prioritized requirements with measurable acceptance criteria and a definition of done.
+**Receives from:** User requests, feature ideas, bug reports, business context.
+**Hands off to:** `system-architect`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Make architectural or implementation decisions
+- Write code or design system internals
+- Approve its own requirements without external review
+**Quality bar:**
+- Every requirement has at least one measurable acceptance criterion
+- Priorities are explicitly ranked; scope is bounded with out-of-scope items deferred
+- No implementation language or technology prescribed unless it is a hard constraint

package/agents/qa-test-engineer.md ADDED Viewed

@@ -0,0 +1,19 @@
+# QA Test Engineer
+> Creates test strategies and writes automated tests across all layers.
+**Owns:** Test coverage and quality assurance. Designs test strategies, writes automated tests (unit, integration, e2e), and validates implementations against acceptance criteria. Ensures the test suite is reliable, fast, and provides meaningful coverage.
+**Receives from:** `backend-architect`, `frontend-architect`, or `ux-engineer` (implemented code, acceptance criteria, API contracts)
+**Hands off to:** `security-specialist` or `implementation-auditor`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Modify production code to make tests pass (report the issue instead)
+- Reduce coverage to speed up the test suite
+- Write tests that depend on execution order or shared mutable state
+**Quality bar:**
+- Every acceptance criterion has at least one corresponding test
+- Tests are deterministic; names clearly describe the scenario and expected outcome
+- Coverage meets project thresholds; tests run under the configured CI time budget

package/agents/security-specialist.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Security Specialist
+> Audits for security vulnerabilities and compliance across all layers.
+**Owns:** Security posture. Performs security audits on code, configurations, and dependencies. Identifies vulnerabilities, recommends mitigations, and validates that security best practices and compliance requirements are met.
+**Receives from:** `qa-test-engineer` (tested code, architecture design, compliance requirements)
+**Hands off to:** `implementation-auditor`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Implement fixes directly (report findings for build agents to fix)
+- Approve security exceptions without documenting the accepted risk
+- Perform destructive testing against production systems
+**Quality bar:**
+- All findings include a severity rating (critical/high/medium/low/informational)
+- Critical and high findings include specific remediation steps
+- Dependency audit covers known CVEs; auth flows validated; secrets verified absent from code

package/agents/system-architect.md ADDED Viewed

@@ -0,0 +1,19 @@
+# System Architect
+> Designs overall system architecture and defines component boundaries.
+**Owns:** High-level technical design. Decomposes requirements into system components, defines boundaries and interfaces, selects key technologies, and produces an architecture plan that backend and frontend architects can independently execute against.
+**Receives from:** `product-manager` (approved requirements with acceptance criteria)
+**Hands off to:** `backend-architect` and/or `frontend-architect`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Implement code directly
+- Define UI layouts or visual design
+- Override product requirements without escalating to product-manager
+**Quality bar:**
+- Every component has a defined responsibility and clear interface
+- Data flow between components is explicitly documented
+- Technology choices include rationale and at least one considered alternative

package/agents/technical-writer.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Technical Writer
+> Creates documentation, guides, and API references for shipped work.
+**Owns:** All user-facing and developer-facing documentation. Produces clear, accurate, maintainable docs including API references, usage guides, changelogs, and inline code documentation. Ensures documentation stays in sync with the implementation.
+**Receives from:** `implementation-auditor` (approved implementation, API contracts, architecture context)
+**Hands off to:** Terminal (documentation complete) or ship process.
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Modify production code beyond documentation comments
+- Invent behavior that is not implemented; document only what exists
+- Use jargon without defining it on first use
+**Quality bar:**
+- Every public API has documented signature, description, parameters, return value, and at least one example
+- Guides include prerequisites and step-by-step instructions; verbosity score 7/10 or higher
+- All code examples are verified against current implementation; changelog entries follow project format

package/agents/ux-engineer.md ADDED Viewed

@@ -0,0 +1,19 @@
+# UX Engineer
+> Implements UX specifications with accessibility and design system compliance.
+**Owns:** The user experience layer. Reviews and refines frontend components to meet UX specifications, accessibility standards (WCAG 2.1 AA), and design system compliance. Bridges the gap between design intent and implementation reality.
+**Receives from:** `frontend-architect` (implemented components, design system tokens/guidelines)
+**Hands off to:** `qa-test-engineer`
+**Must produce:** context, assumptions, constraints, decision, deliverable, risks, open_questions, next_agent
+**Must NOT:**
+- Alter business logic or data handling
+- Introduce new design tokens without design system approval
+- Remove functionality to achieve design compliance
+**Quality bar:**
+- All interactive elements are keyboard navigable; color contrast meets WCAG 2.1 AA minimums
+- Components use design system tokens exclusively; no magic numbers or hardcoded colors
+- Focus management is correct for modals/drawers/dynamic content; reduced-motion preferences respected

package/benchmark/feature-specs/cli-tool.json ADDED Viewed

@@ -0,0 +1,58 @@
+{
+  "feature_id": "cli-001",
+  "name": "ZenKit CLI Tool",
+  "description": "The zenkit CLI provides validate, benchmark, audit, init, and status commands.",
+  "mode": "zenkit",
+  "acceptance_criteria": [
+    {
+      "id": "cli-1",
+      "description": "CLI entry point exists and is executable",
+      "verification": { "type": "file_exists", "path": "bin/zenkit.ts" }
+    },
+    {
+      "id": "cli-2",
+      "description": "CLI exports a bin field in package.json",
+      "verification": { "type": "file_contains", "path": "package.json", "pattern": "\"zenkit\":" }
+    },
+    {
+      "id": "cli-3",
+      "description": "CLI help command works",
+      "verification": { "type": "test_passes", "command": "npx tsx bin/zenkit.ts help" }
+    },
+    {
+      "id": "cli-4",
+      "description": "CLI status command works",
+      "verification": { "type": "test_passes", "command": "npx tsx bin/zenkit.ts status" }
+    },
+    {
+      "id": "cli-5",
+      "description": "CLI validate:all command works",
+      "verification": { "type": "test_passes", "command": "npx tsx bin/zenkit.ts validate:all" }
+    },
+    {
+      "id": "cli-6",
+      "description": "CLI validates a valid fixture",
+      "verification": { "type": "test_passes", "command": "npx tsx bin/zenkit.ts validate handoff benchmark/fixtures/valid-handoff.json" }
+    },
+    {
+      "id": "cli-7",
+      "description": "Package has repository field for npm",
+      "verification": { "type": "json_path_equals", "path": "package.json", "json_path": "repository.type", "equals": "git" }
+    }
+  ],
+  "constraints": [
+    "CLI must work via npx tsx without a compilation step",
+    "All commands must exit 0 on success, non-zero on failure"
+  ],
+  "expected_files": [
+    "bin/zenkit.ts",
+    "package.json"
+  ],
+  "assigned_commands": ["build", "audit"],
+  "estimated_complexity": "low",
+  "limitations": [
+    "Verifies commands exit successfully, not that their output is semantically correct",
+    "Does not test zenkit init in isolation (tested in unit tests)",
+    "Does not test zenkit audit (slow — runs all benchmarks recursively)"
+  ]
+}

package/benchmark/feature-specs/handoff-system.json ADDED Viewed

@@ -0,0 +1,69 @@
+{
+  "feature_id": "hs-001",
+  "name": "Handoff Contract System",
+  "description": "The structured handoff system that enables agent-to-agent context transfer with schema validation.",
+  "mode": "zenkit",
+  "acceptance_criteria": [
+    {
+      "id": "hs-1",
+      "description": "Handoff schema defines required fields: context, assumptions, decision, deliverable, next_agent",
+      "verification": { "type": "file_contains", "path": "schemas/handoff.schema.json", "pattern": "\"required\"" }
+    },
+    {
+      "id": "hs-2",
+      "description": "Handoff schema enforces deliverable type enum",
+      "verification": { "type": "file_contains", "path": "schemas/handoff.schema.json", "pattern": "\"enum\"" }
+    },
+    {
+      "id": "hs-3",
+      "description": "Handoff schema disallows additional properties",
+      "verification": { "type": "file_contains", "path": "schemas/handoff.schema.json", "pattern": "additionalProperties" }
+    },
+    {
+      "id": "hs-4",
+      "description": "Validation engine can validate handoff data",
+      "verification": { "type": "file_contains", "path": "src/lib/schemas.ts", "pattern": "handoff:" }
+    },
+    {
+      "id": "hs-5",
+      "description": "Example handoff data exists in playground examples",
+      "verification": { "type": "file_contains", "path": "src/lib/playground-examples.ts", "pattern": "handoff:" }
+    },
+    {
+      "id": "hs-6",
+      "description": "Handoff fixture validates against schema",
+      "verification": { "type": "examples_valid" }
+    },
+    {
+      "id": "hs-7",
+      "description": "Handoff command documentation exists",
+      "verification": { "type": "file_exists", "path": "commands/handoff.md" }
+    },
+    {
+      "id": "hs-8",
+      "description": "Agent contract docs explain handoff chain",
+      "verification": { "type": "file_contains", "path": "docs/agent-contract.md", "pattern": "handoff chain" }
+    },
+    {
+      "id": "hs-9",
+      "description": "Landing page shows handoff example",
+      "verification": { "type": "file_contains", "path": "src/components/HandoffExample.tsx", "pattern": "next_agent" }
+    }
+  ],
+  "constraints": [
+    "Handoff validation must be client-side compatible",
+    "Schema must be strict (additionalProperties: false)"
+  ],
+  "expected_files": [
+    "schemas/handoff.schema.json",
+    "src/lib/schemas.ts",
+    "commands/handoff.md",
+    "benchmark/fixtures/valid-handoff.json"
+  ],
+  "assigned_commands": ["spec", "build", "audit"],
+  "estimated_complexity": "low",
+  "limitations": [
+    "Verifies schema structure and content presence, not runtime handoff behavior",
+    "Does not test actual agent-to-agent transfers (requires multi-agent execution)"
+  ]
+}