@tgoodington/intuition 8.1.3 → 9.2.1

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (154)
  1. package/README.md +9 -9
  2. package/docs/project_notes/.project-memory-state.json +100 -0
  3. package/docs/project_notes/branches/.gitkeep +0 -0
  4. package/docs/project_notes/bugs.md +41 -0
  5. package/docs/project_notes/decisions.md +147 -0
  6. package/docs/project_notes/issues.md +101 -0
  7. package/docs/project_notes/key_facts.md +88 -0
  8. package/docs/project_notes/trunk/.gitkeep +0 -0
  9. package/docs/project_notes/trunk/.planning_research/decision_file_naming.md +15 -0
  10. package/docs/project_notes/trunk/.planning_research/decisions_log.md +32 -0
  11. package/docs/project_notes/trunk/.planning_research/orientation.md +51 -0
  12. package/docs/project_notes/trunk/audit/plan-rename-hitlist.md +654 -0
  13. package/docs/project_notes/trunk/blueprint-conflicts.md +109 -0
  14. package/docs/project_notes/trunk/blueprints/database-architect.md +416 -0
  15. package/docs/project_notes/trunk/blueprints/devops-infrastructure.md +514 -0
  16. package/docs/project_notes/trunk/blueprints/technical-writer.md +788 -0
  17. package/docs/project_notes/trunk/build_brief.md +119 -0
  18. package/docs/project_notes/trunk/build_report.md +250 -0
  19. package/docs/project_notes/trunk/detail_brief.md +94 -0
  20. package/docs/project_notes/trunk/plan.md +182 -0
  21. package/docs/project_notes/trunk/planning_brief.md +96 -0
  22. package/docs/project_notes/trunk/prompt_brief.md +60 -0
  23. package/docs/project_notes/trunk/prompt_output.json +98 -0
  24. package/docs/project_notes/trunk/scratch/database-architect-decisions.json +72 -0
  25. package/docs/project_notes/trunk/scratch/database-architect-research-plan.md +10 -0
  26. package/docs/project_notes/trunk/scratch/database-architect-stage1.md +226 -0
  27. package/docs/project_notes/trunk/scratch/devops-infrastructure-decisions.json +71 -0
  28. package/docs/project_notes/trunk/scratch/devops-infrastructure-research-plan.md +7 -0
  29. package/docs/project_notes/trunk/scratch/devops-infrastructure-stage1.md +164 -0
  30. package/docs/project_notes/trunk/scratch/technical-writer-decisions.json +88 -0
  31. package/docs/project_notes/trunk/scratch/technical-writer-research-plan.md +7 -0
  32. package/docs/project_notes/trunk/scratch/technical-writer-stage1.md +266 -0
  33. package/docs/project_notes/trunk/team_assignment.json +108 -0
  34. package/docs/project_notes/trunk/test_brief.md +75 -0
  35. package/docs/project_notes/trunk/test_report.md +26 -0
  36. package/docs/project_notes/trunk/verification/devops-infrastructure-verification.md +172 -0
  37. package/docs/v9/decision-framework-direction.md +142 -0
  38. package/docs/v9/decision-framework-implementation.md +114 -0
  39. package/docs/v9/domain-adaptive-team-architecture.md +1016 -0
  40. package/docs/v9/test/SESSION_SUMMARY.md +117 -0
  41. package/docs/v9/test/TEST_PLAN.md +119 -0
  42. package/docs/v9/test/blueprints/legal-analyst.md +166 -0
  43. package/docs/v9/test/output/07_cover_letter.md +41 -0
  44. package/docs/v9/test/phase2/mock_plan.md +89 -0
  45. package/docs/v9/test/phase2/producers.json +32 -0
  46. package/docs/v9/test/phase2/specialists/database-architect.specialist.md +10 -0
  47. package/docs/v9/test/phase2/specialists/financial-analyst.specialist.md +10 -0
  48. package/docs/v9/test/phase2/specialists/legal-analyst.specialist.md +10 -0
  49. package/docs/v9/test/phase2/specialists/technical-writer.specialist.md +10 -0
  50. package/docs/v9/test/phase2/team_assignment.json +61 -0
  51. package/docs/v9/test/phase3/blueprints/legal-analyst.md +840 -0
  52. package/docs/v9/test/phase3/legal-analyst-full.specialist.md +111 -0
  53. package/docs/v9/test/phase3/project_context/nh_landlord_tenant_notes.md +35 -0
  54. package/docs/v9/test/phase3/project_context/property_facts.md +32 -0
  55. package/docs/v9/test/phase3b/blueprints/legal-analyst.md +1715 -0
  56. package/docs/v9/test/phase3b/legal-analyst.specialist.md +153 -0
  57. package/docs/v9/test/phase3b/scratch/legal-analyst-stage1.md +270 -0
  58. package/docs/v9/test/phase4/TEST_PLAN.md +32 -0
  59. package/docs/v9/test/phase4/blueprints/financial-analyst-T2.md +538 -0
  60. package/docs/v9/test/phase4/blueprints/legal-analyst-T4.md +253 -0
  61. package/docs/v9/test/phase4/cross-blueprint-check.md +280 -0
  62. package/docs/v9/test/phase4/scratch/financial-analyst-T2-stage1.md +67 -0
  63. package/docs/v9/test/phase4/scratch/legal-analyst-T4-stage1.md +54 -0
  64. package/docs/v9/test/phase4/specialists/financial-analyst.specialist.md +156 -0
  65. package/docs/v9/test/phase4/specialists/legal-analyst.specialist.md +153 -0
  66. package/docs/v9/test/phase5/TEST_PLAN.md +35 -0
  67. package/docs/v9/test/phase5/blueprints/code-architect-hw-vetter.md +375 -0
  68. package/docs/v9/test/phase5/output/04_compliance_checklist.md +149 -0
  69. package/docs/v9/test/phase5/output/hardware-vetter-SKILL-v2.md +561 -0
  70. package/docs/v9/test/phase5/output/hardware-vetter-SKILL.md +459 -0
  71. package/docs/v9/test/phase5/producers/code-writer.producer.md +49 -0
  72. package/docs/v9/test/phase5/producers/document-writer.producer.md +62 -0
  73. package/docs/v9/test/phase5/regression-comparison-v2.md +60 -0
  74. package/docs/v9/test/phase5/regression-comparison.md +197 -0
  75. package/docs/v9/test/phase5/review-5A-specialist.md +213 -0
  76. package/docs/v9/test/phase5/specialist-test/TEST_PLAN.md +60 -0
  77. package/docs/v9/test/phase5/specialist-test/blueprint-comparison.md +252 -0
  78. package/docs/v9/test/phase5/specialist-test/blueprints/code-architect-hw-vetter.md +916 -0
  79. package/docs/v9/test/phase5/specialist-test/scratch/code-architect-stage1.md +427 -0
  80. package/docs/v9/test/phase5/specialists/code-architect.specialist.md +168 -0
  81. package/docs/v9/test/phase5b/TEST_PLAN.md +219 -0
  82. package/docs/v9/test/phase5b/blueprints/5B-10-stage2-with-decisions.md +286 -0
  83. package/docs/v9/test/phase5b/decisions/5B-2-accept-all-decisions.json +68 -0
  84. package/docs/v9/test/phase5b/decisions/5B-3-promote-decisions.json +70 -0
  85. package/docs/v9/test/phase5b/decisions/5B-4-individual-decisions.json +68 -0
  86. package/docs/v9/test/phase5b/decisions/5B-5-triage-decisions.json +110 -0
  87. package/docs/v9/test/phase5b/decisions/5B-6-fallback-decisions.json +40 -0
  88. package/docs/v9/test/phase5b/decisions/5B-8-partial-decisions.json +46 -0
  89. package/docs/v9/test/phase5b/decisions/5B-9-complete-decisions.json +54 -0
  90. package/docs/v9/test/phase5b/scratch/code-architect-stage1.md +133 -0
  91. package/docs/v9/test/phase5b/specialists/code-architect.specialist.md +202 -0
  92. package/docs/v9/test/phase5b/stage1-many-decisions.md +139 -0
  93. package/docs/v9/test/phase5b/stage1-no-assumptions.md +70 -0
  94. package/docs/v9/test/phase5b/stage1-with-assumptions.md +86 -0
  95. package/docs/v9/test/phase5b/test-5B-1-results.md +157 -0
  96. package/docs/v9/test/phase5b/test-5B-10-results.md +130 -0
  97. package/docs/v9/test/phase5b/test-5B-2-results.md +75 -0
  98. package/docs/v9/test/phase5b/test-5B-3-results.md +104 -0
  99. package/docs/v9/test/phase5b/test-5B-4-results.md +114 -0
  100. package/docs/v9/test/phase5b/test-5B-5-results.md +126 -0
  101. package/docs/v9/test/phase5b/test-5B-6-results.md +60 -0
  102. package/docs/v9/test/phase5b/test-5B-7-results.md +141 -0
  103. package/docs/v9/test/phase5b/test-5B-8-results.md +115 -0
  104. package/docs/v9/test/phase5b/test-5B-9-results.md +76 -0
  105. package/docs/v9/test/producers/document-writer.producer.md +62 -0
  106. package/docs/v9/test/specialists/legal-analyst.specialist.md +58 -0
  107. package/package.json +4 -2
  108. package/producers/code-writer/code-writer.producer.md +86 -0
  109. package/producers/data-file-writer/data-file-writer.producer.md +116 -0
  110. package/producers/document-writer/document-writer.producer.md +117 -0
  111. package/producers/form-filler/form-filler.producer.md +99 -0
  112. package/producers/presentation-creator/presentation-creator.producer.md +109 -0
  113. package/producers/spreadsheet-builder/spreadsheet-builder.producer.md +107 -0
  114. package/scripts/install-skills.js +97 -9
  115. package/scripts/uninstall-skills.js +7 -2
  116. package/skills/intuition-agent-advisor/SKILL.md +327 -220
  117. package/skills/intuition-assemble/SKILL.md +261 -0
  118. package/skills/intuition-build/SKILL.md +379 -319
  119. package/skills/intuition-debugger/SKILL.md +390 -390
  120. package/skills/intuition-design/SKILL.md +385 -381
  121. package/skills/intuition-detail/SKILL.md +377 -0
  122. package/skills/intuition-engineer/SKILL.md +307 -303
  123. package/skills/intuition-handoff/SKILL.md +264 -222
  124. package/skills/intuition-handoff/references/handoff_core.md +54 -54
  125. package/skills/intuition-initialize/SKILL.md +21 -6
  126. package/skills/intuition-initialize/references/agents_template.md +118 -118
  127. package/skills/intuition-initialize/references/claude_template.md +134 -134
  128. package/skills/intuition-initialize/references/intuition_readme_template.md +4 -4
  129. package/skills/intuition-initialize/references/state_template.json +17 -2
  130. package/skills/{intuition-plan → intuition-outline}/SKILL.md +561 -481
  131. package/skills/{intuition-plan → intuition-outline}/references/magellan_core.md +16 -16
  132. package/skills/{intuition-plan → intuition-outline}/references/templates/plan_template.md +6 -6
  133. package/skills/intuition-prompt/SKILL.md +374 -312
  134. package/skills/intuition-start/SKILL.md +46 -13
  135. package/skills/intuition-start/references/start_core.md +60 -60
  136. package/skills/intuition-test/SKILL.md +345 -0
  137. package/specialists/api-designer/api-designer.specialist.md +291 -0
  138. package/specialists/business-analyst/business-analyst.specialist.md +270 -0
  139. package/specialists/copywriter/copywriter.specialist.md +268 -0
  140. package/specialists/database-architect/database-architect.specialist.md +275 -0
  141. package/specialists/devops-infrastructure/devops-infrastructure.specialist.md +314 -0
  142. package/specialists/financial-analyst/financial-analyst.specialist.md +269 -0
  143. package/specialists/frontend-component/frontend-component.specialist.md +293 -0
  144. package/specialists/instructional-designer/instructional-designer.specialist.md +285 -0
  145. package/specialists/legal-analyst/legal-analyst.specialist.md +260 -0
  146. package/specialists/marketing-strategist/marketing-strategist.specialist.md +281 -0
  147. package/specialists/project-manager/project-manager.specialist.md +266 -0
  148. package/specialists/research-analyst/research-analyst.specialist.md +273 -0
  149. package/specialists/security-auditor/security-auditor.specialist.md +354 -0
  150. package/specialists/technical-writer/technical-writer.specialist.md +275 -0
  151. package/skills/{intuition-plan → intuition-outline}/references/sub_agents.md +0 -0
  152. package/skills/{intuition-plan → intuition-outline}/references/templates/confidence_scoring.md +0 -0
  153. package/skills/{intuition-plan → intuition-outline}/references/templates/plan_format.md +0 -0
  154. package/skills/{intuition-plan → intuition-outline}/references/templates/planning_process.md +0 -0
@@ -0,0 +1,68 @@
+ {
+   "specialist": "code-architect",
+   "gate_started": "2026-02-27T16:30:00Z",
+   "gate_completed": "2026-02-27T16:36:00Z",
+   "assumptions": [
+     {
+       "id": "A1",
+       "title": "Output Format Consistency",
+       "default": "Use existing 3-tier rating system (excellent_fit, acceptable_fit, poor_fit)",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A2",
+       "title": "Single-File Skill Structure",
+       "default": "Implement as a single SKILL.md file",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A3",
+       "title": "Model Selection for Execution",
+       "default": "Use sonnet as the execution model",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A4",
+       "title": "Hardware Profile Path",
+       "default": "Read hardware profile from config/hardware-profile.json",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A5",
+       "title": "Report Naming Convention",
+       "default": "model_rec_YYYY-MM-DD_[use-case-slug].md",
+       "status": "accepted",
+       "user_override": null
+     }
+   ],
+   "decisions": [
+     {
+       "id": "D1",
+       "title": "Scoring Formula Approach",
+       "context": "Need to rank 47 models against user hardware. RAM, VRAM, and context length are the key dimensions.",
+       "options": ["A: Weighted percentage — RAM 40%, VRAM 40%, context 20% (recommended)", "B: Binary pass/fail per dimension, rank by headroom", "C: Single composite ratio averaged across dimensions"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D2",
+       "title": "Use-Case Filtering Strategy",
+       "context": "Models have use-case tags (chat, code, creative, reasoning). User provides a query like 'I need a coding model'.",
+       "options": ["A: Strict tag match (recommended)", "B: Fuzzy match — tagged first, then 'might also work'"],
+       "chosen": "B",
+       "user_input": null
+     },
+     {
+       "id": "D3",
+       "title": "Top-N Presentation Count",
+       "context": "Need to decide how many models to show in the recommendation report.",
+       "options": ["A: Top 5 models (recommended)", "B: Top 3 models", "C: All models above acceptable_fit threshold"],
+       "chosen": "other",
+       "user_input": "Show top 5 but also include a 'honorable mentions' section for models that scored between acceptable_fit and the 5th-place score"
+     }
+   ]
+ }
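Decision D1's weighted-percentage formula (RAM 40%, VRAM 40%, context 20%) combined with the 3-tier rating from assumption A1 could be sketched as follows. The headroom calculation and the tier cutoffs (0.8 / 0.5) are illustrative assumptions, not values taken from the package.

```python
# Illustrative sketch of D1's weighted-percentage scoring and A1's
# 3-tier rating. Tier cutoffs (0.8 / 0.5) are assumptions chosen for
# demonstration, not values from the package.

def dimension_score(available, required):
    """Ratio of available capacity to the requirement, capped at 1.0."""
    if required <= 0:
        return 1.0
    return min(available / required, 1.0)

def fit_score(hw, req):
    """Weighted fit: RAM 40%, VRAM 40%, context length 20%."""
    return (0.4 * dimension_score(hw["ram_gb"], req["ram_gb"])
            + 0.4 * dimension_score(hw["vram_gb"], req["vram_gb"])
            + 0.2 * dimension_score(hw["context"], req["context"]))

def rating(score):
    """Map a score onto the existing 3-tier rating system."""
    if score >= 0.8:
        return "excellent_fit"
    if score >= 0.5:
        return "acceptable_fit"
    return "poor_fit"

hw = {"ram_gb": 64, "vram_gb": 24, "context": 32768}
req = {"ram_gb": 32, "vram_gb": 16, "context": 8192}
print(rating(fit_score(hw, req)))  # excellent_fit
```

A model meeting every requirement scores 1.0; one short on a single dimension degrades proportionally to that dimension's weight, which is what makes option A rank-friendly compared to binary pass/fail.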
@@ -0,0 +1,110 @@
+ {
+   "specialist": "code-architect",
+   "gate_started": "2026-02-27T16:40:00Z",
+   "gate_completed": "2026-02-27T16:52:00Z",
+   "assumptions": [
+     {
+       "id": "A1",
+       "title": "Test Framework",
+       "default": "Use Jest with supertest (already in devDependencies)",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A2",
+       "title": "Single-File Skill Structure",
+       "default": "Implement as a single SKILL.md file",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A3",
+       "title": "Model Selection",
+       "default": "Use sonnet as the execution model",
+       "status": "accepted",
+       "user_override": null
+     }
+   ],
+   "decisions": [
+     {
+       "id": "D1",
+       "title": "Test Scope — Which Endpoints",
+       "context": "42 documented endpoints + ~6 undocumented admin routes. OpenAPI spec covers the 42.",
+       "options": ["A: All 42 documented endpoints (recommended)", "B: Critical paths only (~15)", "C: All 48 including undocumented admin"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D2",
+       "title": "External Service Mocking Strategy",
+       "context": "3 external dependencies: payment processor, email service, search index.",
+       "options": ["A: In-process mocks — nock/msw (recommended)", "B: Sidecar mock servers", "C: Real staging services"],
+       "chosen": "other",
+       "user_input": "Use msw for email and search, but use a real Stripe test-mode instance for payment since Stripe has a robust test API"
+     },
+     {
+       "id": "D3",
+       "title": "Database Strategy",
+       "context": "No test database seeding currently. 340 transitive deps in full tree.",
+       "options": ["A: SQLite in-memory (recommended)", "B: Dockerized test database", "C: Shared test DB with transaction rollback"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D4",
+       "title": "Auth Token Management",
+       "context": "Auth middleware uses JWT with refresh tokens. Integration tests need token management.",
+       "options": ["A: Pre-generated static tokens (recommended)", "B: Full auth flow per test", "C: Bypass auth middleware"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D5",
+       "title": "Test Organization",
+       "context": "14 route files in src/api/routes/. Need to organize test files.",
+       "options": ["A: One test file per route file — 14 files (recommended)", "B: One per endpoint — 42 files", "C: Grouped by domain — ~6 files"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D6",
+       "title": "Response Validation Depth",
+       "context": "OpenAPI 3.0 spec available for response shape validation.",
+       "options": ["A: Schema validation + key field assertions (recommended)", "B: Full deep-equal", "C: Status code + content-type only"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D7",
+       "title": "Error Case Coverage",
+       "context": "Error handling is where most integration bugs hide.",
+       "options": ["A: All documented error codes per endpoint (recommended)", "B: Common errors only (400, 401, 404, 500)", "C: Happy path only"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D8",
+       "title": "Rate Limiting Test Approach",
+       "context": "Rate limiting is per-IP in production. Needs different handling in tests.",
+       "options": ["A: Configurable rate limits in test env (recommended)", "B: Real rate limits", "C: Skip rate limit testing"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D9",
+       "title": "Test Data Seeding Strategy",
+       "context": "No existing seeding. Tests currently use mocked data stores.",
+       "options": ["A: Fixture files per test suite (recommended)", "B: Factory functions with random data", "C: Shared seed script"],
+       "chosen": "B",
+       "user_input": null
+     },
+     {
+       "id": "D10",
+       "title": "CI Integration",
+       "context": "Integration tests are slower than unit tests. Need CI strategy.",
+       "options": ["A: Separate CI job — run on PR (recommended)", "B: Combined with unit tests", "C: Manual trigger only"],
+       "chosen": "A",
+       "user_input": null
+     }
+   ]
+ }
@@ -0,0 +1,40 @@
+ {
+   "specialist": "code-architect",
+   "gate_started": "2026-02-27T16:00:00Z",
+   "gate_completed": "2026-02-27T16:03:00Z",
+   "assumptions": [],
+   "decisions": [
+     {
+       "id": "D1",
+       "title": "Scope of Audit",
+       "context": "Existing tests are unit-level only. No integration tests. Options range from vuln-only to full audit.",
+       "options": ["A: Vulnerabilities only (recommended)", "B: Vulnerabilities + license compliance", "C: Vulnerabilities + license compliance + version staleness"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D2",
+       "title": "Transitive Dependency Depth",
+       "context": "340 transitive dependencies in the tree. Direct-only covers 20 packages.",
+       "options": ["A: Full tree (recommended)", "B: Direct only"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D3",
+       "title": "Output Verbosity",
+       "context": "npm audit output is verbose and hard to read. Need to balance detail with readability.",
+       "options": ["A: Summary with expandable details (recommended)", "B: Full verbose", "C: Executive summary only"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D4",
+       "title": "Remediation Suggestions",
+       "context": "Users need actionable output. Fix commands risk suggesting breaking changes.",
+       "options": ["A: Include fix commands (recommended)", "B: Flag issues only"],
+       "chosen": "B",
+       "user_input": null
+     }
+   ]
+ }
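D3's "summary with expandable details" could be built on top of `npm audit --json`, whose `metadata.vulnerabilities` object carries per-severity counts. A minimal sketch, using a fabricated sample payload rather than real audit output:

```python
import json

# Sketch of D3's summary-first output: condense npm audit's JSON report
# (metadata.vulnerabilities holds per-severity counts) into one headline
# line, leaving per-package details for the expandable section.
# The sample payload below is fabricated for illustration.
sample = json.loads("""{
  "metadata": {"vulnerabilities":
    {"info": 0, "low": 3, "moderate": 1, "high": 2, "critical": 0}}
}""")

def summarize(report):
    counts = report["metadata"]["vulnerabilities"]
    total = sum(counts.values())
    # Severities ordered worst-first; pick the worst with a nonzero count.
    worst = [s for s in ("critical", "high", "moderate", "low", "info")
             if counts.get(s)]
    headline = worst[0] if worst else "none"
    return f"{total} vulnerabilities (worst: {headline})"

print(summarize(sample))  # 6 vulnerabilities (worst: high)
```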
@@ -0,0 +1,46 @@
+ {
+   "specialist": "code-architect",
+   "gate_started": "2026-02-27T17:10:00Z",
+   "gate_completed": null,
+   "assumptions": [
+     {
+       "id": "A1",
+       "title": "Output Format Consistency",
+       "default": "Use existing 3-tier rating system (excellent_fit, acceptable_fit, poor_fit)",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A2",
+       "title": "Single-File Skill Structure",
+       "default": "Implement as a single SKILL.md file",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A3",
+       "title": "Model Selection for Execution",
+       "default": "Use sonnet as the execution model",
+       "status": "accepted",
+       "user_override": null
+     }
+   ],
+   "decisions": [
+     {
+       "id": "D1",
+       "title": "Scoring Formula Approach",
+       "context": "Need to rank 47 models against user hardware. RAM, VRAM, and context length are the key dimensions.",
+       "options": ["A: Weighted percentage — RAM 40%, VRAM 40%, context 20% (recommended)", "B: Binary pass/fail per dimension, rank by headroom", "C: Single composite ratio averaged across dimensions"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D2",
+       "title": "Use-Case Filtering Strategy",
+       "context": "Models have use-case tags (chat, code, creative, reasoning). User provides a query like 'I need a coding model'.",
+       "options": ["A: Strict tag match (recommended)", "B: Fuzzy match — tagged first, then 'might also work'"],
+       "chosen": "B",
+       "user_input": null
+     }
+   ]
+ }
@@ -0,0 +1,54 @@
+ {
+   "specialist": "code-architect",
+   "gate_started": "2026-02-27T17:10:00Z",
+   "gate_completed": "2026-02-27T17:18:00Z",
+   "assumptions": [
+     {
+       "id": "A1",
+       "title": "Output Format Consistency",
+       "default": "Use existing 3-tier rating system (excellent_fit, acceptable_fit, poor_fit)",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A2",
+       "title": "Single-File Skill Structure",
+       "default": "Implement as a single SKILL.md file",
+       "status": "accepted",
+       "user_override": null
+     },
+     {
+       "id": "A3",
+       "title": "Model Selection for Execution",
+       "default": "Use sonnet as the execution model",
+       "status": "accepted",
+       "user_override": null
+     }
+   ],
+   "decisions": [
+     {
+       "id": "D1",
+       "title": "Scoring Formula Approach",
+       "context": "Need to rank 47 models against user hardware.",
+       "options": ["A: Weighted percentage (recommended)", "B: Binary pass/fail", "C: Single composite ratio"],
+       "chosen": "A",
+       "user_input": null
+     },
+     {
+       "id": "D2",
+       "title": "Use-Case Filtering Strategy",
+       "context": "Models have use-case tags.",
+       "options": ["A: Strict tag match (recommended)", "B: Fuzzy match"],
+       "chosen": "B",
+       "user_input": null
+     },
+     {
+       "id": "D3",
+       "title": "Top-N Presentation Count",
+       "context": "Need to decide how many models to show.",
+       "options": ["A: Top 5 (recommended)", "B: Top 3", "C: All above threshold"],
+       "chosen": "A",
+       "user_input": null
+     }
+   ]
+ }
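Decision D2's fuzzy match (exact-tag models first, then a "might also work" tier) could look like the sketch below. The catalog entries reuse model IDs mentioned elsewhere in this diff, but the tag assignments and the related-tag mapping are illustrative assumptions.

```python
# Sketch of D2's fuzzy use-case filtering: exact-tag matches first,
# then a "might also work" tier drawn from related tags. The tag
# assignments and RELATED mapping are illustrative assumptions.
CATALOG = {
    "qwen2.5-14b": ["code", "reasoning"],
    "qwen2.5-7b": ["chat", "code"],
    "llama3.1-8b": ["chat", "creative"],
}
RELATED = {"code": ["reasoning"], "creative": ["chat"]}

def filter_models(use_case):
    """Return (exact matches, 'might also work' matches) for a use case."""
    exact = [m for m, tags in CATALOG.items() if use_case in tags]
    nearby = [m for m, tags in CATALOG.items()
              if m not in exact
              and any(t in tags for t in RELATED.get(use_case, []))]
    return exact, nearby

exact, maybe = filter_models("creative")
print(exact)  # ['llama3.1-8b']
print(maybe)  # ['qwen2.5-7b']
```

Compared with option A's strict match, the second tier surfaces candidates a strict filter would silently drop, at the cost of a slightly longer report.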
@@ -0,0 +1,133 @@
1
+ # Stage 1 Exploration: Task 2 — Build the Hardware Vetter Claude Code Skill
2
+
3
+ ## Research Findings
4
+
5
+ ### Files Examined
6
+
7
+ | File | Path | Key Data |
8
+ |------|------|----------|
9
+ | Model Catalog | `C:/Projects/District_AI_Agent_Implementation/docs/model_catalog.json` | Schema v1.2, 11 models, `hardware_profile`, `infrastructure_options` |
10
+ | Pipeline Config | `C:/Projects/District_AI_Agent_Implementation/src/pipeline/config.py` | Pydantic Settings, 3 registered Ollama model IDs: `qwen2.5:14b`, `qwen2.5:7b`, `llama3.1:8b` |
11
+ | Existing Skill | `C:/Projects/District_AI_Agent_Implementation/.claude/skills/hardware-vetter/SKILL.md` | 708-line complete skill implementation already exists |
12
+
13
+ ### Critical Finding: Skill Already Exists
14
+
15
+ The file `.claude/skills/hardware-vetter/SKILL.md` is a **complete, 708-line implementation** — not a stub. It includes full YAML frontmatter, 8 workflow sections, detailed question flow via AskUserQuestion, GPU-primary and CPU-only analysis paths, benchmark search protocol (8-call cap), full report template, 7 error handling scenarios, and a completion message format.
16
+
17
+ ### Codebase Conventions
18
+
19
+ - Single-file skill pattern: everything in `SKILL.md` (no reference files loaded by Claude Code)
20
+ - Frontmatter format: `name`, `description`, `model`, `tools` fields
21
+ - Report output directory: `docs/reports/` with naming convention `hardware_eval_YYYY-MM-DD_[slug].md`
22
+ - Catalog field `hardware_profile.ram_gb` (not `total_ram_gb` as referenced in current skill)
23
+ - Ollama IDs use colon format in config.py (`qwen2.5:14b`) but hyphen format as catalog keys (`qwen2.5-14b`); matching via `ollama_id` field
24
+
25
+ ### Data Field Issues Found
26
+
27
+ 1. **Field name mismatch:** Skill Section 2.1 references `hardware_profile.total_ram_gb` but catalog uses `hardware_profile.ram_gb`
28
+ 2. **Nonexistent field reference:** Skill Section 4.2a references `gpu_vram_gb_fp16` which does not exist in any model's `hardware_requirements`; only `ram_gb_fp16` exists
29
+ 3. **Unused tool:** `Glob` listed in frontmatter but never referenced in skill body
30
+
31
+ ## ECD Analysis
32
+
33
+ ### Elements
34
+
35
+ - **Skill file:** Single `SKILL.md` at `.claude/skills/hardware-vetter/SKILL.md`
36
+ - **Data source 1:** `docs/model_catalog.json` — 11 models, hardware profile, infrastructure options
37
+ - **Data source 2:** `src/pipeline/config.py` — 3 registered model IDs (default, fast, chat)
38
+ - **Output artifact:** Markdown report at `docs/reports/hardware_eval_YYYY-MM-DD_[slug].md`
39
+ - **Model catalog schema fields per model:** `display_name`, `ollama_id`, `parameter_count`, `size_tier`, `hardware_requirements` (with `ram_gb_q4`, `ram_gb_q8`, `ram_gb_fp16`, `recommended_ram_gb`, `gpu_vram_gb_q4`, `gpu_vram_gb_q8`, `gpu_offload_support`), `feasibility`, `raw_benchmarks`, `category_scores`
40
+
41
+ ### Connections
42
+
43
+ - Skill reads `model_catalog.json` to extract hardware profile and all 11 model entries
44
+ - Skill reads `config.py` to extract 3 registered Ollama IDs, then matches via `ollama_id` field to catalog entries
45
+ - AskUserQuestion collects proposed hardware changes from user (4 change types: CPU, GPU, RAM, full system)
46
+ - Analysis engine branches on GPU presence: GPU-primary path or CPU-only path
47
+ - WebSearch (up to 8 calls) finds published benchmarks to upgrade estimates from "Projected" to "Verified"
48
+ - Write tool outputs the final report to `docs/reports/`
49
+
50
+ ### Dynamics
51
+
52
+ - **Execution flow:** Data loading → question flow → analysis → benchmark search → report writing → completion message
53
+ - **Graceful degradation:** GPU fields missing → CPU-only path; benchmarks return nothing → projected estimates; config.py unreadable → all 11 as candidates; catalog missing → STOP
54
+ - **Unhandled edge cases:** Unified memory architectures (e.g., DGX Spark), multi-GPU configurations, adding a separate node (vs upgrading existing), models not in catalog, concurrent multi-model loading analysis
55
+
56
+ ## Assumptions
57
+
58
+ ### A1: Single-File Skill Structure
59
+ - **Default**: Keep the entire skill as a single `SKILL.md` file with no companion files
60
+ - **Rationale**: Claude Code only injects `SKILL.md` into context (the "Reference File Problem"). All existing Intuition skills follow this pattern. Splitting into multiple files would break skill loading.
61
+
62
+ ### A2: Fix the `total_ram_gb` Field Name Mismatch
63
+ - **Default**: Change `total_ram_gb` to `ram_gb` in Section 2.1 to match the actual catalog field
64
+ - **Rationale**: The catalog field is definitively `ram_gb`. The current reference is incorrect and could cause runtime confusion for the sonnet model executing the skill.
65
+
66
+ ### A3: Fix the `gpu_vram_gb_fp16` Nonexistent Field Reference
67
+ - **Default**: Remove or correct the FP16 case in the GPU analysis path (Section 4.2a), since `gpu_vram_gb_fp16` does not exist in any model's hardware_requirements
68
+ - **Rationale**: No model uses FP16 as its recommended quantization, and the field does not exist. The reference is dead code that could confuse the executor.
69
+
70
+ ### A4: Preserve Existing Report Format and Naming Convention
71
+ - **Default**: Keep the existing `hardware_eval_YYYY-MM-DD_[slug].md` naming convention and report structure
72
+ - **Rationale**: An existing report (`hardware_eval_2026-02-20_thinkstation-pgx-addition.md`) already demonstrates this format works well. Changing it would create inconsistency with prior reports.
73
+
74
+ ### A5: Match via `ollama_id` Field for Config-to-Catalog Linking
75
+ - **Default**: Continue matching config.py model IDs to catalog entries via the `ollama_id` field (colon format)
76
+ - **Rationale**: The skill already implements this correctly. Catalog keys use hyphen format but `ollama_id` uses colon format matching config.py exactly.
77
+
78
+ ### A6: Keep `sonnet` as the Execution Model
79
+ - **Default**: Retain `model: sonnet` in frontmatter for skill execution
80
+ - **Rationale**: The skill is data-reading, question-asking, and report-writing — tasks well-suited to sonnet. Opus would be overkill for the structured analysis and report generation this skill performs.
81
+
82
+ ### A7: Lightweight Schema Validation (Existence Checks Only)
83
+ - **Default**: Validate only that `model_catalog.json` exists, is readable, and `hardware_profile` is present — no deep schema validation
84
+ - **Rationale**: Full JSON schema validation would require code execution tools not available to the skill. The existing approach of reading data and gracefully degrading when fields are missing is the correct pattern for a Claude Code skill.
85
+
86
+ ## Key Decisions
87
+
88
+ ### D1: Scope of Changes — Fix Only vs Enhancement
89
+ - **Options**:
90
+ - A) Fix data field issues only (Issues 1-3) — recommended: Minimal, low-risk changes to a working skill. Corrects the `ram_gb` mismatch, removes `gpu_vram_gb_fp16` dead reference, optionally removes unused `Glob` tool. Does not change functionality.
91
+ - B) Fix issues + add unified memory architecture support: Adds a sub-path in Section 4.2 for systems like DGX Spark where GPU VRAM and system RAM are unified. Medium scope increase.
92
+ - C) Fix issues + add unified memory + add concurrent model loading analysis: Also adds a check that sums RAM/VRAM requirements for all 3 registered models loaded simultaneously. Largest scope.
93
+ - **Recommendation**: A, because the skill is already production-quality and complete. The acceptance criteria are already met. Scope creep into new features (unified memory, concurrent loading) should be separate tasks with their own planning.
94
+ - **Risk if wrong**: If option A is chosen but unified memory systems are evaluated soon, the skill will produce incorrect VRAM/RAM split analysis for those architectures. However, this can be addressed in a follow-up task.

### D2: Remove or Keep Unused `Glob` Tool in Frontmatter
- **Options**:
  - A) Remove `Glob` from the tools list — recommended: Reduces the tool surface to only what the skill actually uses (Read, WebSearch, AskUserQuestion, Write).
  - B) Keep `Glob` and add a use case: Add a step to check if `docs/reports/` directory exists before writing, giving Glob a purpose.
- **Recommendation**: A, because the Write tool will create the file regardless, and adding a directory check adds complexity for negligible benefit. Smaller tool surface means fewer tokens spent on tool descriptions.
- **Risk if wrong**: Negligible either way. If a future skill revision needs Glob, it can be re-added.

### D3: How to Handle the Existing Skill — Review-and-Patch vs Rewrite
- **Options**:
  - A) Review-and-patch — recommended: Treat the existing 708-line skill as the baseline. Apply targeted fixes (field name corrections, dead reference removal). Verify against acceptance criteria. Minimal diff.
  - B) Rewrite from scratch: Produce a new SKILL.md from the blueprint, incorporating lessons learned but potentially losing working edge case handling.
- **Recommendation**: A, because the existing skill handles 7 error scenarios, has a well-structured question flow, and covers all 8 acceptance criteria. A rewrite risks losing subtle handling that the existing implementation got right.
- **Risk if wrong**: If the existing skill has deeper structural problems beyond the data field issues, patching may be insufficient. However, research found no structural issues — only data reference mismatches.

## Risks Identified

### Risk 1: Runtime Field Name Confusion (Low Severity)
- **Description**: Even after fixing the `total_ram_gb` reference, the sonnet model executing the skill reads the actual JSON. If the instruction says one thing and the data says another, sonnet may adapt — but inconsistency between instruction text and data structure creates ambiguity.
- **Mitigation**: Fix the field name references to exactly match the catalog. This eliminates the ambiguity entirely.

### Risk 2: Future Catalog Schema Changes (Low Severity)
- **Description**: The skill hardcodes field names from schema v1.2. If the catalog schema changes, field references will break.
- **Mitigation**: The lightweight validation approach means the skill will gracefully degrade (missing fields trigger fallback paths). No action needed now.
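That graceful degradation can be made concrete with a small helper. A minimal sketch, assuming a hypothetical catalog entry (the exact schema v1.2 shape is not reproduced here):

```python
def read_field(entry: dict, field: str, fallback=None):
    """Return a catalog field if present, else a fallback.
    A missing field triggers the fallback path instead of a hard
    failure, so a schema change degrades the analysis gracefully."""
    value = entry.get(field)
    return value if value is not None else fallback

# Hypothetical catalog entry; field names are illustrative.
entry = {"ram_gb": 128, "gpu_vram_gb": 24}

ram = read_field(entry, "ram_gb")                                 # 128
fp16 = read_field(entry, "gpu_vram_gb_fp16", fallback="unknown")  # absent, falls back
```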

### Risk 3: Existing Skill Untested End-to-End (Medium Severity)
- **Description**: The 708-line skill has never been run. Subtle issues in question flow branching, analysis calculations, or report formatting may exist but are invisible until runtime.
- **Mitigation**: The code spec should include a mental walkthrough trace of at least one scenario (e.g., "Add RTX 4090 GPU") to verify the logic flow.

## Recommended Approach

The existing Hardware Vetter skill at `.claude/skills/hardware-vetter/SKILL.md` is a comprehensive, production-quality implementation that meets all 8 acceptance criteria. The engineering work should be a **review-and-patch** operation:

1. Fix the `total_ram_gb` → `ram_gb` field name mismatch in Section 2.1
2. Fix or remove the `gpu_vram_gb_fp16` nonexistent field reference in Section 4.2a
3. Remove `Glob` from the tools list (if decided)
4. Verify all remaining field references against the actual catalog schema
5. Mental walkthrough of at least one complete scenario to validate logic flow
6. No new files to create — single-file SKILL.md pattern maintained
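Step 4 lends itself to a quick mechanical sweep. A sketch under stated assumptions: the regex treats backticked snake_case tokens as field references (a rough heuristic), and the catalog path and JSON shape in the `__main__` block are hypothetical:

```python
import json
import re

def backticked_identifiers(text: str) -> set[str]:
    """Collect backticked snake_case tokens as candidate field references."""
    return {m for m in re.findall(r"`([a-z][a-z0-9_]*)`", text) if "_" in m}

def unknown_references(skill_text: str, catalog_fields: set[str]) -> set[str]:
    """Field-like references in the skill that the catalog does not define."""
    return backticked_identifiers(skill_text) - catalog_fields

if __name__ == "__main__":
    with open(".claude/skills/hardware-vetter/SKILL.md") as f:
        skill = f.read()
    with open("catalog.json") as f:                     # hypothetical path
        fields = set(json.load(f)["models"][0].keys())  # assumed catalog shape
    for name in sorted(unknown_references(skill, fields)):
        print(f"unknown field reference: {name}")
```

Against the current skill text, a sweep like this would surface `total_ram_gb` and `gpu_vram_gb_fp16`, the two mismatches already identified.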
@@ -0,0 +1,202 @@
---
name: code-architect
display_name: Code Architect
domain: code
description: >
  Analyzes software requirements, designs code architecture, and produces
  implementation blueprints for code artifacts. Replaces the design + engineer
  phases for code-domain tasks.

exploration_methodology: ECD
supported_depths: [Deep, Standard, Light]
default_depth: Deep

research_patterns:
  - "Find existing code patterns and conventions in the codebase"
  - "Locate configuration files and data schemas"
  - "Identify integration points with existing systems"
  - "Map dependencies between components"
  - "Find similar implementations to follow as patterns"

blueprint_sections:
  - "Architecture Overview"
  - "Data Flow"
  - "Implementation Detail"
  - "Error Handling"
  - "Integration Points"

default_producer: code-writer
default_output_format: code

review_criteria:
  - "All acceptance criteria addressable from the blueprint"
  - "No ambiguous implementation decisions left for the producer"
  - "Error handling covers all identified edge cases"
  - "Integration points fully specified with exact file paths and field names"
  - "Patterns match existing codebase conventions"
  - "Blueprint is self-contained — producer needs no external context"
mandatory_reviewers: []

model: opus
reviewer_model: sonnet
tools: [Read, Write, Glob, Grep, Task, AskUserQuestion]
---

# Code Architect

## Stage 1: Exploration Protocol

You are a code architect conducting exploration for a code implementation task. Your job is to research the project codebase, explore the problem space using ECD (Elements, Connections, Dynamics), and produce structured findings for the orchestrator to present to the user.

### Research Phase

First, read all project context files and codebase artifacts provided to you. Extract:
- Existing code patterns and conventions
- Data schemas and configuration structures
- Integration points and dependencies
- Constraints from the plan and existing architecture

Use the research patterns above as guides — search for relevant files using Glob and Grep, read key files to understand patterns.

### ECD Exploration

**Elements (E)** — What are the building blocks?
- What files/modules need to be created or modified?
- What data structures are involved?
- What interfaces exist between components?
- What configuration or schema requirements apply?
- What external dependencies are needed?

**Connections (C)** — How do they relate?
- How does data flow between components?
- What reads from what? What writes to what?
- How does this code interact with existing systems?
- What shared resources need coordination?

**Dynamics (D)** — How do they work/change over time?
- What is the execution flow (step by step)?
- What triggers each behavior?
- What are the error/edge cases?
- How does the system degrade gracefully?
- What happens under different input scenarios?

### Assumptions vs Key Decisions Classification

After your ECD exploration, you MUST classify every architectural item into one of two categories:

**Assumptions** — Items where there is a clear best practice, an obvious default, or only one reasonable approach given the codebase context. These are things you would do without asking. Examples:
- Following an existing naming convention in the codebase
- Using the same error handling pattern as adjacent code
- Matching an established data format or schema
- Using the project's existing dependency/library for a task

**Key Decisions** — Items where multiple valid approaches exist and the choice meaningfully affects the outcome. These require user input. Examples:
- Choosing between two viable architectures with different trade-offs
- Deciding scope boundaries (include feature X or defer it?)
- Selecting a strategy when the codebase has no established precedent
- Trade-offs between correctness, performance, and complexity

**Classification rule:** If you are uncertain whether something is an assumption or a decision, classify it as a **Key Decision**. It is better to ask unnecessarily than to assume incorrectly.

### Output Format — FORMAT COMPLIANCE IS MANDATORY

Write your findings to the specified stage1.md path. You MUST use exactly the heading levels and field labels specified below. Do not restructure, rename, or nest differently. The foreground skill parses stage1.md by these exact headings — creative reformatting will break the gate.

```markdown
# Stage 1 Exploration: [Task Title]

## Research Findings
[Facts from codebase research — file paths, schemas, patterns, constraints]

## ECD Analysis

### Elements
[Components, files, data structures identified]

### Connections
[Data flows, integration points, dependencies mapped]

### Dynamics
[Execution flows, edge cases, error scenarios identified]

## Assumptions
### A1: [Title]
- **Default**: [what you will do]
- **Rationale**: [why this is the obvious choice]

### A2: [Title]
- **Default**: [what you will do]
- **Rationale**: [why this is the obvious choice]

## Key Decisions
### D1: [Title]
- **Options**:
  - A) [option — recommended]: [rationale]
  - B) [option]: [rationale]
  - C) [option]: [rationale]
- **Recommendation**: A, because [reason]
- **Risk if wrong**: [what happens if this decision is wrong]

### D2: [Title]
- **Options**:
  - A) [option — recommended]: [rationale]
  - B) [option]: [rationale]
- **Recommendation**: A, because [reason]
- **Risk if wrong**: [what happens if this decision is wrong]

## Risks Identified
[Each risk with severity and mitigation]

## Recommended Approach
[Overall recommendation summarizing the proposed direction]
```

For Standard depth: abbreviate to Research Findings + Recommended Approach + Assumptions + 1-2 Key Decisions only.
For Light depth: Research Findings + Recommended Approach only (no decisions — proceed autonomously).
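The reason heading compliance is load-bearing can be shown with a toy parser. This is not the foreground skill's actual implementation, only a sketch of the failure mode: a parser keyed on exact `## ` headings silently loses any section that is renamed or re-nested.

```python
import re

def split_sections(stage1_md: str) -> dict[str, str]:
    """Split a stage1.md document into bodies keyed by its '## ' headings."""
    sections: dict[str, str] = {}
    current = None
    for line in stage1_md.splitlines():
        m = re.match(r"^## (.+)$", line)
        if m:
            # A new top-level section begins; deeper headings (###) fall through.
            current = m.group(1).strip()
            sections[current] = ""
        elif current is not None:
            sections[current] += line + "\n"
    return sections

doc = "# Stage 1 Exploration: Demo\n## Research Findings\nfacts\n## Key Decisions\n### D1: X\n"
sections = split_sections(doc)  # keys: 'Research Findings', 'Key Decisions'
```

A section retitled to, say, `## Findings` would simply not appear under the expected key, which is exactly how "creative reformatting" breaks the gate.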

## Stage 2: Specification Protocol

You are a code architect producing a detailed blueprint from approved exploration findings.

You will receive:
1. Your Stage 1 findings (the exploration you conducted)
2. The user's decisions on each key question

Produce the full blueprint in the universal envelope format with these 9 sections:

1. **Task Reference** — plan task numbers, acceptance criteria, dependencies
2. **Research Findings** — from your Stage 1 codebase research (file paths, patterns, schemas)
3. **Approach** — the approved direction (incorporating user decisions)
4. **Decisions Made** — every decision with alternatives considered and user's choice
5. **Deliverable Specification** — the detailed implementation specification. This must contain enough detail that a code-writer producer can implement without making any architectural or design decisions. Include:
   - Exact file paths to create/modify
   - Complete data structures with field names and types
   - Full algorithm/logic specifications with formulas and thresholds
   - All error handling cases with exact behaviors
   - Worked examples for complex calculations
   - UI/interaction specifications (question flows, output formats)
   - Configuration values and constants
   - Template structures for generated outputs
   - Pattern references from existing codebase
6. **Acceptance Mapping** — how each plan acceptance criterion is addressed
7. **Integration Points** — exact file paths, field names, and data formats for all integrations
8. **Open Items** — must be empty or contain only [VERIFY]/execution-time items
9. **Producer Handoff** — output format, producer name, filename, content blocks in order, target line count, instruction tone guidance

Write the completed blueprint to the specified blueprint path.

## Review Protocol

You are reviewing code produced from a blueprint you authored. Your job is to FIND PROBLEMS, not approve.

Check each review criterion against the produced deliverable:
1. Read the blueprint to understand what was specified
2. Read the produced code/artifact
3. For each criterion: PASS or FAIL with specific evidence
4. Flag any invented functionality (present in code but not in blueprint)
5. Flag any omitted functionality (in blueprint but missing from code)
6. Flag any architectural decisions the producer made that should have been in the blueprint
7. Verify error handling covers all specified cases
8. Verify integration points match exact specifications

Return: PASS (all criteria met) or FAIL (with specific issues and remediation guidance)
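One way to picture the per-criterion verdict this protocol asks for (the structure and names below are illustrative, not a prescribed schema):

```python
from dataclasses import dataclass

@dataclass
class CriterionResult:
    criterion: str
    passed: bool
    evidence: str  # specific evidence, not a bare verdict

@dataclass
class ReviewVerdict:
    results: list[CriterionResult]

    @property
    def verdict(self) -> str:
        # PASS only when every criterion passed; any single failure fails the review.
        return "PASS" if all(r.passed for r in self.results) else "FAIL"

review = ReviewVerdict([
    CriterionResult("Error handling covers all identified edge cases", True,
                    "all specified scenarios present"),
    CriterionResult("Integration points match exact specifications", False,
                    "reads total_ram_gb where the blueprint specifies ram_gb"),
])
```

Here `review.verdict` is `"FAIL"`; remediation guidance would travel alongside the failing evidence.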