npm - opencode-hive - Versions diffs - 1.1.0 → 1.2.0 - Mend

opencode-hive 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/agents/architect.d.ts +1 -1
package/dist/agents/forager.d.ts +1 -1
package/dist/agents/hive.d.ts +1 -1
package/dist/agents/hygienic.d.ts +1 -1
package/dist/agents/scout.d.ts +1 -1
package/dist/agents/swarm.d.ts +1 -1
package/dist/index.js +1189 -168
package/dist/skills/registry.generated.d.ts +1 -1
package/package.json +1 -1
package/skills/agents-md-mastery/SKILL.md +253 -0
package/skills/docker-mastery/SKILL.md +346 -0
package/skills/executing-plans/SKILL.md +2 -2
package/skills/test-driven-development/SKILL.md +1 -1
package/skills/writing-plans/SKILL.md +7 -0
package/skills/onboarding/SKILL.md +0 -61

package/dist/index.js CHANGED Viewed

@@ -12,7 +12,7 @@ var __export = (target, all) => {
 var __require = /* @__PURE__ */ createRequire(import.meta.url);
 // src/index.ts
-import * as path7 from "path";
+import * as path8 from "path";
 import * as os from "os";
 // ../../node_modules/zod/v4/classic/external.js
@@ -12336,8 +12336,260 @@ function tool(input) {
 }
 tool.schema = exports_external;
 // src/skills/registry.generated.ts
-var BUILTIN_SKILL_NAMES = ["brainstorming", "code-reviewer", "dispatching-parallel-agents", "executing-plans", "onboarding", "parallel-exploration", "systematic-debugging", "test-driven-development", "verification-before-completion", "writing-plans"];
+var BUILTIN_SKILL_NAMES = ["agents-md-mastery", "brainstorming", "code-reviewer", "dispatching-parallel-agents", "docker-mastery", "executing-plans", "parallel-exploration", "systematic-debugging", "test-driven-development", "verification-before-completion", "writing-plans"];
 var BUILTIN_SKILLS = [
+  {
+    name: "agents-md-mastery",
+    description: "Use when bootstrapping, updating, or reviewing AGENTS.md — teaches what makes effective agent memory, how to structure sections, signal vs noise filtering, and when to prune stale entries",
+    template: `# AGENTS.md Mastery
+## Overview
+**AGENTS.md is pseudo-memory loaded at session start.** Every line shapes agent behavior for the entire session. Quality beats quantity. Write for agents, not humans.
+Unlike code comments or READMEs, AGENTS.md entries persist across all agent sessions. A bad entry misleads agents hundreds of times. A missing entry causes the same mistake repeatedly.
+**Core principle:** Optimize for agent comprehension and behavioral change, not human readability.
+## The Iron Law
+\`\`\`
+EVERY ENTRY MUST CHANGE AGENT BEHAVIOR
+\`\`\`
+If an entry doesn't:
+- Prevent a specific mistake
+- Enable a capability the agent would otherwise miss
+- Override a default assumption that breaks in this codebase
+...then it doesn't belong in AGENTS.md.
+**Test:** Would a fresh agent session make a mistake without this entry? If no → noise.
+## When to Use
+| Trigger | Action |
+|---------|--------|
+| New project bootstrap | Write initial AGENTS.md with build/test/style basics |
+| Feature completion | Sync new learnings via \`hive_agents_md\` tool |
+| Periodic review | Audit for stale/redundant entries (quarterly) |
+| Quality issues | Agent repeating mistakes? Check if AGENTS.md has the fix |
+## What Makes Good Agent Memory
+### Signal Entries (Keep)
+✅ **Project-specific conventions:**
+- "We use Zustand, not Redux — never add Redux"
+- "Auth lives in \`/lib/auth\` — never create auth elsewhere"
+- "Run \`bun test\` not \`npm test\` (we don't use npm)"
+✅ **Non-obvious patterns:**
+- "Use \`.js\` extension for local imports (ESM requirement)"
+- "Worktrees don't share \`node_modules\` — run \`bun install\` in each"
+- "SandboxConfig is in \`dockerSandboxService.ts\`, NOT \`types.ts\`"
+✅ **Gotchas that break builds:**
+- "Never use \`ensureDirSync\` — doesn't exist. Use \`ensureDir\` (sync despite name)"
+- "Import from \`../utils/paths.js\` not \`./paths\` (ESM strict)"
+### Noise Entries (Remove)
+❌ **Agent already knows:**
+- "This project uses TypeScript" (agent detects from files)
+- "We follow semantic versioning" (universal convention)
+- "Use descriptive variable names" (generic advice)
+❌ **Irrelevant metadata:**
+- "Created on January 2024"
+- "Originally written by X"
+- "License: MIT" (in LICENSE file already)
+❌ **Describes what code does:**
+- "FeatureService manages features" (agent can read code)
+- "The system uses git worktrees" (observable from commands)
+### Rule of Thumb
+**Signal:** Changes how agent acts
+**Noise:** Documents what agent observes
+## Section Structure for Fast Comprehension
+Agents read AGENTS.md top-to-bottom once at session start. Put high-value info first:
+\`\`\`markdown
+# Project Name
+## Build & Test Commands
+# ← Agents need this IMMEDIATELY
+bun run build
+bun run test
+bun run release:check
+## Code Style
+# ← Prevents syntax/import errors
+- Semicolons: Yes
+- Quotes: Single
+- Imports: Use \`.js\` extension
+## Architecture
+# ← Key directories, where things live
+packages/
+├── hive-core/      # Shared logic
+├── opencode-hive/  # Plugin
+└── vscode-hive/    # Extension
+## Important Patterns
+# ← How to do common tasks correctly
+Use \`readText\` from paths.ts, not fs.readFileSync
+## Gotchas & Anti-Patterns
+# ← Things that break or mislead
+NEVER use \`ensureDirSync\` — doesn't exist
+\`\`\`
+**Keep total under 500 lines.** Beyond that, agents lose focus and miss critical entries.
+## The Sync Workflow
+After completing a feature, sync learnings to AGENTS.md:
+1. **Trigger sync:**
+   \`\`\`typescript
+   hive_agents_md({ action: 'sync', feature: 'feature-name' })
+   \`\`\`
+2. **Review each proposal:**
+   - Read the proposed change
+   - Ask: "Does this change agent behavior?"
+   - Check: Is this already obvious from code/files?
+3. **Accept signal, reject noise:**
+   - ❌ "TypeScript is used" → Agent detects this
+   - ✅ "Use \`.js\` extension for imports" → Prevents build failures
+4. **Apply approved changes:**
+   \`\`\`typescript
+   hive_agents_md({ action: 'apply' })
+   \`\`\`
+**Warning:** Don't auto-approve all proposals. One bad entry pollutes all future sessions.
+## When to Prune
+Remove entries when they become:
+**Outdated:**
+- "We use Redux" → Project migrated to Zustand
+- "Node 16 compatibility required" → Now on Node 22
+**Redundant:**
+- "Use single quotes" + "Strings use single quotes" → Keep one
+- Near-duplicates in different sections
+**Too generic:**
+- "Write clear code" → Applies to any project
+- "Test your changes" → Universal advice
+**Describing code:**
+- "TaskService manages tasks" → Agent can read \`TaskService\` class
+- "Worktrees are in \`.hive/.worktrees/\`" → Observable from filesystem
+**Proven unnecessary:**
+- Entry added 6 months ago, but agents haven't hit that issue since
+## Red Flags
+| Warning Sign | Why It's Bad | Fix |
+|-------------|-------------|-----|
+| AGENTS.md > 800 lines | Agents lose focus, miss critical info | Prune aggressively |
+| Describes what code does | Agent can read code | Remove descriptions |
+| Missing build/test commands | First thing agents need | Add at top |
+| No gotchas section | Agents repeat past mistakes | Document failure modes |
+| Generic best practices | Doesn't change behavior | Remove or make specific |
+| Outdated patterns | Misleads agents | Prune during sync |
+## Anti-Patterns
+| Anti-Pattern | Better Approach |
+|-------------|----------------|
+| "Document everything" | Document only what changes behavior |
+| "Keep for historical record" | Version control is history |
+| "Might be useful someday" | Add when proven necessary |
+| "Explains the system" | Agents read code for that |
+| "Comprehensive reference" | AGENTS.md is a filter, not docs |
+## Good Examples
+**Build Commands (High value, agents need immediately):**
+\`\`\`markdown
+## Build & Test Commands
+bun run build              # Build all packages
+bun run test               # Run all tests
+bun run release:check      # Full CI check
+\`\`\`
+**Project-Specific Convention (Prevents mistakes):**
+\`\`\`markdown
+## Code Style
+- Imports: Use \`.js\` extension for local imports (ESM requirement)
+- Paths: Import from \`../utils/paths.js\` never \`./paths\`
+\`\`\`
+**Non-Obvious Gotcha (Prevents build failure):**
+\`\`\`markdown
+## Important Patterns
+Use \`ensureDir\` from paths.ts — sync despite name
+NEVER use \`ensureDirSync\` (doesn't exist)
+\`\`\`
+## Bad Examples
+**Generic advice (agent already knows):**
+\`\`\`markdown
+## Best Practices
+- Use meaningful variable names
+- Write unit tests
+- Follow DRY principle
+\`\`\`
+**Describes code (agent can read it):**
+\`\`\`markdown
+## Architecture
+The FeatureService class manages features. It has methods
+for create, read, update, and delete operations.
+\`\`\`
+**Irrelevant metadata:**
+\`\`\`markdown
+## Project History
+Created in January 2024 by the platform team.
+Originally built for internal use.
+\`\`\`
+## Verification
+Before finalizing AGENTS.md updates:
+- [ ] Every entry answers: "What mistake does this prevent?"
+- [ ] No generic advice that applies to all projects
+- [ ] Build/test commands are first
+- [ ] Gotchas section exists and is populated
+- [ ] Total length under 500 lines (800 absolute max)
+- [ ] No entries describing what code does
+- [ ] Fresh agent session would benefit from each entry
+## Summary
+AGENTS.md is **behavioral memory**, not documentation:
+- Write for agents, optimize for behavior change
+- Signal = prevents mistakes, Noise = describes observables
+- Sync after features, prune quarterly
+- Test: Would agent make a mistake without this entry?
+**Quality > quantity. Every line counts.**`
+  },
   {
     name: "brainstorming",
     description: "Use before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation.",
@@ -12795,6 +13047,351 @@ From debugging session (2025-10-03):
 - All investigations completed concurrently
 - All fixes integrated successfully
 - Zero conflicts between agent changes`
+  },
+  {
+    name: "docker-mastery",
+    description: "Use when working with Docker containers — debugging container failures, writing Dockerfiles, docker-compose for integration tests, image optimization, or deploying containerized applications",
+    template: `# Docker Mastery
+## Overview
+Docker is a **platform for building, shipping, and running applications**, not just isolation.
+Agents should think in containers: reproducible environments, declarative dependencies, isolated execution.
+**Core principle:** Containers are not virtual machines. They share the kernel but isolate processes, filesystems, and networks.
+**Violating the letter of these guidelines is violating the spirit of containerization.**
+## The Iron Law
+\`\`\`
+UNDERSTAND THE CONTAINER BEFORE DEBUGGING INSIDE IT
+\`\`\`
+Before exec'ing into a container or adding debug commands:
+1. Check the image (what's installed?)
+2. Check mounts (what host files are visible?)
+3. Check environment variables (what config is passed?)
+4. Check the Dockerfile (how was it built?)
+Random debugging inside containers wastes time. Context first, then debug.
+## When to Use
+Use this skill when working with:
+- **Container build failures** - Dockerfile errors, missing dependencies
+- **Test environment setup** - Reproducible test environments across machines
+- **Integration test orchestration** - Multi-service setups (DB + API + tests)
+- **Dockerfile authoring** - Writing efficient, maintainable Dockerfiles
+- **Image size optimization** - Reducing image size, layer caching
+- **Deployment** - Containerized application deployment
+- **Sandbox debugging** - Issues with Hive's Docker sandbox mode
+**Use this ESPECIALLY when:**
+- Tests pass locally but fail in CI (environment mismatch)
+- "Works on my machine" problems
+- Need to test against specific dependency versions
+- Multiple services must coordinate (database + API)
+- Building for production deployment
+## Core Concepts
+### Images vs Containers
+- **Image**: Read-only template (built from Dockerfile)
+- **Container**: Running instance of an image (ephemeral by default)
+\`\`\`bash
+# Build once
+docker build -t myapp:latest .
+# Run many times
+docker run --rm myapp:latest
+docker run --rm -e DEBUG=true myapp:latest
+\`\`\`
+**Key insight:** Changes inside containers are lost unless committed or volumes are used.
+### Volumes & Mounts
+Mount host directories into containers for persistence and code sharing:
+\`\`\`bash
+# Mount current directory to /app in container
+docker run -v $(pwd):/app myapp:latest
+# Hive worktrees are mounted automatically
+# Your code edits (via Read/Write/Edit tools) affect the host
+# Container sees the same files at runtime
+\`\`\`
+**How Hive uses this:** Worktree is mounted into container, so file tools work on host, bash commands run in container.
+### Multi-Stage Builds
+Minimize image size by using multiple FROM statements:
+\`\`\`dockerfile
+# Build stage (large, has compilers)
+FROM node:22 AS builder
+# The node image does not ship bun; install it before using it
+RUN npm install -g bun
+WORKDIR /app
+COPY package.json bun.lockb ./
+RUN bun install
+COPY . .
+RUN bun run build
+# Runtime stage (small, production only)
+FROM node:22-slim
+WORKDIR /app
+COPY --from=builder /app/dist ./dist
+COPY --from=builder /app/node_modules ./node_modules
+CMD ["node", "dist/index.js"]
+\`\`\`
+**Result:** Builder tools (TypeScript, bundlers) not included in final image.
+### Docker Compose for Multi-Service Setups
+Define multiple services in \`docker-compose.yml\`:
+\`\`\`yaml
+version: '3.8'
+services:
+  db:
+    image: postgres:15
+    environment:
+      POSTGRES_PASSWORD: testpass
+      POSTGRES_DB: testdb
+    ports:
+      - "5432:5432"
+  api:
+    build: .
+    environment:
+      DATABASE_URL: postgres://postgres:testpass@db:5432/testdb
+    depends_on:
+      - db
+    ports:
+      - "3000:3000"
+\`\`\`
+Run with: \`docker-compose up -d\`
+Teardown with: \`docker-compose down\`
+### Network Modes
+- **bridge** (default): Isolated network, containers can talk to each other by name
+- **host**: Container uses host's network directly (no isolation)
+- **none**: No network access
+**When to use host mode:** Debugging network issues, accessing host services directly.
+## Common Patterns
+### Debug a Failing Container
+**Problem:** Container exits immediately, logs unclear.
+**Pattern:**
+1. Run interactively with shell:
+   \`\`\`bash
+   docker run -it --entrypoint sh myapp:latest
+   \`\`\`
+2. Inspect filesystem, check if dependencies exist:
+   \`\`\`bash
+   ls /app
+   which node
+   cat /etc/os-release
+   \`\`\`
+3. Run command manually to see full error:
+   \`\`\`bash
+   node dist/index.js
+   \`\`\`
+### Integration Tests with Docker Compose
+**Pattern:**
+1. Define services in \`docker-compose.test.yml\`
+2. Add wait logic (wait for DB to be ready)
+3. Run tests
+4. Teardown
+\`\`\`yaml
+# docker-compose.test.yml
+services:
+  db:
+    image: postgres:15
+    environment:
+      POSTGRES_PASSWORD: test
+  test:
+    build: .
+    command: bun run test:integration
+    depends_on:
+      - db
+    environment:
+      DATABASE_URL: postgres://postgres:test@db:5432/testdb
+\`\`\`
+\`\`\`bash
+docker-compose -f docker-compose.test.yml up --abort-on-container-exit
+docker-compose -f docker-compose.test.yml down
+\`\`\`
+### Optimize Dockerfile
+**Anti-pattern:**
+\`\`\`dockerfile
+FROM node:22
+WORKDIR /app
+# Copies everything (including node_modules, .git)
+COPY . .
+# Invalidates cache on any file change
+RUN bun install
+CMD ["bun", "run", "start"]
+\`\`\`
+**Optimized:**
+\`\`\`dockerfile
+# Use slim variant
+FROM node:22-slim
+WORKDIR /app
+# The node image does not ship bun; install it before using it
+RUN npm install -g bun
+# Copy dependency files first (cache layer)
+COPY package.json bun.lockb ./
+RUN bun install --production
+# Copy source code (changes frequently)
+COPY src ./src
+COPY tsconfig.json ./
+CMD ["bun", "run", "start"]
+\`\`\`
+**Add \`.dockerignore\`:**
+\`\`\`
+node_modules
+.git
+.env
+*.log
+dist
+.DS_Store
+\`\`\`
+### Handle Missing Dependencies
+**Problem:** Command fails with "not found" in container.
+**Pattern:**
+1. Check if dependency is in image:
+   \`\`\`bash
+   docker run -it myapp:latest which git
+   \`\`\`
+2. If missing, add to Dockerfile:
+   \`\`\`dockerfile
+   RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+   \`\`\`
+3. Or use a richer base image (e.g., \`node:22\` instead of \`node:22-slim\`).
+## Hive Sandbox Integration
+### How Hive Wraps Commands
+When sandbox mode is active (\`sandbox: 'docker'\` in config):
+1. Hive hook intercepts bash commands before execution
+2. Wraps with \`docker run --rm -v <worktree>:/workspace -w /workspace <image> sh -c "<command>"\`
+3. Command runs in container, but file edits (Read/Write/Edit) still affect host
+**Workers are unaware** — they issue normal bash commands, Hive handles containerization.
+### When Host Access is Needed
+Some operations MUST run on host:
+- **Git operations** (commit, push, branch) — repo state is on host
+- **Host-level tools** (Docker itself, system config)
+- **Cross-worktree operations** (accessing main repo from worktree)
+**Pattern:** Use \`HOST:\` prefix to escape sandbox:
+\`\`\`bash
+HOST: git status
+HOST: docker ps
+\`\`\`
+**If you need host access frequently:** Report as blocked and ask user if sandbox should be disabled for this task.
+### Persistent vs Ephemeral Containers
+**Current (v1.2.0):** Each command runs \`docker run --rm\` (ephemeral). State does NOT persist.
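+Example: \`npm install -g typescript\` in one command → \`tsc\` is not available in the next command (the container is discarded; only files written under the mounted \`/workspace\` survive).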
+**Workaround:** Install dependencies in Dockerfile, not at runtime.
+**Future:** \`docker exec\` will reuse containers, persisting state across commands.
+### Auto-Detected Images
+Hive detects runtime from project files:
+- \`package.json\` → \`node:22-slim\`
+- \`requirements.txt\` / \`pyproject.toml\` → \`python:3.12-slim\`
+- \`go.mod\` → \`golang:1.22-slim\`
+- \`Cargo.toml\` → \`rust:1.77-slim\`
+- \`Dockerfile\` → Builds from project Dockerfile
+- Fallback → \`ubuntu:24.04\`
+**Override:** Set \`dockerImage\` in config (\`~/.config/opencode/agent_hive.json\`).
+## Red Flags - STOP
+If you catch yourself:
+- Installing packages on host instead of in Dockerfile
+- Running \`docker build\` without \`.dockerignore\` (cache invalidation)
+- Using \`latest\` tag in production (non-reproducible)
+- Ignoring container exit codes (hides failures)
+- Assuming state persists between \`docker run --rm\` commands
+- Using absolute host paths in Dockerfile (not portable)
+- Copying secrets into image layers (leaks credentials)
+**ALL of these mean: STOP. Review pattern.**
+## Anti-Patterns
+| Excuse | Reality |
+|--------|---------|
+| "I'll just run it on host" | Container mismatch bugs are worse to debug later. Build happens in container anyway. |
+| "Works in my container, don't need CI" | CI uses different cache state. Always test in CI-like environment. |
+| "I'll optimize the Dockerfile later" | Later never comes. Large images slow down deployments now. |
+| "latest tag is fine for dev" | Dev should match prod. Pin versions or face surprises. |
+| "Don't need .dockerignore, COPY is fast" | Invalidates cache on every file change. Wastes minutes per build. |
+| "Install at runtime, not in image" | Ephemeral containers lose state. Slows down every command. |
+| "Skip depends_on, services start fast" | Race conditions in integration tests. Use wait-for-it or health checks. |
+## Verification Before Completion
+Before marking Docker work complete:
+- [ ] Container runs successfully: \`docker run --rm <image> <command>\` exits 0
+- [ ] Tests pass inside container (not just on host)
+- [ ] No host pollution (dependencies installed in container, not host)
+- [ ] \`.dockerignore\` exists if using \`COPY . .\`
+- [ ] Image tags are pinned (not \`latest\`) for production
+- [ ] Multi-stage build used if applicable (separate build/runtime)
+- [ ] Integration tests teardown properly (\`docker-compose down\`)
+**If any fail:** Don't claim success. Fix or report blocker.
+## Quick Reference
+| Task | Command Pattern |
+|------|----------------|
+| **Debug container** | \`docker run -it --entrypoint sh <image>\` |
+| **Run with mounts** | \`docker run -v $(pwd):/app <image>\` |
+| **Multi-service tests** | \`docker-compose up --abort-on-container-exit\` |
+| **Check image contents** | \`docker run --rm <image> ls /app\` |
+| **Optimize build** | Add \`.dockerignore\`, use multi-stage, pin versions |
+| **Escape Hive sandbox** | Prefix with \`HOST:\` (e.g., \`HOST: git status\`) |
+## Related Skills
+- **hive_skill:systematic-debugging** - When container behavior is unexpected
+- **hive_skill:test-driven-development** - Write tests that run in containers
+- **hive_skill:verification-before-completion** - Verify tests pass in container before claiming done`
   },
   {
     name: "executing-plans",
@@ -12857,8 +13454,8 @@ Based on feedback:
 ### Step 6: Complete Development
 After all tasks complete and verified:
-- Announce: "I'm using the finishing-a-development-branch skill to complete this work."
-- **REQUIRED SUB-SKILL:** Use hive_skill:finishing-a-development-branch
+- Announce: "I'm using the verification-before-completion skill to complete this work."
+- **REQUIRED SUB-SKILL:** Use hive_skill:verification-before-completion
 - Follow that skill to verify tests, present options, execute choice
 ## When to Stop and Ask for Help
@@ -12886,66 +13483,6 @@ After all tasks complete and verified:
 - Reference skills when plan says to
 - Between batches: just report and wait
 - Stop when blocked, don't guess`
-  },
-  {
-    name: "onboarding",
-    description: "Ask about workflow preferences and store them in .hive/contexts/preferences.md before proceeding.",
-    template: `# Onboarding Preferences
-## Overview
-Gather workflow preferences so the assistant can match the user's desired working style.
-## When to Ask
-- **Immediately when the skill is loaded**, before any other work.
-- If \`.hive/contexts/preferences.md\` does not exist, start onboarding.
-- If later a decision is ambiguous and preferences are missing, ask again.
-## Preference Storage
-Use \`hive_context_write\` to write \`.hive/contexts/preferences.md\` with this exact template:
-\`\`\`
-# Preferences
-## Exploration Style
-sync
-## Research Depth
-medium
-## Confirmation Level
-standard
-## Commit Behavior
-ask-before-commit
-\`\`\`
-## If Preferences Already Exist
-Follow the same pattern used in \`packages/vscode-hive/src/tools/plan.ts\`:
-1. Use \`contextService.list(feature)\` to detect existing contexts.
-2. Ask **"Preferences already exist. Keep or overwrite?"** using the \`question()\` tool.
-3. If keep → continue using existing preferences.
-4. If overwrite → collect new answers and write them with \`hive_context_write\`.
-## Questions to Ask (Always use \`question()\`)
-Ask one at a time, with the provided options. Store the answers in \`.hive/contexts/preferences.md\`.
-1. **Exploration Style:** sync | async
-2. **Research Depth:** shallow | medium | deep
-3. **Confirmation Level:** minimal | standard | high
-4. **Commit Behavior:** ask-before-commit | auto-commit | never-commit
-## Requirements
-- Use the \`question()\` tool (no plain text questions).
-- Ask immediately when the skill loads if preferences are missing.
-- If later a decision is ambiguous and preferences are missing, ask again.
-- Always store answers using \`hive_context_write\` with the template above.`
   },
   {
     name: "parallel-exploration",
@@ -13837,7 +14374,7 @@ Never fix bugs without a test.
 ## Testing Anti-Patterns
-When adding mocks or test utilities, read @testing-anti-patterns.md to avoid common pitfalls:
+When adding mocks or test utilities, avoid common pitfalls:
 - Testing mock behavior instead of real behavior
 - Adding test-only methods to production classes
 - Mocking without understanding dependencies
@@ -14100,6 +14637,12 @@ Always include **Depends on** for each task. Use \`none\` to enable parallel sta
 **Verify**:
 - [ ] Run: \`{command}\` → {expected}
 - [ ] {Additional acceptance criteria}
+All verification MUST be agent-executable (no human intervention):
+✅ \`bun test\` → all pass
+✅ \`curl -X POST /api/x\` → 201
+❌ "User manually tests..."
+❌ "Visually confirm..."
 \`\`\`\`
 ## Remember
@@ -14108,6 +14651,7 @@ Always include **Depends on** for each task. Use \`none\` to enable parallel sta
 - Exact commands with expected output
 - Reference relevant skills with @ syntax
 - DRY, YAGNI, TDD, frequent commits
+- All acceptance criteria must be agent-executable (zero human intervention)
 ## Execution Handoff
@@ -14317,6 +14861,19 @@ Before major transitions, verify:
 - [ ] Scope defined?
 - [ ] No critical ambiguities?
+### Turn Termination
+Valid endings:
+- Ask a concrete question
+- Update draft + ask a concrete question
+- Explicitly state you are waiting on background work (tool/task)
+- Auto-transition to the next required action
+NEVER end with:
+- "Let me know if you have questions"
+- Summary without a follow-up action
+- "When you're ready..."
 ### Loading Skills (On-Demand)
 Load when detailed guidance needed:
@@ -14325,6 +14882,11 @@ Load when detailed guidance needed:
 - \`hive_skill("dispatching-parallel-agents")\` - parallel task delegation
 - \`hive_skill("parallel-exploration")\` - parallel read-only research via task() (Scout fan-out)
 - \`hive_skill("executing-plans")\` - step-by-step plan execution
+- \`hive_skill("systematic-debugging")\` - encountering bugs, test failures, or unexpected behavior
+- \`hive_skill("test-driven-development")\` - implementing features with TDD approach
+- \`hive_skill("verification-before-completion")\` - before claiming work is complete or creating PRs
+- \`hive_skill("docker-mastery")\` - working with Docker containers, debugging, docker-compose
+- \`hive_skill("agents-md-mastery")\` - bootstrapping/updating AGENTS.md, quality review
 Load ONE skill at a time. Only when you need guidance beyond this prompt.
@@ -14445,6 +15007,17 @@ After completing and merging a batch:
 2. If yes, run \`task({ subagent_type: "hygienic", prompt: "Review implementation changes from the latest batch." })\`.
 3. Apply feedback before starting the next batch.
+### AGENTS.md Maintenance
+After feature completion (all tasks merged):
+1. Sync context findings to AGENTS.md: \`hive_agents_md({ action: "sync", feature: "feature-name" })\`
+2. Review the proposed diff with the user
+3. Apply approved changes to keep AGENTS.md current
+For projects without AGENTS.md:
+- Bootstrap with \`hive_agents_md({ action: "init" })\`
+- Generates initial documentation from codebase analysis
 ### Orchestration Iron Laws
 - Delegate by default
@@ -14462,11 +15035,19 @@ After completing and merging a batch:
 - Ask user before consulting Hygienic (Consultant/Reviewer/Debugger)
 - Load skills on-demand, one at a time
-**Never:**
+### Hard Blocks
+NEVER violate:
 - Skip phase detection
 - Mix planning and orchestration in same action
 - Auto-load all skills at start
+### Anti-Patterns
+BLOCKING violations:
+- Ending a turn without a next action
+- Asking for user input in plain text instead of question()
 **User Input:** ALWAYS use \`question()\` tool for any user input - NEVER ask questions via plain text. This ensures structured responses.
 `;
@@ -14477,25 +15058,38 @@ PLANNER, NOT IMPLEMENTER. "Do X" means "create plan for X".
 ## Intent Classification (First)
-| Intent | Signals | Action |
-|--------|---------|--------|
-| Trivial | Single file, <10 lines | Do directly. No plan needed. |
-| Simple | 1-2 files, <30 min | Light interview → quick plan |
-| Complex | 3+ files, review needed | Full discovery → detailed plan |
-| Refactor | Existing code changes | Safety: tests, rollback, blast radius |
-| Greenfield | New feature | Research patterns BEFORE asking. Delegate to Scout via \`task({ subagent_type: "scout-researcher", prompt: "..." })\` for single investigations. |
+| Intent | Signals | Strategy | Action |
+|--------|---------|----------|--------|
+| Trivial | Single file, <10 lines | N/A | Do directly. No plan needed. |
+| Simple | 1-2 files, <30 min | Quick assessment | Light interview → quick plan |
+| Complex | 3+ files, review needed | Full discovery | Full discovery → detailed plan |
+| Refactor | Existing code changes | Safety-first: behavior preservation | Tests → blast radius → plan |
+| Greenfield | New feature | Discovery-first: explore before asking | Research → interview → plan |
+| Architecture | Cross-cutting, multi-system | Strategic: consult Scout | Deep research → plan |
 During Planning, use \`task({ subagent_type: "scout-researcher", ... })\` for exploration (BLOCKING — returns when done). For parallel exploration, issue multiple \`task()\` calls in the same message.
 ## Self-Clearance Check (After Every Exchange)
-□ Core objective clear?
-□ Scope defined (IN/OUT)?
-□ No critical ambiguities?
-□ Approach decided?
+□ Core objective clearly defined?
+□ Scope boundaries established (IN/OUT)?
+□ No critical ambiguities remaining?
+□ Technical approach decided?
+□ Test strategy confirmed (TDD/tests-after/none)?
+□ No blocking questions outstanding?
+ALL YES → Announce "Requirements clear. Generating plan." → Write plan
+ANY NO → Ask the specific unclear thing
+## Test Strategy (Ask Before Planning)
-ALL YES → Write plan
-ANY NO → Ask the unclear thing
+For Build and Refactor intents, ASK:
+"Should this include automated tests?"
+- TDD: Red-Green-Refactor per task
+- Tests after: Add test tasks after implementation
+- None: No unit/integration tests
+Record decision in draft. Embed in plan tasks.
 ## AI-Slop Flags
@@ -14515,6 +15109,18 @@ ANY NO → Ask the unclear thing
 | MINOR | FIX silently, note in summary |
 | AMBIGUOUS | Apply default, DISCLOSE in summary |
+## Turn Termination
+Valid endings:
+- Question to user (via question() tool)
+- Draft update + next question
+- Auto-transition to plan generation
+NEVER end with:
+- "Let me know if you have questions"
+- Summary without follow-up action
+- "When you're ready..."
 ## Draft as Working Memory
 Create draft on first exchange. Update after EVERY user response:
@@ -14631,11 +15237,13 @@ hive_worktree_create({ task: "01-task-name" })
 - Call \`hive_status()\` immediately after to check new state and find next runnable tasks
 - For parallel fan-out, issue multiple \`task()\` calls in the same message
-## After Delegation - ALWAYS VERIFY
+## After Delegation - VERIFY
+After every delegation, check:
 - Does it work as expected?
-- Followed existing codebase pattern?
-- Followed MUST DO and MUST NOT DO?
+- Followed existing codebase patterns?
+- Met MUST DO and MUST NOT DO requirements?
+- No unintended side effects?
 ## Blocker Handling
@@ -14649,8 +15257,7 @@ When worker reports blocked:
 1. STOP all further edits
 2. REVERT to last known working state
 3. DOCUMENT what was attempted
-4. Consult: \`task({ subagent_type: "oracle", prompt: "Analyze..." })\`
-5. If Oracle cannot resolve → ASK USER
+4. ASK USER via question() — present options and context
 ## Merge Strategy
@@ -14660,13 +15267,39 @@ hive_merge({ task: "01-task-name", strategy: "merge" })
 Merge only after verification passes.
-## Post-Batch Review (Hygienic)
+### Post-Batch Review (Hygienic)
 After completing and merging a batch:
 1. Ask the user via \`question()\` if they want a Hygienic code review for the batch.
 2. If yes, run \`task({ subagent_type: "hygienic", prompt: "Review implementation changes from the latest batch." })\`.
 3. Apply feedback before starting the next batch.
+### AGENTS.md Maintenance
+After completing and merging a batch:
+1. Sync context findings to AGENTS.md: \`hive_agents_md({ action: "sync", feature: "feature-name" })\`
+2. Review the proposed diff with the user
+3. Apply approved changes to keep AGENTS.md current
+For quality review of AGENTS.md content, load \`hive_skill("agents-md-mastery")\`.
+For projects without AGENTS.md:
+- Bootstrap with \`hive_agents_md({ action: "init" })\`
+- Generates initial documentation from codebase analysis
+## Turn Termination
+Valid endings:
+- Worker delegation (hive_worktree_create)
+- Status check (hive_status)
+- User question (question())
+- Merge (hive_merge)
+NEVER end with:
+- "Let me know when you're ready"
+- Summary without next action
+- Waiting for something unspecified
 ## Iron Laws
 **Never:**
@@ -14778,84 +15411,12 @@ When asked to retrieve raw data from external systems (MongoDB/Stripe/etc.):
 ## Persistence
-When operating within a feature context (background task with feature parameter):
-- If findings are substantial (3+ files discovered, architecture patterns, or key decisions):
-  Use \`hive_context_write\` to persist findings:
+When operating within a feature context:
+- If findings are substantial (3+ files, architecture patterns, or key decisions):
   \`\`\`
   hive_context_write({
-    name: "research-{topic-slug}",
-    content: "## Research: {Topic}
-Date: {date}
-## Context
-## research-findings
-# Research Findings for Hive Improvements v2
-## Worker Prompt Builder (\`worker-prompt.ts:48\`)
-- \`buildWorkerPrompt(params: WorkerPromptParams): string\`
-- Receives: feature, task, taskOrder, worktreePath, branch, plan, contextFiles, spec, previousTasks, continueFrom
-- Only uses: feature, task, taskOrder, worktreePath, branch, spec, continueFrom
-- plan/contextFiles/previousTasks passed but NOT used (already embedded in spec)
-- 10 sections: Assignment, Continuation(optional), Mission(=spec), Blocker Protocol, Completion Protocol, TDD, Debugging, Tools, Guidelines, User Input
-- **ZERO task-type awareness** — all workers get identical protocols
-- Budget: 100KB soft limit (advisory, not enforced)
-## Task Completion Flow (\`index.ts:974-1088\`)
-- \`hive_exec_complete\` accepts: task, summary (string), status (completed|blocked|failed|partial), blocker (optional)
-- Summary stored in: status.json, report.md, commit message (first 50 chars)
-- **Summary is free-form string** — no structure enforced
-- Completed summaries collected for next task: \`allTasks.filter(t => t.status === 'done' && t.summary)\`
-- Injected into spec as \`## Completed Tasks\` → \`- taskName: summary\`
-## TaskService (\`taskService.ts\`)
-- \`buildSpecContent()\` (lines 168-225): builds spec with Dependencies, Plan Section, Context, Completed Tasks
-- \`parseTasksFromPlan()\` (lines 532-602): regex \`/^###\\s+(\\d+)\\.\\s+(.+)$/\` for task headers
-- \`resolveDependencies()\` (lines 248-268): explicit deps or implicit sequential (N depends on N-1)
-- Types: TaskStatus has \`summary?: string\`, TaskInfo has \`summary?: string\`
-## Forager Agent (\`forager.ts:8-117\`)
-- Execution flow: Understand → Implement → Verify → Report
-- **NO orient/pre-flight phase** — jumps straight to understanding task spec
-- Can read codebase, use research tools (grep_app, context7, ast_grep)
-- Cannot: delegate (task/hive_exec_start), modify plan, use hive_merge
-- Notepads: \`.hive/features/{feature}/notepads/{learnings,issues,decisions}.md\` (append-only)
-## Hygienic Agent (\`hygienic.ts:8-105\`)
-- Reviews plan DOCUMENTATION quality, not design
-- 4 criteria: Clarity, Verifiability, Completeness, Big Picture
-- Verdict: OKAY or REJECT with 4-category assessment
-- When asked to review implementation → loads \`hive_skill("code-reviewer")\`
-- **Currently only invoked for plan review** (from Hive and Architect agents)
-- Cannot delegate/spawn workers
-## Scout Agent (\`scout.ts:8-112\`)
-- Read-only research agent
-- Classifies requests: CONCEPTUAL, IMPLEMENTATION, CODEBASE, COMPREHENSIVE
-- Output format: \`<results><files>...<answer>...<next_steps>...</results>\`
-- **Does NOT persist findings** — returns to orchestrator only
-- Parallel execution by default (3+ tools simultaneously)
-## Code-Reviewer Skill (\`skills/code-reviewer/SKILL.md\`)
-- Loaded by Hygienic when reviewing implementation
-- Output: APPROVE | REQUEST_CHANGES | NEEDS_DISCUSSION
-- Reviews: plan adherence, correctness, simplicity/YAGNI, risk
-- Already exists but underused (Hygienic only loads it when explicitly asked)
-## Plan Format
-- Headers: \`### N. Task Name\`
-- Sections: Depends on, What to do, Must NOT do, References (file:lines), Acceptance Criteria
-- Dependencies: \`none\` | \`1\` | \`1,3\` | implicit sequential
-## Skills (10 total)
-writing-plans, executing-plans, dispatching-parallel-agents, parallel-exploration, code-reviewer, onboarding, brainstorming, verification-before-completion, test-driven-development, systematic-debugging
-## Notepad System
-- Location: \`.hive/features/{feature}/notepads/{learnings,issues,decisions}.md\`
-- Workers append-only
-- **NOT automatically injected into next batch** — context injection only reads from \`contexts/\` directory"
+    name: "research-{topic}",
+    content: "## {Topic}\\n\\nDate: {YYYY-MM-DD}\\n\\n## Context\\n\\n## Findings"
   })
   \`\`\`
@@ -14895,6 +15456,20 @@ CAN use for quick lookups:
 - \`ast_grep_search\` — AST patterns
 - \`glob\`, \`grep\`, \`read\` — Codebase exploration
+## Resolve Before Blocking
+Default to exploration, questions are LAST resort:
+1. Read the referenced files and surrounding code
+2. Search for similar patterns in the codebase
+3. Try a reasonable approach based on conventions
+Only report as blocked when:
+- Multiple approaches failed (tried 3+)
+- Decision requires business logic you can't infer
+- External dependency is missing or broken
+Context inference: Before asking "what does X do?", READ X first.
 ## Plan = READ ONLY
 CRITICAL: NEVER MODIFY THE PLAN FILE
@@ -14919,8 +15494,11 @@ Read spec for:
 ### 2. Orient (Pre-flight Before Coding)
 Before writing code:
 - Confirm dependencies are satisfied and required context is present
+- Read the referenced files and surrounding code
+- Search for similar patterns in the codebase
 - Identify the exact files/sections to touch (from references)
 - Decide the first failing test you will write (TDD)
+- Identify the test command(s) and inputs you will run
 - Plan the minimum change to reach green
 ### 3. Implement
@@ -14966,6 +15544,16 @@ hive_worktree_commit({
 })
 \`\`\`
+## Completion Checklist
+Before calling hive_worktree_commit:
+- All tests in scope are run and passing (Record exact commands and results)
+- Build succeeds if required (Record exact command and result)
+- lsp_diagnostics clean on changed files (Record exact command and result)
+- Changes match the spec and references
+- No extra scope creep or unrelated edits
+- Summary includes what changed, why, and verification status
 ## Failure Recovery
 After 3 consecutive failures:
@@ -14975,6 +15563,15 @@ After 3 consecutive failures:
 ## Iron Laws
+### Docker Sandbox
+When sandbox mode is active, ALL bash commands automatically run inside a Docker container.
+- Your commands are transparently wrapped — you don't need to do anything special
+- File edits (Read, Write, Edit tools) still work on the host filesystem (worktree is mounted)
+- If a command must run on the host (e.g., git operations), report as blocked and ask the user
+- If a command fails with "docker: command not found", report as blocked — the host needs Docker installed
+- For deeper Docker expertise, load \`hive_skill("docker-mastery")\`
 **Never:**
 - Exceed task scope
 - Modify plan file
@@ -15017,7 +15614,10 @@ Self-check before every critique:
 ### 2. Verification & Acceptance Criteria
 - Are criteria measurable and concrete?
-- Red flags: "should work", "looks good", "properly handles"
+- Are they agent-executable (tool-runnable) without human judgment?
+- Do they specify exact commands + expected signals (exit code, output text, counts)?
+- Red flags: "should work", "looks good", "properly handles", "verify manually"
+- If manual checks are required, the plan must explain why automation is impossible
 ### 3. Context Completeness (90% Confidence)
 - Could a capable worker execute with 90% confidence?
@@ -15149,6 +15749,11 @@ import * as fs8 from "fs";
 import * as path4 from "path";
 import * as fs10 from "fs";
 import * as path6 from "path";
+import * as fs11 from "fs";
+import * as path7 from "path";
+import { existsSync as existsSync5 } from "fs";
+import { join as join8, sep } from "path";
+import { execSync } from "child_process";
 var __create = Object.create;
 var __getProtoOf = Object.getPrototypeOf;
 var __defProp2 = Object.defineProperty;
@@ -15986,6 +16591,7 @@ var DEFAULT_HIVE_CONFIG = {
   disableSkills: [],
   disableMcps: [],
   agentMode: "unified",
+  sandbox: "none",
   agents: {
     "hive-master": {
       model: DEFAULT_AGENT_MODELS["hive-master"],
@@ -21465,6 +22071,12 @@ class ContextService {
     ensureDir(contextPath);
     const filePath = path4.join(contextPath, this.normalizeFileName(fileName));
     writeText(filePath, content);
+    const totalChars = this.list(featureName).reduce((sum, c) => sum + c.content.length, 0);
+    if (totalChars > 20000) {
+      return `${filePath}
+⚠️ Context total: ${totalChars} chars (exceeds 20,000). Consider archiving older contexts with contextService.archive().`;
+    }
     return filePath;
   }
   read(featureName, fileName) {
@@ -21510,6 +22122,37 @@ ${f.content}`);
 `);
   }
+  /**
+   * Move every context file of a feature into the sibling `archive` directory.
+   *
+   * Each file is copied to `<timestamp>_<name>.md` and the original is then
+   * removed. The timestamp is the current ISO time with ":" and "." replaced
+   * by "-".
+   *
+   * @param featureName Feature whose contexts are archived.
+   * @returns `{ archived, archivePath }` with the archived context names and the
+   *   archive directory, or `{ archived: [], archivePath: "" }` when the feature
+   *   has no contexts.
+   */
+  archive(featureName) {
+    const entries = this.list(featureName);
+    if (entries.length === 0) {
+      return { archived: [], archivePath: "" };
+    }
+    const sourceDir = getContextPath(this.projectRoot, featureName);
+    const archivePath = path4.join(sourceDir, "..", "archive");
+    ensureDir(archivePath);
+    // One stamp per call so all files of a batch share the same prefix.
+    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
+    const archived = entries.map(({ name }) => {
+      const from = path4.join(sourceDir, `${name}.md`);
+      fs8.copyFileSync(from, path4.join(archivePath, `${stamp}_${name}.md`));
+      fs8.unlinkSync(from);
+      return name;
+    });
+    return { archived, archivePath };
+  }
+  /**
+   * Summarise the contexts stored for a feature.
+   *
+   * @param featureName Feature to inspect.
+   * @returns `{ count, totalChars }`; when at least one context exists the
+   *   result also names the `oldest` and `newest` context by `updatedAt`.
+   */
+  stats(featureName) {
+    const entries = this.list(featureName);
+    if (entries.length === 0) {
+      return { count: 0, totalChars: 0 };
+    }
+    const stampOf = (entry) => new Date(entry.updatedAt).getTime();
+    // Sort a copy so the list returned by this.list() is left untouched.
+    const byAge = entries.slice().sort((left, right) => stampOf(left) - stampOf(right));
+    let totalChars = 0;
+    for (const entry of entries) {
+      totalChars += entry.content.length;
+    }
+    return {
+      count: entries.length,
+      totalChars,
+      oldest: byAge[0].name,
+      newest: byAge[byAge.length - 1].name
+    };
+  }
   normalizeFileName(name) {
     const normalized = name.replace(/\.md$/, "");
     return `${normalized}.md`;
@@ -21623,6 +22266,304 @@ class ConfigService {
     const config2 = this.get();
     return config2.disableMcps ?? [];
   }
+  /**
+   * Resolve the sandbox settings from the loaded config.
+   *
+   * @returns `{ mode, image?, persistent }`: `mode` falls back to "none",
+   *   `image` is present only when `dockerImage` is set to a truthy value, and
+   *   `persistent` falls back to `mode === "docker"` when
+   *   `persistentContainers` is not configured.
+   */
+  getSandboxConfig() {
+    const { sandbox, dockerImage, persistentContainers } = this.get();
+    const mode = sandbox ?? "none";
+    const resolved = { mode };
+    if (dockerImage) {
+      resolved.image = dockerImage;
+    }
+    resolved.persistent = persistentContainers ?? (mode === "docker");
+    return resolved;
+  }
+}
+/**
+ * Generates and maintains the `AGENTS.md` file in the project root.
+ *
+ * - `init()` previews a template built from files found in `rootDir`.
+ * - `sync()` proposes additions mined from a feature's context files.
+ * - `apply()` is the only method that writes to disk.
+ */
+class AgentsMdService {
+  rootDir;
+  contextService;
+  /**
+   * @param rootDir Directory that contains (or will contain) AGENTS.md.
+   * @param contextService Object exposing `list(featureName)`; each returned
+   *   item is read for its `content` string.
+   */
+  constructor(rootDir, contextService) {
+    this.rootDir = rootDir;
+    this.contextService = contextService;
+  }
+  /**
+   * Return the current AGENTS.md, or a generated template when none exists.
+   * Nothing is written to disk.
+   *
+   * @returns `{ content, existed }`.
+   */
+  async init() {
+    const agentsMdPath = path7.join(this.rootDir, "AGENTS.md");
+    if (fileExists(agentsMdPath)) {
+      return { content: readText(agentsMdPath) || "", existed: true };
+    }
+    const content = await this.scanAndGenerate();
+    return { content, existed: false };
+  }
+  /**
+   * Propose AGENTS.md additions from the contexts of a feature.
+   *
+   * @param featureName Feature whose contexts are scanned.
+   * @returns `{ proposals, diff }`; `diff` lists each proposal as a "+ " line.
+   */
+  async sync(featureName) {
+    const contexts = this.contextService.list(featureName);
+    const agentsMdPath = path7.join(this.rootDir, "AGENTS.md");
+    // Best effort: an unreadable or missing AGENTS.md is treated as empty.
+    const current = await fs11.promises.readFile(agentsMdPath, "utf-8").catch(() => "");
+    const findings = this.extractFindings(contexts);
+    const proposals = this.generateProposals(findings, current);
+    return { proposals, diff: this.formatDiff(current, proposals) };
+  }
+  /**
+   * Write `content` to AGENTS.md, replacing any existing file.
+   *
+   * @returns `{ path, chars, isNew }`.
+   */
+  apply(content) {
+    const agentsMdPath = path7.join(this.rootDir, "AGENTS.md");
+    const isNew = !fileExists(agentsMdPath);
+    writeText(agentsMdPath, content);
+    return { path: agentsMdPath, chars: content.length, isNew };
+  }
+  /**
+   * Collect convention-like phrases ("we use ...", "prefer ... over ...",
+   * "don't use ...", "<build|test|dev> command: ...", "X lives in /path")
+   * from context bodies. Blank lines and lines starting with "#" are skipped.
+   *
+   * @param contexts Items with a `content` string.
+   * @returns Unique findings in first-seen order.
+   */
+  extractFindings(contexts) {
+    const patterns = [
+      /we\s+use\s+[^.\n]+/gi,
+      /prefer\s+[^.\n]+\s+over\s+[^.\n]+/gi,
+      /don't\s+use\s+[^.\n]+/gi,
+      /do\s+not\s+use\s+[^.\n]+/gi,
+      /(?:build|test|dev)\s+command:\s*[^.\n]+/gi,
+      /[a-zA-Z]+\s+lives?\s+in\s+\/[^\s.\n]+/gi
+    ];
+    const findings = [];
+    // Set membership keeps de-duplication linear instead of rescanning the array.
+    const seen = new Set();
+    for (const context of contexts) {
+      for (const line of context.content.split("\n")) {
+        const trimmed2 = line.trim();
+        if (!trimmed2 || trimmed2.startsWith("#"))
+          continue;
+        for (const pattern of patterns) {
+          for (const match of trimmed2.match(pattern) ?? []) {
+            const finding = match.trim();
+            if (finding && !seen.has(finding)) {
+              seen.add(finding);
+              findings.push(finding);
+            }
+          }
+        }
+      }
+    }
+    return findings;
+  }
+  /**
+   * Keep only findings whose text does not already appear in `current`
+   * (case-insensitive substring comparison).
+   */
+  generateProposals(findings, current) {
+    const currentLower = current.toLowerCase();
+    return findings.filter((finding) => !currentLower.includes(finding.toLowerCase()));
+  }
+  /**
+   * Render proposals as "+ "-prefixed lines; empty string when there are none.
+   * `current` is accepted for signature compatibility and is not used.
+   */
+  formatDiff(current, proposals) {
+    if (proposals.length === 0)
+      return "";
+    return proposals.map((p) => `+ ${p}`).join("\n");
+  }
+  /** Detect project facts and render them into the AGENTS.md template. */
+  async scanAndGenerate() {
+    const detections = await this.detectProjectInfo();
+    return this.generateTemplate(detections);
+  }
+  /**
+   * Inspect `rootDir` for package metadata and marker files.
+   *
+   * @returns Package manager, language, test framework, the build/test/dev
+   *   script bodies from package.json (or null), and the monorepo flag.
+   */
+  async detectProjectInfo() {
+    const packageJsonPath = path7.join(this.rootDir, "package.json");
+    let packageJson = null;
+    if (fileExists(packageJsonPath)) {
+      try {
+        const content = readText(packageJsonPath);
+        packageJson = content ? JSON.parse(content) : null;
+      } catch {
+        // Deliberate best effort: a malformed package.json is treated as absent.
+      }
+    }
+    return {
+      packageManager: this.detectPackageManager(),
+      language: this.detectLanguage(),
+      testFramework: this.detectTestFramework(packageJson),
+      buildCommand: packageJson?.scripts?.build || null,
+      testCommand: packageJson?.scripts?.test || null,
+      devCommand: packageJson?.scripts?.dev || null,
+      isMonorepo: this.detectMonorepo(packageJson)
+    };
+  }
+  /**
+   * Infer the package manager from the lockfile present in `rootDir`.
+   * Falls back to "npm" when no known lockfile is found.
+   */
+  detectPackageManager() {
+    const has = (file) => fileExists(path7.join(this.rootDir, file));
+    // Bun writes a text `bun.lock` in newer releases and a binary `bun.lockb` in older ones.
+    if (has("bun.lock") || has("bun.lockb"))
+      return "bun";
+    if (has("pnpm-lock.yaml"))
+      return "pnpm";
+    if (has("yarn.lock"))
+      return "yarn";
+    return "npm";
+  }
+  /**
+   * Infer the primary language from marker files; the first match wins, so a
+   * package.json takes precedence over Python, Go and Rust markers.
+   */
+  detectLanguage() {
+    const has = (file) => fileExists(path7.join(this.rootDir, file));
+    if (has("tsconfig.json"))
+      return "TypeScript";
+    if (has("package.json"))
+      return "JavaScript";
+    if (has("requirements.txt"))
+      return "Python";
+    if (has("go.mod"))
+      return "Go";
+    if (has("Cargo.toml"))
+      return "Rust";
+    return "Unknown";
+  }
+  /**
+   * Infer the test framework from package.json dependencies.
+   *
+   * @returns "vitest", "jest", "bun test", "pytest" or null.
+   */
+  detectTestFramework(packageJson) {
+    if (!packageJson)
+      return null;
+    const deps = {
+      ...packageJson.dependencies,
+      ...packageJson.devDependencies
+    };
+    if (deps.vitest)
+      return "vitest";
+    if (deps.jest)
+      return "jest";
+    if (this.detectPackageManager() === "bun")
+      return "bun test";
+    if (deps.pytest)
+      return "pytest";
+    return null;
+  }
+  /** True when package.json declares a `workspaces` field. */
+  detectMonorepo(packageJson) {
+    if (!packageJson)
+      return false;
+    return !!packageJson.workspaces;
+  }
+  /**
+   * Render the AGENTS.md template from detected project facts.
+   *
+   * @param info Result of `detectProjectInfo()`.
+   * @returns Markdown text.
+   */
+  generateTemplate(info) {
+    const sections = [];
+    sections.push("# Agent Guidelines\n");
+    sections.push("## Overview\n");
+    sections.push("This project uses AI-assisted development. Follow these guidelines.\n");
+    sections.push("## Build & Test Commands\n");
+    sections.push("```bash");
+    if (info.isMonorepo) {
+      // Name the detected package manager instead of assuming bun.
+      sections.push(`# This is a monorepo using ${info.packageManager} workspaces`);
+    }
+    if (info.buildCommand) {
+      sections.push("# Build");
+      sections.push(`${info.packageManager} run build`);
+      sections.push("");
+    }
+    if (info.testCommand) {
+      sections.push("# Run tests");
+      // A "bun test" script is invoked as "<pm> test"; anything else goes through "run test".
+      sections.push(`${info.packageManager} ${info.testCommand === "bun test" ? "test" : "run test"}`);
+      sections.push("");
+    }
+    if (info.devCommand) {
+      sections.push("# Development mode");
+      sections.push(`${info.packageManager} run dev`);
+    }
+    sections.push("```\n");
+    sections.push("## Technology Stack\n");
+    sections.push(`- **Language**: ${info.language}`);
+    sections.push(`- **Package Manager**: ${info.packageManager}`);
+    if (info.testFramework) {
+      sections.push(`- **Test Framework**: ${info.testFramework}`);
+    }
+    if (info.isMonorepo) {
+      sections.push("- **Structure**: Monorepo with workspaces");
+    }
+    sections.push("");
+    sections.push("## Code Style\n");
+    sections.push("Follow existing patterns in the codebase.\n");
+    sections.push("## Architecture Principles\n");
+    sections.push("Document key architectural decisions here.\n");
+    return sections.join("\n");
+  }
+}
+/**
+ * Builds and manages the Docker commands used to sandbox shell commands that
+ * run inside a worktree. All members are static.
+ */
+class DockerSandboxService {
+  /**
+   * Quote a value so a POSIX shell treats it as one literal word.
+   * Embedded single quotes are emitted as `'\''`.
+   */
+  static shellQuote(value) {
+    return `'${String(value).replace(/'/g, "'\\''")}'`;
+  }
+  /**
+   * Pick a default image from marker files in the worktree.
+   *
+   * @param worktreePath Directory to inspect.
+   * @returns An image reference, or `null` when the worktree ships its own
+   *   Dockerfile (callers then leave the command unwrapped).
+   */
+  static detectImage(worktreePath) {
+    const has = (file) => existsSync5(join8(worktreePath, file));
+    if (has("Dockerfile")) {
+      return null;
+    }
+    if (has("package.json")) {
+      return "node:22-slim";
+    }
+    if (has("requirements.txt") || has("pyproject.toml")) {
+      return "python:3.12-slim";
+    }
+    if (has("go.mod")) {
+      // The official golang image publishes no "-slim" variant; use the default tag.
+      return "golang:1.22";
+    }
+    if (has("Cargo.toml")) {
+      return "rust:1.77-slim";
+    }
+    return "ubuntu:24.04";
+  }
+  /**
+   * Build a one-shot `docker run --rm` command that mounts the worktree at
+   * /app and runs `command` through `sh -c`.
+   *
+   * The mount argument, image and command are shell-quoted so paths with
+   * spaces or shell metacharacters cannot split or inject arguments.
+   */
+  static buildRunCommand(worktreePath, command, image) {
+    const quote = DockerSandboxService.shellQuote;
+    return `docker run --rm -v ${quote(`${worktreePath}:/app`)} -w /app ${quote(image)} sh -c ${quote(command)}`;
+  }
+  /**
+   * Derive a container name from the `.worktrees/<feature>/<task>` part of
+   * the path: lower-cased, non-alphanumerics replaced by "-", at most 63 chars.
+   * Paths without that structure get a time-based `hive-sandbox-<ms>` name,
+   * which differs on every call.
+   */
+  static containerName(worktreePath) {
+    const parts = worktreePath.split(sep);
+    const worktreeIdx = parts.indexOf(".worktrees");
+    if (worktreeIdx === -1 || worktreeIdx + 2 >= parts.length) {
+      return `hive-sandbox-${Date.now()}`;
+    }
+    const feature = parts[worktreeIdx + 1];
+    const task = parts[worktreeIdx + 2];
+    const name = `hive-${feature}-${task}`.replace(/[^a-z0-9-]/gi, "-").toLowerCase();
+    return name.slice(0, 63);
+  }
+  /**
+   * Make sure a long-lived container for the worktree exists and is running.
+   *
+   * @returns The container name.
+   * @throws Whatever `execSync` throws when the container cannot be created
+   *   or started.
+   */
+  static ensureContainer(worktreePath, image) {
+    const quote = DockerSandboxService.shellQuote;
+    // `name` only contains [a-z0-9-] (or the numeric fallback), so it is safe unquoted.
+    const name = DockerSandboxService.containerName(worktreePath);
+    let running;
+    try {
+      running = execSync(`docker inspect --format='{{.State.Running}}' ${name}`, { stdio: "pipe" }).toString().trim();
+    } catch {
+      // inspect failed (typically: no such container), so create it.
+      execSync(`docker run -d --name ${name} -v ${quote(`${worktreePath}:/app`)} -w /app ${quote(image)} tail -f /dev/null`, { stdio: "pipe" });
+      return name;
+    }
+    if (running !== "true") {
+      // The container exists but is stopped; `docker exec` would fail until it is started.
+      execSync(`docker start ${name}`, { stdio: "pipe" });
+    }
+    return name;
+  }
+  /** Build a `docker exec` command that runs `command` through `sh -c`. */
+  static buildExecCommand(containerName, command) {
+    return `docker exec ${containerName} sh -c ${DockerSandboxService.shellQuote(command)}`;
+  }
+  /** Force-remove the worktree's container; failures are ignored on purpose. */
+  static stopContainer(worktreePath) {
+    const name = DockerSandboxService.containerName(worktreePath);
+    try {
+      execSync(`docker rm -f ${name}`, { stdio: "ignore" });
+    } catch {
+      // Best-effort cleanup: a missing container or missing Docker is not an error here.
+    }
+  }
+  /** True when `docker info` exits successfully. */
+  static isDockerAvailable() {
+    try {
+      execSync("docker info", { stdio: "ignore" });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+  /**
+   * Translate a shell command into its sandboxed form.
+   *
+   * - A leading `HOST:` marker (any case, optional whitespace) is stripped
+   *   and the rest is returned unwrapped.
+   * - `config2.mode === "none"` returns the command unchanged.
+   * - With no configured image and a Dockerfile in the worktree, the command
+   *   is returned unchanged.
+   * - Otherwise the command is wrapped in `docker exec` (persistent) or
+   *   `docker run --rm` (one-shot).
+   *
+   * @param config2 `{ mode, image?, persistent? }`.
+   */
+  static wrapCommand(worktreePath, command, config2) {
+    const hostPrefix = /^HOST:\s*/i;
+    if (hostPrefix.test(command)) {
+      return command.replace(hostPrefix, "");
+    }
+    if (config2.mode === "none") {
+      return command;
+    }
+    const image = config2.image || DockerSandboxService.detectImage(worktreePath);
+    if (image === null) {
+      return command;
+    }
+    if (config2.persistent) {
+      const containerName = DockerSandboxService.ensureContainer(worktreePath, image);
+      return DockerSandboxService.buildExecCommand(containerName, command);
+    }
+    return DockerSandboxService.buildRunCommand(worktreePath, command, image);
+  }
 }
 function computeRunnableAndBlocked(tasks) {
   const statusByFolder = new Map;
@@ -22306,6 +23247,7 @@ var plugin = async (ctx) => {
   const planService = new PlanService(directory);
   const taskService = new TaskService(directory);
   const contextService = new ContextService(directory);
+  const agentsMdService = new AgentsMdService(directory, contextService);
   const configService = new ConfigService;
   const disabledMcps = configService.getDisabledMcps();
   const disabledSkills = configService.getDisabledSkills();
@@ -22314,7 +23256,7 @@ var plugin = async (ctx) => {
   const effectiveAutoLoadSkills = configService.getAgentConfig("hive-master").autoLoadSkills ?? [];
   const worktreeService = new WorktreeService({
     baseDir: directory,
-    hiveDir: path7.join(directory, ".hive")
+    hiveDir: path8.join(directory, ".hive")
   });
   const isOmoSlimEnabled = () => {
     return configService.isOmoSlimEnabled();
@@ -22341,7 +23283,7 @@ var plugin = async (ctx) => {
   };
   const checkBlocked = (feature) => {
     const fs9 = __require("fs");
-    const blockedPath = path7.join(directory, ".hive", "features", feature, "BLOCKED");
+    const blockedPath = path8.join(directory, ".hive", "features", feature, "BLOCKED");
     if (fs9.existsSync(blockedPath)) {
       const reason = fs9.readFileSync(blockedPath, "utf-8").trim();
       return `⛔ BLOCKED by Beekeeper
@@ -22426,6 +23368,31 @@ To unblock: Remove .hive/features/${feature}/BLOCKED`;
         output.message.variant = configuredVariant;
       }
     },
+    /**
+     * Pre-execution hook that reroutes `bash` tool calls into the Docker sandbox.
+     *
+     * Does nothing unless the tool is "bash", the sandbox mode is not "none"
+     * and the call carries a non-empty command. A leading `HOST:` marker is
+     * stripped and logged, and the command is left to run unwrapped. Otherwise
+     * the command is wrapped only when `workdir` is the Hive worktree base or
+     * a path below it; `workdir` is then cleared on the rewritten call.
+     */
+    "tool.execute.before": async (input, output) => {
+      if (input.tool !== "bash")
+        return;
+      const sandboxConfig = configService.getSandboxConfig();
+      if (sandboxConfig.mode === "none")
+        return;
+      const command = output.args?.command?.trim();
+      if (!command)
+        return;
+      const hostPrefix = /^HOST:\s*/i;
+      if (hostPrefix.test(command)) {
+        const strippedCommand = command.replace(hostPrefix, "");
+        console.warn(`[hive:sandbox] HOST bypass: ${strippedCommand.slice(0, 80)}${strippedCommand.length > 80 ? "..." : ""}`);
+        output.args.command = strippedCommand;
+        return;
+      }
+      const workdir = output.args?.workdir;
+      if (!workdir)
+        return;
+      const hiveWorktreeBase = path8.join(directory, ".hive", ".worktrees");
+      // Require a separator boundary so a sibling such as ".worktrees-old" is not
+      // mistaken for a directory inside the worktree base.
+      const insideWorktrees = workdir === hiveWorktreeBase || workdir.startsWith(`${hiveWorktreeBase}${path8.sep}`);
+      if (!insideWorktrees)
+        return;
+      output.args.command = DockerSandboxService.wrapCommand(workdir, command, sandboxConfig);
+      output.args.workdir = undefined;
+    },
     mcp: builtinMcps,
     tool: {
       hive_skill: createHiveSkillTool(filteredSkills),
@@ -22494,8 +23461,8 @@ NEXT: Ask your first clarifying question about this feature.`;
           const feature = resolveFeature(explicitFeature);
           if (!feature)
             return "Error: No feature specified. Create a feature or provide feature param.";
-          const hasDiscovery = content.toLowerCase().includes("## discovery");
-          if (!hasDiscovery) {
+          const discoveryMatch = content.match(/^##\s+Discovery\s*$/im);
+          if (!discoveryMatch) {
             return `BLOCKED: Discovery section required before planning.
 Your plan must include a \`## Discovery\` section documenting:
@@ -22504,6 +23471,19 @@ Your plan must include a \`## Discovery\` section documenting:
 - Key decisions made
 Add this section to your plan content and try again.`;
+          }
+          const afterDiscovery = content.slice(discoveryMatch.index + discoveryMatch[0].length);
+          const nextHeading = afterDiscovery.search(/^##\s+/m);
+          const discoveryContent = nextHeading > -1 ? afterDiscovery.slice(0, nextHeading).trim() : afterDiscovery.trim();
+          if (discoveryContent.length < 100) {
+            return `BLOCKED: Discovery section is too thin (${discoveryContent.length} chars, minimum 100).
+A substantive Discovery section should include:
+- Original request quoted
+- Interview summary (key decisions)
+- Research findings with file:line references
+Expand your Discovery section and try again.`;
           }
           captureSession(feature, toolContext);
           const planPath = planService.write(feature, content);
@@ -22724,9 +23704,9 @@ Reminder: start work with hive_worktree_create to use its worktree, and ensure a
             spec: specContent,
             workerPrompt
           });
-          const hiveDir = path7.join(directory, ".hive");
+          const hiveDir = path8.join(directory, ".hive");
           const workerPromptPath = writeWorkerPromptFile(feature, task, workerPrompt, hiveDir);
-          const relativePromptPath = normalizePath(path7.relative(directory, workerPromptPath));
+          const relativePromptPath = normalizePath(path8.relative(directory, workerPromptPath));
           const PREVIEW_MAX_LENGTH = 200;
           const workerPromptPreview = workerPrompt.length > PREVIEW_MAX_LENGTH ? workerPrompt.slice(0, PREVIEW_MAX_LENGTH) + "..." : workerPrompt;
           const taskToolPrompt = `Follow instructions in @${relativePromptPath}`;
@@ -23066,6 +24046,47 @@ Files changed: ${result.filesChanged?.length || 0}`;
             nextAction: getNextAction(planStatus, tasksSummary, runnable)
           });
         }
+      }),
+      // Tool wrapper around AgentsMdService: "init" and "sync" only preview,
+      // "apply" writes the approved content. Every branch returns a status string.
+      hive_agents_md: tool({
+        description: "Initialize or sync AGENTS.md. init: scan codebase and generate (preview only). sync: propose updates from feature contexts. apply: write approved content to disk.",
+        args: {
+          action: tool.schema.enum(["init", "sync", "apply"]).describe("Action to perform"),
+          feature: tool.schema.string().optional().describe("Feature name for sync action"),
+          content: tool.schema.string().optional().describe("Content to write (required for apply action)")
+        },
+        async execute({ action, feature, content }) {
+          switch (action) {
+            case "init": {
+              const { existed, content: generated } = await agentsMdService.init();
+              if (existed) {
+                return `AGENTS.md already exists (${generated.length} chars). Use 'sync' to propose updates.`;
+              }
+              return `Generated AGENTS.md from codebase scan (${generated.length} chars):
+${generated}
+⚠️ This has NOT been written to disk. Ask the user via question() whether to write it to AGENTS.md.`;
+            }
+            case "sync": {
+              if (!feature) {
+                return "Error: feature name required for sync action";
+              }
+              const { proposals, diff } = await agentsMdService.sync(feature);
+              if (proposals.length === 0) {
+                return "No new findings to sync to AGENTS.md.";
+              }
+              return `Proposed AGENTS.md updates from feature "${feature}":
+${diff}
+⚠️ These changes have NOT been applied. Ask the user via question() whether to apply them.`;
+            }
+            case "apply": {
+              if (!content) {
+                return "Error: content required for apply action. Use init or sync first to get content, then apply with the approved content.";
+              }
+              const { isNew, chars, path: writtenPath } = agentsMdService.apply(content);
+              return `AGENTS.md ${isNew ? "created" : "updated"} (${chars} chars) at ${writtenPath}`;
+            }
+            default:
+              return "Error: unknown action";
+          }
+        }
       })
     },
     command: {