npm - opencode-swarm-plugin - Versions diffs - 0.43.0 → 0.44.1 - Mend

opencode-swarm-plugin 0.43.0 → 0.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

package/bin/cass.characterization.test.ts +422 -0
package/bin/swarm.serve.test.ts +6 -4
package/bin/swarm.test.ts +68 -0
package/bin/swarm.ts +81 -8
package/dist/compaction-prompt-scoring.js +139 -0
package/dist/contributor-tools.d.ts +42 -0
package/dist/contributor-tools.d.ts.map +1 -0
package/dist/eval-capture.js +12811 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +12 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +7728 -62590
package/dist/plugin.js +23833 -78695
package/dist/sessions/agent-discovery.d.ts +59 -0
package/dist/sessions/agent-discovery.d.ts.map +1 -0
package/dist/sessions/index.d.ts +10 -0
package/dist/sessions/index.d.ts.map +1 -0
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/package.json +17 -5
package/.changeset/swarm-insights-data-layer.md +0 -63
package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
package/.hive/analysis/session-data-quality-audit.md +0 -320
package/.hive/eval-results.json +0 -483
package/.hive/issues.jsonl +0 -138
package/.hive/memories.jsonl +0 -729
package/.opencode/eval-history.jsonl +0 -327
package/.turbo/turbo-build.log +0 -9
package/CHANGELOG.md +0 -2255
package/SCORER-ANALYSIS.md +0 -598
package/docs/analysis/subagent-coordination-patterns.md +0 -902
package/docs/analysis-socratic-planner-pattern.md +0 -504
package/docs/planning/ADR-001-monorepo-structure.md +0 -171
package/docs/planning/ADR-002-package-extraction.md +0 -393
package/docs/planning/ADR-003-performance-improvements.md +0 -451
package/docs/planning/ADR-004-message-queue-features.md +0 -187
package/docs/planning/ADR-005-devtools-observability.md +0 -202
package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
package/docs/planning/ROADMAP.md +0 -368
package/docs/semantic-memory-cli-syntax.md +0 -123
package/docs/swarm-mail-architecture.md +0 -1147
package/docs/testing/context-recovery-test.md +0 -470
package/evals/ARCHITECTURE.md +0 -1189
package/evals/README.md +0 -768
package/evals/compaction-prompt.eval.ts +0 -149
package/evals/compaction-resumption.eval.ts +0 -289
package/evals/coordinator-behavior.eval.ts +0 -307
package/evals/coordinator-session.eval.ts +0 -154
package/evals/evalite.config.ts.bak +0 -15
package/evals/example.eval.ts +0 -31
package/evals/fixtures/compaction-cases.ts +0 -350
package/evals/fixtures/compaction-prompt-cases.ts +0 -311
package/evals/fixtures/coordinator-sessions.ts +0 -328
package/evals/fixtures/decomposition-cases.ts +0 -105
package/evals/lib/compaction-loader.test.ts +0 -248
package/evals/lib/compaction-loader.ts +0 -320
package/evals/lib/data-loader.evalite-test.ts +0 -289
package/evals/lib/data-loader.test.ts +0 -345
package/evals/lib/data-loader.ts +0 -281
package/evals/lib/llm.ts +0 -115
package/evals/scorers/compaction-prompt-scorers.ts +0 -145
package/evals/scorers/compaction-scorers.ts +0 -305
package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
package/evals/scorers/coordinator-discipline.ts +0 -325
package/evals/scorers/index.test.ts +0 -146
package/evals/scorers/index.ts +0 -328
package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
package/evals/scorers/outcome-scorers.ts +0 -349
package/evals/swarm-decomposition.eval.ts +0 -121
package/examples/commands/swarm.md +0 -745
package/examples/plugin-wrapper-template.ts +0 -2426
package/examples/skills/hive-workflow/SKILL.md +0 -212
package/examples/skills/skill-creator/SKILL.md +0 -223
package/examples/skills/swarm-coordination/SKILL.md +0 -292
package/global-skills/cli-builder/SKILL.md +0 -344
package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
package/global-skills/learning-systems/SKILL.md +0 -644
package/global-skills/skill-creator/LICENSE.txt +0 -202
package/global-skills/skill-creator/SKILL.md +0 -352
package/global-skills/skill-creator/references/output-patterns.md +0 -82
package/global-skills/skill-creator/references/workflows.md +0 -28
package/global-skills/swarm-coordination/SKILL.md +0 -995
package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
package/global-skills/swarm-coordination/references/strategies.md +0 -138
package/global-skills/system-design/SKILL.md +0 -213
package/global-skills/testing-patterns/SKILL.md +0 -430
package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
package/opencode-swarm-plugin-0.30.7.tgz +0 -0
package/opencode-swarm-plugin-0.31.0.tgz +0 -0
package/scripts/cleanup-test-memories.ts +0 -346
package/scripts/init-skill.ts +0 -222
package/scripts/migrate-unknown-sessions.ts +0 -349
package/scripts/validate-skill.ts +0 -204
package/src/agent-mail.ts +0 -1724
package/src/anti-patterns.test.ts +0 -1167
package/src/anti-patterns.ts +0 -448
package/src/compaction-capture.integration.test.ts +0 -257
package/src/compaction-hook.test.ts +0 -838
package/src/compaction-hook.ts +0 -1204
package/src/compaction-observability.integration.test.ts +0 -139
package/src/compaction-observability.test.ts +0 -187
package/src/compaction-observability.ts +0 -324
package/src/compaction-prompt-scorers.test.ts +0 -475
package/src/compaction-prompt-scoring.ts +0 -300
package/src/dashboard.test.ts +0 -611
package/src/dashboard.ts +0 -462
package/src/error-enrichment.test.ts +0 -403
package/src/error-enrichment.ts +0 -219
package/src/eval-capture.test.ts +0 -1015
package/src/eval-capture.ts +0 -929
package/src/eval-gates.test.ts +0 -306
package/src/eval-gates.ts +0 -218
package/src/eval-history.test.ts +0 -508
package/src/eval-history.ts +0 -214
package/src/eval-learning.test.ts +0 -378
package/src/eval-learning.ts +0 -360
package/src/eval-runner.test.ts +0 -223
package/src/eval-runner.ts +0 -402
package/src/export-tools.test.ts +0 -476
package/src/export-tools.ts +0 -257
package/src/hive.integration.test.ts +0 -2241
package/src/hive.ts +0 -1628
package/src/index.ts +0 -935
package/src/learning.integration.test.ts +0 -1815
package/src/learning.ts +0 -1079
package/src/logger.test.ts +0 -189
package/src/logger.ts +0 -135
package/src/mandate-promotion.test.ts +0 -473
package/src/mandate-promotion.ts +0 -239
package/src/mandate-storage.integration.test.ts +0 -601
package/src/mandate-storage.test.ts +0 -578
package/src/mandate-storage.ts +0 -794
package/src/mandates.ts +0 -540
package/src/memory-tools.test.ts +0 -195
package/src/memory-tools.ts +0 -344
package/src/memory.integration.test.ts +0 -334
package/src/memory.test.ts +0 -158
package/src/memory.ts +0 -527
package/src/model-selection.test.ts +0 -188
package/src/model-selection.ts +0 -68
package/src/observability-tools.test.ts +0 -359
package/src/observability-tools.ts +0 -871
package/src/output-guardrails.test.ts +0 -438
package/src/output-guardrails.ts +0 -381
package/src/pattern-maturity.test.ts +0 -1160
package/src/pattern-maturity.ts +0 -525
package/src/planning-guardrails.test.ts +0 -491
package/src/planning-guardrails.ts +0 -438
package/src/plugin.ts +0 -23
package/src/post-compaction-tracker.test.ts +0 -251
package/src/post-compaction-tracker.ts +0 -237
package/src/query-tools.test.ts +0 -636
package/src/query-tools.ts +0 -324
package/src/rate-limiter.integration.test.ts +0 -466
package/src/rate-limiter.ts +0 -774
package/src/replay-tools.test.ts +0 -496
package/src/replay-tools.ts +0 -240
package/src/repo-crawl.integration.test.ts +0 -441
package/src/repo-crawl.ts +0 -610
package/src/schemas/cell-events.test.ts +0 -347
package/src/schemas/cell-events.ts +0 -807
package/src/schemas/cell.ts +0 -257
package/src/schemas/evaluation.ts +0 -166
package/src/schemas/index.test.ts +0 -199
package/src/schemas/index.ts +0 -286
package/src/schemas/mandate.ts +0 -232
package/src/schemas/swarm-context.ts +0 -115
package/src/schemas/task.ts +0 -161
package/src/schemas/worker-handoff.test.ts +0 -302
package/src/schemas/worker-handoff.ts +0 -131
package/src/skills.integration.test.ts +0 -1192
package/src/skills.test.ts +0 -643
package/src/skills.ts +0 -1549
package/src/storage.integration.test.ts +0 -341
package/src/storage.ts +0 -884
package/src/structured.integration.test.ts +0 -817
package/src/structured.test.ts +0 -1046
package/src/structured.ts +0 -762
package/src/swarm-decompose.test.ts +0 -188
package/src/swarm-decompose.ts +0 -1302
package/src/swarm-deferred.integration.test.ts +0 -157
package/src/swarm-deferred.test.ts +0 -38
package/src/swarm-insights.test.ts +0 -214
package/src/swarm-insights.ts +0 -459
package/src/swarm-mail.integration.test.ts +0 -970
package/src/swarm-mail.ts +0 -739
package/src/swarm-orchestrate.integration.test.ts +0 -282
package/src/swarm-orchestrate.test.ts +0 -548
package/src/swarm-orchestrate.ts +0 -3084
package/src/swarm-prompts.test.ts +0 -1270
package/src/swarm-prompts.ts +0 -2077
package/src/swarm-research.integration.test.ts +0 -701
package/src/swarm-research.test.ts +0 -698
package/src/swarm-research.ts +0 -472
package/src/swarm-review.integration.test.ts +0 -285
package/src/swarm-review.test.ts +0 -879
package/src/swarm-review.ts +0 -709
package/src/swarm-strategies.ts +0 -407
package/src/swarm-worktree.test.ts +0 -501
package/src/swarm-worktree.ts +0 -575
package/src/swarm.integration.test.ts +0 -2377
package/src/swarm.ts +0 -38
package/src/tool-adapter.integration.test.ts +0 -1221
package/src/tool-availability.ts +0 -461
package/tsconfig.json +0 -28

package/docs/planning/ADR-009-oh-my-opencode-patterns.md DELETED Viewed

@@ -1,353 +0,0 @@
-# ADR-009: Patterns from oh-my-opencode
-## Status
-Proposed
-## Context
-[oh-my-opencode](https://github.com/code-yeongyu/oh-my-opencode) by **code-yeongyu** is the most feature-rich OpenCode plugin in the wild (2961★). After deep analysis of its architecture, we identified several patterns that would significantly strengthen our swarm coordination.
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                                                                 │
-│   "Standing on the shoulders of giants"                        │
-│                                                                 │
-│   oh-my-opencode innovations we're adopting:                   │
-│   • 7-Section Delegation Protocol                              │
-│   • Compaction Context Injection                               │
-│   • Preemptive Compaction (80% threshold)                      │
-│   • Event + Polling Hybrid for completion detection            │
-│   • Parallel Execution Minimums                                │
-│   • Context-Safe Tool Limits                                   │
-│                                                                 │
-└─────────────────────────────────────────────────────────────────┘
-```
-### What oh-my-opencode Does Well
-1. **Agent System** - Factory-based registry with model-specific configs, BackgroundManager for async execution, structured delegation prompts
-2. **21 Lifecycle Hooks** - Compaction handling, session recovery, think-mode auto-switching, external hook protocol
-3. **Background Agents** - Event + polling hybrid, todo-aware completion, fire-and-forget abort
-4. **LSP/AST Tools** - Zero-config LSP integration, 11 code intelligence tools, context-safe limits
-5. **Claude Code Compatibility** - Dual-path config loading, 4 independent loaders, auto-migration
-6. **Plugin Architecture** - Hook mapping pattern, session-scoped state, graceful degradation
-### What We Already Have
-- Swarm Mail for inter-agent messaging
-- File reservations for conflict prevention
-- UBS scan on completion
-- Hive for work item tracking
-- Worktree isolation (ADR-007)
-- Structured review (ADR-007)
-### Gaps This ADR Addresses
-1. **Coordinator prompts lack structure** - Workers sometimes go rogue
-2. **Context compaction loses critical info** - Decomposition strategy, dependency graph forgotten
-3. **No preemptive compaction** - Sessions die at 100% instead of gracefully compacting at 80%
-4. **Polling-only completion detection** - Slower than event-driven
-5. **No parallel execution enforcement** - Researchers call tools sequentially
-6. **Tool outputs can explode context** - No hard limits on results
-## Decision
-### 1. 7-Section Delegation Protocol
-Adopt oh-my-opencode's structured delegation format for coordinator → worker handoffs:
-```markdown
-## TASK
-[Specific task description]
-## EXPECTED OUTCOME
-[What success looks like]
-## REQUIRED SKILLS
-[Domain knowledge needed]
-## REQUIRED TOOLS
-[Tools the worker should use]
-## MUST DO
-- [Non-negotiable requirements]
-- [Quality gates]
-## MUST NOT DO
-- [Forbidden approaches]
-- [Anti-patterns to avoid]
-## CONTEXT
-[Shared context from coordinator]
-[Dependency information]
-[What other workers are doing]
-```
-**Implementation:** Update `swarm_subtask_prompt` to generate this format.
-**Why:** Reduces rogue behavior. Workers have clear boundaries and success criteria.
-### 2. Compaction Context Injection
-Preserve critical information through context compaction by injecting a structured prompt BEFORE the summarization API call:
-```markdown
-## 1. User Requests (As-Is)
-[Exact wording of original requests - preserved verbatim]
-## 2. Final Goal
-[End result expected from this session]
-## 3. Work Completed
-[Files modified, features implemented, problems solved]
-## 4. Remaining Tasks
-[Pending items, follow-ups, blocked work]
-## 5. MUST NOT Do
-[Forbidden approaches, failed attempts, anti-patterns discovered]
-## 6. Swarm State (if applicable)
-[Epic ID, completed subtasks, in-progress workers, dependency graph]
-```
-**Implementation:** Add `experimental.session.compacting` hook that injects this prompt.
-**Why:** Currently compaction loses decomposition strategy, dependency graph, and failed approaches. Workers repeat mistakes.
-### 3. Preemptive Compaction
-Monitor token usage and trigger compaction at 80% threshold instead of waiting for overflow:
-```typescript
-// In chat.message or message.updated hook
-const usageRatio = totalTokens / contextLimit;
-if (usageRatio >= 0.8 && !compactionInProgress.has(sessionID)) {
-  compactionInProgress.add(sessionID);
-  await ctx.client.session.summarize({ ... });
-  // Auto-resume after compaction
-  setTimeout(() => {
-    ctx.client.session.promptAsync({ parts: [{ text: "Continue" }] });
-  }, 500);
-}
-```
-**Configuration:**
-```json
-{
-  "preemptive_compaction": {
-    "enabled": true,
-    "threshold": 0.8,
-    "cooldown_seconds": 300
-  }
-}
-```
-**Why:** Prevents context overflow mid-work. Coordinators and workers survive long-running epics.
-### 4. Event + Polling Hybrid for Completion Detection
-Currently we only poll for worker completion. Add event-driven detection as primary path:
-```typescript
-// Primary: Event-driven (fast)
-event: async ({ event }) => {
-  if (event.type === "session.idle") {
-    const todos = await client.session.todo();
-    if (todos.length === 0) {
-      markWorkerComplete(event.properties?.info?.id);
-    }
-  }
-}
-// Fallback: Polling (reliable)
-setInterval(async () => {
-  for (const worker of runningWorkers) {
-    const status = await client.session.status(worker.sessionID);
-    if (status.type === "idle") {
-      // Same completion logic
-    }
-  }
-}, 2000);
-```
-**Why:** Events are faster. Polling catches missed events. Hybrid = reliable + fast.
-### 5. Parallel Execution Minimums
-Enforce minimum parallel tool calls for researcher agents:
-```markdown
-## PARALLEL EXECUTION REQUIREMENTS
-You MUST launch multiple tools simultaneously in your first action.
-Never call tools sequentially unless output depends on prior result.
-Minimum parallel calls by request type:
-- TYPE A (conceptual): 3+ tools (context7 + pdf-brain + websearch)
-- TYPE B (implementation): 4+ tools (repo-autopsy + grep + ast-grep + read)
-- TYPE C (comprehensive): 6+ tools (all of the above)
-WRONG:
-1. Search for X
-2. Wait for result
-3. Search for Y
-RIGHT:
-[Search X | Search Y | Search Z] → single response
-```
-**Implementation:** Add to `swarm-researcher` agent prompt. Enforce via output validation.
-**Why:** Sequential tool calls waste round-trips and burn context. Parallel = faster + cheaper.
-### 6. Context-Safe Tool Limits
-Add hard limits and truncation reporting to tools that can explode context:
-| Tool | Limit | Truncation Message |
-|------|-------|-------------------|
-| `repo-autopsy_search` | 100 results | `Found 347 results (showing first 100):` |
-| `cass_search` | 50 results | `Found 89 sessions (showing first 50):` |
-| `find-exports` | 50 results | `Found 72 exports (showing first 50):` |
-| `semantic-memory_find` | 20 results | `Found 45 memories (showing first 20):` |
-**Implementation:**
-```typescript
-const MAX_RESULTS = 100;
-const total = results.length;
-const truncated = total > MAX_RESULTS;
-const limited = truncated ? results.slice(0, MAX_RESULTS) : results;
-if (truncated) {
-  output.unshift(`Found ${total} results (showing first ${MAX_RESULTS}):`);
-}
-```
-**Why:** Unbounded results kill context. Truncation with count lets agent know there's more.
-### 7. Todo-Aware Completion (Bonus)
-Before marking a worker complete, check if it left unfinished work:
-```typescript
-async function canMarkComplete(sessionID: string): Promise<boolean> {
-  const todos = await client.session.todo({ path: { id: sessionID } });
-  const incomplete = todos.filter(t => t.status !== "completed");
-  if (incomplete.length > 0) {
-    // Inject continuation prompt
-    await client.session.prompt({
-      path: { id: sessionID },
-      body: { parts: [{ text: "You have incomplete TODOs. Continue working." }] }
-    });
-    return false;
-  }
-  return true;
-}
-```
-**Why:** Prevents race conditions where worker marks complete before finishing TODO list.
-### 8. Think Mode for Complex Decomposition (Bonus)
-Auto-enable extended thinking when decomposing complex tasks:
-```typescript
-// In chat.params hook
-const complexityIndicators = [
-  "decompose", "break down", "plan", "architect",
-  "refactor across", "migrate", "redesign"
-];
-if (complexityIndicators.some(k => prompt.toLowerCase().includes(k))) {
-  output.message.model = { modelID: "claude-sonnet-4-5-high" };
-  output.message.thinking = { type: "enabled", budget_tokens: 16000 };
-}
-```
-**Why:** Complex decomposition benefits from extended reasoning. Auto-switching removes friction.
-## Implementation
-### Phase 1: Delegation Protocol + Context Injection (Priority)
-1. Update `swarm_subtask_prompt` with 7-section format
-2. Add compaction context injection hook
-3. Test with existing swarm workflows
-### Phase 2: Preemptive Compaction + Event Hybrid
-1. Add token monitoring to coordinator/worker sessions
-2. Implement 80% threshold compaction with cooldown
-3. Add event-driven completion detection
-4. Keep polling as fallback
-### Phase 3: Tool Limits + Parallel Enforcement
-1. Add limits to repo-autopsy, cass, find-exports, semantic-memory
-2. Update swarm-researcher prompt with parallel minimums
-3. Add output validation for parallel enforcement
-### Phase 4: Bonus Features
-1. Todo-aware completion check
-2. Think mode auto-switching for decomposition
-## Consequences
-### Positive
-- **Better worker behavior**: 7-section protocol reduces rogue actions
-- **Context survives compaction**: Critical info preserved through summarization
-- **No more context overflow**: Preemptive compaction at 80%
-- **Faster completion detection**: Events + polling hybrid
-- **Cheaper research**: Parallel execution reduces round-trips
-- **Predictable tool output**: Hard limits prevent context explosion
-### Negative
-- **More complexity**: Additional hooks and state management
-- **Prompt bloat**: 7-section format is verbose (but worth it)
-- **Compaction overhead**: Preemptive compaction adds latency (but prevents crashes)
-### Neutral
-- **Attribution**: All patterns credited to code-yeongyu/oh-my-opencode
-- **Not a fork**: We're adopting patterns, not copying code
-## Alternatives Considered
-### Copy oh-my-opencode Wholesale
-Could fork and adapt. Rejected because:
-- Different architecture (we have Swarm Mail, Hive, etc.)
-- Our patterns complement theirs rather than replace them
-- Selective adoption is cleaner
-### Skip Compaction Handling
-Could rely on OpenCode's built-in compaction. Rejected because:
-- Default compaction loses swarm-specific context
-- Decomposition strategy and dependency graph are critical for workers
-### Always Use Extended Thinking
-Could enable thinking for all coordinator actions. Rejected because:
-- Overkill for simple tasks
-- Slower and more expensive
-- Auto-detection is smarter
-## References
-- [oh-my-opencode](https://github.com/code-yeongyu/oh-my-opencode) by code-yeongyu - Primary source
-- [ADR-007](./ADR-007-swarm-enhancements-worktree-review.md) - Worktree isolation + structured review
-- [ADR-008](./ADR-008-worker-handoff-protocol.md) - Worker handoff protocol
-## Acknowledgments
-Major thanks to **code-yeongyu** for building oh-my-opencode and open-sourcing these patterns. The OpenCode ecosystem is stronger because of contributions like this.
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                                                                 │
-│   🐝  "Good artists copy, great artists steal"                 │
-│       — Picasso (probably misattributed)                       │
-│                                                                 │
-│   We're stealing the good stuff and making it ours.            │
-│   With proper attribution, of course.                          │
-│                                                                 │
-└─────────────────────────────────────────────────────────────────┘
-```

package/docs/planning/ROADMAP.md DELETED Viewed

@@ -1,368 +0,0 @@
-# Implementation Roadmap
-## Overview
-This roadmap implements all 5 ADRs in phased releases over 12 weeks. Each phase delivers incremental value and can ship independently.
-## Phase 0: Preparation (Week 0)
-**Goal:** Set up monorepo infrastructure and validate all ADRs
-**Tasks:**
-- [ ] Install Turborepo + configure turbo.json
-- [ ] Set up Changesets for versioning
-- [ ] Configure dependency-cruiser for circular dep detection
-- [ ] Create packages/swarm-mail and packages/opencode-swarm-plugin directories
-- [ ] Set up TypeScript project references
-- [ ] Configure CI/CD with Turborepo caching
-**Deliverables:**
-- Working monorepo build (`turbo run build`)
-- Published ADRs in docs/planning/
-- CI pipeline running tests for both packages
-**Success Criteria:**
-- `bun run build` builds both packages in correct order
-- No circular dependencies detected
-- CI completes in <2 minutes (with caching)
----
-## Phase 1: Package Extraction (Weeks 1-2)
-**Goal:** Extract swarm-mail package and publish to npm
-**ADRs:** ADR-001, ADR-002
-**Tasks:**
-- [ ] Move src/streams/\* to packages/swarm-mail/src/streams/
-- [ ] Move agent-mail.ts, swarm-mail.ts to swarm-mail
-- [ ] Update imports in opencode-swarm-plugin to use swarm-mail
-- [ ] Migrate integration tests
-- [ ] Write swarm-mail README with examples
-- [ ] Add deprecation warnings in opencode-swarm-plugin
-- [ ] Publish swarm-mail@0.1.0 to npm
-**Deliverables:**
-- swarm-mail published on npm
-- opencode-swarm-plugin depends on swarm-mail
-- Migration guide for existing users
-- TypeDoc API documentation
-**Success Criteria:**
-- All tests pass in both packages
-- swarm-mail works in standalone project
-- No circular dependencies
-- Published tarball <500KB
----
-## Phase 2: Performance Optimizations (Weeks 3-4)
-**Goal:** Replace polling with live queries and add batch operations
-**ADRs:** ADR-003
-**Tasks:**
-- [ ] Create live query wrapper (src/streams/live-query.ts)
-- [ ] Add subscription cleanup tracking
-- [ ] Replace polling in watchInbox()
-- [ ] Replace polling in watchEvents()
-- [ ] Add batch message send API
-- [ ] Add batch event append API
-- [ ] Write integration tests for live queries
-- [ ] Run performance benchmarks (latency, CPU, memory)
-**Deliverables:**
-- Live queries for inbox, events, file reservations
-- Batch APIs for messages and events
-- Performance benchmarks showing improvements
-- Feature flag for gradual rollout
-**Success Criteria:**
-- Notification latency <50ms (99th percentile)
-- CPU usage <1% in idle state
-- Batch operations 10x faster than individual
-- Memory usage increase <20%
-**Metrics (Before → After):**
-- Latency: 250-500ms → <10ms (25-50x improvement)
-- CPU: 5-10% → <1% (5-10x reduction)
-- Queries/sec: 2-4 → 0 (eliminated)
----
-## Phase 3: Message Queue Features (Weeks 5-7)
-**Goal:** Add priority queues, DLQ, TTL, pub/sub
-**ADRs:** ADR-004
-**Tasks:**
-**Week 5: Priority Queues + DLQ**
-- [ ] Add priority column to messages table
-- [ ] Update getInbox() to ORDER BY priority DESC
-- [ ] Create failed_messages table
-- [ ] Implement retry logic with exponential backoff
-- [ ] Add DLQ viewer to CLI
-**Week 6: TTL + Pub/Sub**
-- [ ] Add expires_at column to messages
-- [ ] Implement background TTL cleanup job
-- [ ] Add topic column to messages
-- [ ] Implement subscribeToTopic() using live queries
-- [ ] Support wildcard topic subscriptions
-**Week 7: Testing + Documentation**
-- [ ] Write integration tests for all features
-- [ ] Add examples to README
-- [ ] Document retry/DLQ behavior
-- [ ] Document pub/sub patterns
-**Deliverables:**
-- Priority queues (4 levels: 0=urgent, 3=low)
-- DLQ with retry tracking
-- TTL with background cleanup
-- Pub/sub with wildcard topics
-**Success Criteria:**
-- Priority messages processed first
-- Failed messages retry 3x before DLQ
-- Expired messages cleaned up within 5 minutes
-- Topic subscriptions work with wildcards
----
-## Phase 4: DevTools + CLI (Weeks 8-10)
-**Goal:** Build DevTools UI and CLI for observability
-**ADRs:** ADR-005
-**Tasks:**
-**Week 8: CLI**
-- [ ] Add @effect/cli dependency
-- [ ] Implement `swarm events` command
-- [ ] Implement `swarm messages` command
-- [ ] Implement `swarm locks` command
-- [ ] Implement `swarm replay` command
-- [ ] Add `--tail` mode for real-time updates
-**Week 9: DevTools UI**
-- [ ] Scaffold SvelteKit app in apps/devtools
-- [ ] Build event stream viewer
-- [ ] Build message inbox/outbox viewer
-- [ ] Build file reservation timeline
-- [ ] Add SSE endpoint for real-time updates
-**Week 10: Integration + Polish**
-- [ ] Static export of DevTools UI
-- [ ] Embed UI in plugin (serve at /\_swarm/devtools)
-- [ ] Add screenshots to README
-- [ ] Write user guide
-**Deliverables:**
-- CLI with 5 commands (events, messages, locks, replay, metrics)
-- DevTools UI (embeddable SvelteKit app)
-- Real-time updates via SSE
-- User guide with screenshots
-**Success Criteria:**
-- CLI can tail events in real-time
-- DevTools UI shows live message stream
-- UI works offline (static export)
-- Documentation covers all CLI commands
----
-## Phase 5: Metrics + Tracing (Weeks 11-12)
-**Goal:** Add Prometheus metrics and OpenTelemetry tracing
-**ADRs:** ADR-005
-**Tasks:**
-**Week 11: Metrics**
-- [ ] Add prom-client dependency
-- [ ] Instrument message send/receive latency
-- [ ] Add lock contention histogram
-- [ ] Add queue depth gauge
-- [ ] Expose /metrics endpoint
-- [ ] Add Grafana dashboard template
-**Week 12: Tracing**
-- [ ] Add @effect/opentelemetry dependency
-- [ ] Instrument message send/receive spans
-- [ ] Propagate trace context in messages
-- [ ] Add trace_id to message metadata
-- [ ] Test with Jaeger/Zipkin
-- [ ] Write tracing guide
-**Deliverables:**
-- Prometheus metrics at /metrics endpoint
-- OpenTelemetry tracing integration
-- Grafana dashboard template
-- Tracing guide with Jaeger setup
-**Success Criteria:**
-- Metrics exposed and scrapeable by Prometheus
-- Traces visible in Jaeger UI
-- Trace propagation across agents works
-- Documentation for all observability tools
----
-## Phase 6: Saga Pattern (Future)
-**Goal:** Implement saga orchestration for long-running workflows
-**ADRs:** ADR-004 (Phase 5)
-**Status:** Deferred to v2.0
-**Tasks:**
-- [ ] Create saga_instances and saga_steps tables
-- [ ] Add saga coordinator logic
-- [ ] Implement compensation pattern
-- [ ] Add saga viewer to DevTools UI
-- [ ] Write saga pattern examples
-**Deliverables:**
-- Saga orchestration pattern
-- Compensation (undo) support
-- Saga viewer in DevTools
-- 3+ example saga workflows
----
-## Release Schedule
-| Version   | Phase | Features                       | ETA     |
-| --------- | ----- | ------------------------------ | ------- |
-| **0.1.0** | 1     | swarm-mail package extraction | Week 2  |
-| **0.2.0** | 2     | Live queries, batch operations | Week 4  |
-| **0.3.0** | 3     | Priority, DLQ, TTL, pub/sub    | Week 7  |
-| **0.4.0** | 4     | DevTools UI + CLI              | Week 10 |
-| **0.5.0** | 5     | Metrics + tracing              | Week 12 |
-| **1.0.0** | All   | Stable release                 | Week 13 |
-| **2.0.0** | 6     | Saga pattern (future)          | TBD     |
----
-## Dependencies Between Phases
-```
-Phase 0 (Monorepo)
-  └──> Phase 1 (Package Extraction)
-         └──> Phase 2 (Performance)
-                ├──> Phase 3 (Queue Features)
-                └──> Phase 4 (DevTools)
-                       └──> Phase 5 (Metrics/Tracing)
-                              └──> Phase 6 (Sagas, future)
-```
-**Critical Path:** Phases 0→1→2 are sequential. Phases 3-5 can partially overlap after Phase 2.
----
-## Risk Mitigation
-| Risk                                     | Phase | Mitigation                                                |
-| ---------------------------------------- | ----- | --------------------------------------------------------- |
-| Breaking changes during extraction       | 1     | Feature branch, comprehensive tests, migration guide      |
-| Performance regression with live queries | 2     | Feature flag, benchmark before/after, fallback to polling |
-| Complexity of saga pattern               | 6     | Defer to v2.0, gather user feedback first                 |
-| DevTools UI maintenance burden           | 4     | Keep UI minimal, focus on CLI for power users             |
-| Metrics overhead                         | 5     | Make metrics opt-in, minimal instrumentation              |
----
-## Success Metrics
-**Phase 1 (Package Extraction):**
-- swarm-mail used in 3+ external projects within 3 months
-- Zero breaking changes reported by users
-**Phase 2 (Performance):**
-- 25-50x faster notification latency (250-500ms → <10ms)
-- 5-10x lower CPU usage (5-10% → <1%)
-**Phase 3 (Queue Features):**
-- 95% of messages processed within priority SLA
-- <1% messages fail to DLQ
-**Phase 4 (DevTools):**
-- 80% of developers use DevTools UI for debugging
-- CLI used in 50%+ of support cases
-**Phase 5 (Metrics/Tracing):**
-- Metrics dashboard used in production monitoring
-- Distributed traces reduce debugging time by 50%
----
-## Post-1.0 Backlog (v2.0+)
-**Saga Pattern** (ADR-004 Phase 5)
-- Long-running multi-agent workflows
-- Compensation (undo) support
-- Saga state visualization
-**Advanced Pub/Sub**
-- Message routing rules
-- Filter expressions (SQL-like WHERE clauses)
-- At-least-once vs exactly-once delivery guarantees
-**Multi-Project Support**
-- Cross-project message routing
-- Project-level isolation
-- Shared infra for monorepos
-**Performance Tier 2**
-- Connection pooling for multi-DB scenarios
-- Message batching optimizations
-- SKIP LOCKED for exactly-once semantics
-**Security**
-- Message encryption at rest
-- Agent authentication/authorization
-- Audit logging