speexor 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/API-REFERENCE.md +96 -1
  2. package/ARCHITECTURE.md +83 -32
  3. package/BENCHMARKS.md +73 -0
  4. package/CHANGELOG.md +59 -4
  5. package/CODE-OF-CONDUCT.md +83 -83
  6. package/CONTRIBUTING.md +92 -97
  7. package/FAQ.md +132 -105
  8. package/GLOSSARY.md +34 -0
  9. package/LICENSE.md +21 -21
  10. package/PUBLISH.md +82 -77
  11. package/README.md +220 -6
  12. package/REFACTOR-LOG.md +40 -40
  13. package/ROADMAP.md +31 -42
  14. package/SECURITY-DEFAULTS.md +118 -0
  15. package/SECURITY.md +80 -79
  16. package/SUMMARY.md +31 -8
  17. package/TESTING.md +140 -140
  18. package/dist/{agent-5D3BVWNK.js → agent-C64T66XT.js} +4 -4
  19. package/dist/agent-C64T66XT.js.map +1 -0
  20. package/dist/{chunk-B7WLHC4W.js → chunk-5OD5UWB5.js} +322 -121
  21. package/dist/chunk-5OD5UWB5.js.map +1 -0
  22. package/dist/chunk-GOGI3JQD.js +1637 -0
  23. package/dist/chunk-GOGI3JQD.js.map +1 -0
  24. package/dist/{chunk-2F66BZYJ.js → chunk-VEZQT5SX.js} +80 -8
  25. package/dist/chunk-VEZQT5SX.js.map +1 -0
  26. package/dist/cli/index.js +2058 -18
  27. package/dist/cli/index.js.map +1 -1
  28. package/dist/core/index.d.ts +682 -3
  29. package/dist/core/index.js +1 -1
  30. package/dist/index.d.ts +102 -14
  31. package/dist/index.js +55 -29
  32. package/dist/index.js.map +1 -1
  33. package/dist/plugins/index.d.ts +1 -1
  34. package/dist/plugins/index.js +1 -1
  35. package/dist/types-BOMap-tI.d.ts +389 -0
  36. package/docs/PRD03.md +119 -0
  37. package/docs/PRD06.md +125 -0
  38. package/docs/SETUP.md +94 -94
  39. package/docs/TROUBLESHOOTING.md +113 -113
  40. package/docs/adr/0001-record-architecture-decisions.md +44 -0
  41. package/docs/adr/0002-plugin-architecture.md +53 -0
  42. package/docs/adr/0003-recursive-task-decomposition.md +57 -0
  43. package/docs/adr/0004-local-first-security.md +58 -0
  44. package/docs/adr/0005-data-directory-layout.md +69 -0
  45. package/examples/basic.yaml +61 -61
  46. package/package.json +103 -102
  47. package/schema/config.schema.json +119 -119
  48. package/speexor.config.yaml.example +30 -30
  49. package/dist/agent-5D3BVWNK.js.map +0 -1
  50. package/dist/chunk-2F66BZYJ.js.map +0 -1
  51. package/dist/chunk-B7WLHC4W.js.map +0 -1
  52. package/dist/chunk-SXALZEOJ.js +0 -345
  53. package/dist/chunk-SXALZEOJ.js.map +0 -1
  54. package/dist/types-0q_okI2g.d.ts +0 -205
@@ -0,0 +1,118 @@
1
+ # Security & Safety Defaults
2
+
3
+ > Reference document for Speexor's two-layer security and safety model.
4
+ > Created per FR-94 (PRD05 §4.5) to clarify the distinction between
5
+ > Extension Permissions and Action Risk Tiers.
6
+
7
+ ## Overview
8
+
9
+ Speexor has **two independent safety layers** that operate at different levels:
10
+
11
+ 1. **Extension Permissions** — gates what an extension *can ever do* (set once at install time)
12
+ 2. **Action Risk Tiers** — gates what *any* action (from any already-permitted extension or core agent) *does right now* (evaluated every time)
13
+
14
+ These are intentionally separate systems. An extension that passes its permission check still has every runtime action evaluated against the Action Risk Tier policy.
15
+
16
+ ---
17
+
18
+ ## Layer 1: Extension Permissions
19
+
20
+ ### What it controls
21
+ Capabilities granted to third-party extensions at install time.
22
+
23
+ ### Configuration
24
+ `speexor.config.yaml` → `extensions.permissionsMode`
25
+
26
+ | Mode | Behavior |
27
+ |------|----------|
28
+ | `strict` (default) | Extensions run in sandboxed process with enforced permission boundaries |
29
+ | `relaxed` | Permissions still require user confirmation, but sandboxing is less restrictive |
30
+
31
+ ### Permission Axes
32
+
33
+ | Axis | Levels | Default | Description |
34
+ |------|--------|---------|-------------|
35
+ | `fileSystem` | `none`, `read-only`, `read-write`, `scoped`, `full` | `none` | Access to the local file system |
36
+ | `network` | `none`, `read-only`, `scoped`, `full` | `none` | Outbound network access |
37
+ | `shell` | `none`, `read-only`, `scoped`, `full` | `none` | Shell command execution |
38
+ | `secrets` | string[] of named scopes | `[]` | Access to named secrets in the Vault |
39
+
40
+ ### Permission Lifecycle
41
+ 1. **Install**: Extension manifest declares permissions → displayed to user in plain English
42
+ 2. **Confirm**: User explicitly accepts or rejects the permission set
43
+ 3. **Upgrade**: Any permission upgrade on update requires re-confirmation
44
+ 4. **Runtime**: PermissionEnforcer checks every operation against declared permissions
45
+
46
+ ### Enforcement
47
+ - Extensions with `shell: none` + `network: none` → `IsolatedSandbox` (node:vm, no Node built-ins)
48
+ - Extensions requiring broader access → `ProcessSandbox` (child_process with proxy)
49
+ - `worker_threads` is NEVER used as a security boundary (it shares process memory)
50
+
51
+ ---
52
+
53
+ ## Layer 2: Action Risk Tiers
54
+
55
+ ### What it controls
56
+ Whether a specific runtime action (from any source) requires user approval before execution.
57
+
58
+ ### Configuration
59
+ `speexor.config.yaml` → `riskPolicy`
60
+
61
+ | Setting | Default | Description |
62
+ |---------|---------|-------------|
63
+ | `autoApprove` | `[]` | Action categories that auto-execute without approval |
64
+ | `requireApproval` | `["irreversible-high-stakes"]` | Action categories that always require approval |
65
+ | `approvalTimeout` | `4h` | How long an approval request waits before default action |
66
+ | `approvalDefaultAction` | `skip` | What happens when approval times out |
67
+ | `defaultRiskTierForUnknownActions` | `medium` | Default tier when action risk can't be classified |
68
+
69
+ ### Risk Tiers
70
+
71
+ | Tier | Auto-Approve? | Examples |
72
+ |------|---------------|----------|
73
+ | `reversible-low` | Yes (default) | Running tests, linting, reading files |
74
+ | `reversible-medium` | Yes (default) | Creating branches, committing code |
75
+ | `irreversible-high-stakes` | No (requires approval) | Merging PRs, deleting branches, publishing packages, modifying CI config |
76
+ | `unknown` | Depends on `defaultRiskTierForUnknownActions` | Actions not classified by the risk classifier |
77
+
78
+ ### Approval Workflow
79
+ 1. Action is evaluated by GovernanceEngine.evaluateAction()
80
+ 2. If requires approval → ApprovalItem created with expiry + default action
81
+ 3. Appears in dashboard "Approvals" panel (tagged as Axis 2)
82
+ 4. User approves/rejects or timeout triggers default action
83
+
84
+ ---
85
+
86
+ ## Comparison: When Each Layer Applies
87
+
88
+ | Scenario | Extension Permissions | Action Risk Tier |
89
+ |----------|----------------------|------------------|
90
+ | Installing a code-review skill | ✅ Checked (install time) | ❌ Not applicable |
91
+ | Skill reads a file in workspace | ✅ Checked (fileSystem permission) | ✅ Checked — auto-approved (reversible-low) |
92
+ | Skill runs `git push` | ✅ Checked (shell permission) | ✅ Checked (irreversible-high-stakes) |
93
+ | Skill makes HTTP call | ✅ Checked (network permission) | ✅ Checked — auto-approved (reversible-medium) |
94
+ | Core agent creates a PR | ❌ Not applicable (core, not extension) | ✅ Checked (irreversible-high-stakes) |
95
+ | Core agent proposes a new task | ❌ Not applicable | ✅ Checked (task-origin-gate, Axis 1) |
96
+
97
+ ---
98
+
99
+ ## Safety Defaults Summary
100
+
101
+ | Default | Value | Rationale |
102
+ |---------|-------|-----------|
103
+ | Extension permissions default | All `none` | Least privilege by default |
104
+ | Permissions mode | `strict` | Sandbox everything third-party |
105
+ | Unknown risk tier | `medium` | Conservative fallback when an action can't be classified |
106
+ | Approval timeout default action | `skip` | Fail safe, don't auto-execute |
107
+ | Budget limit | Not set (opt-in) | User must explicitly set cost controls |
108
+ | Auto-merge PRs | `false` | Human judgment required for merges |
109
+
110
+ ---
111
+
112
+ ## Related Documentation
113
+
114
+ - [SECURITY.md](SECURITY.md) — Vulnerability reporting, incident response
115
+ - [ARCHITECTURE.md](ARCHITECTURE.md) — System architecture and plugin model
116
+ - [CONTRIBUTING.md](CONTRIBUTING.md) — Extension development guidelines
117
+ - `src/sandbox/` — Sandbox implementation
118
+ - `src/governance/` — Governance engine implementation
package/SECURITY.md CHANGED
@@ -1,79 +1,80 @@
1
- # Security Policy
2
-
3
- ## Supported Versions
4
-
5
- | Version | Supported |
6
- |---------|-----------|
7
- | 0.1.x | ✅ Active development |
8
-
9
- ## Reporting a Vulnerability
10
-
11
- Speexor handles API tokens (GitHub tokens, AI provider keys) and executes AI-generated code. Security is a top priority.
12
-
13
- **To report a vulnerability:**
14
- 1. **Do NOT** open a public GitHub issue
15
- 2. Email: opensource@superdevids.com (or use GitHub's private vulnerability reporting)
16
- 3. Include a detailed description, steps to reproduce, and potential impact
17
-
18
- You can expect:
19
- - **Acknowledgment** within 48 hours
20
- - **Initial assessment** within 5 business days
21
- - **Fix timeline** depending on severity
22
-
23
- ## Security Practices
24
-
25
- ### Credential Management
26
- - API tokens are stored in environment variables or the system keychain
27
- - Secrets are never logged in plaintext
28
- - GitHub tokens are reused from `gh` CLI when possible
29
- - `.speexor/` directory is git-ignored (add to `.gitignore`)
30
-
31
- ### Code Execution
32
- - AI agents generate and execute code in isolated git worktrees
33
- - Each agent runs in its own process/tmux session
34
- - Process runtime has resource limits (SIGTERM → SIGKILL after 5s)
35
- - Worktrees are cleaned up on session stop or package destroy
36
-
37
- ### Network Security
38
- - Dashboard server listens on localhost by default
39
- - CORS is enabled (configurable in production)
40
- - No telemetry or tracking
41
- - No cloud dependency — fully local-first
42
-
43
- ### Data Protection
44
- - Session state stored in `.speexor/state.json` (local only)
45
- - Logs stored in `.speexor/logs/` (local only)
46
- - No data sent to external servers except configured AI providers and GitHub API
47
- - Users control which API endpoints are called
48
-
49
- ### Supply Chain Security
50
- - All dependencies are pinned with exact versions
51
- - pnpm lockfile for deterministic installs
52
- - Regular dependency audits via `pnpm audit`
53
- - Minimal runtime dependencies (12 packages)
54
-
55
- ## Security Checklist for Deployments
56
-
57
- - [ ] AI provider API keys stored in environment variables (not config files)
58
- - [ ] GitHub tokens use minimal scopes (repo, issues, pull requests)
59
- - [ ] Dashboard port not exposed to public internet
60
- - [ ] `.speexor/` directory added to `.gitignore`
61
- - [ ] Regular dependency updates (`pnpm audit`)
62
- - [ ] Agent output reviewed before auto-merge (auto-merge disabled by default)
63
-
64
- ## Third-Party Security
65
-
66
- Speexor relies on:
67
- - **GitHub CLI (`gh`)** — user's existing authentication
68
- - **AI agent CLIs** — OpenCode, Claude Code, Aider, Codex — each with their own security models
69
- - **tmux** (Unix) / **Process** (Windows)for runtime isolation
70
-
71
- Review the security documentation for each tool you use.
72
-
73
- ## Incident Response
74
-
75
- 1. **Detection** — Monitoring for unusual agent behavior or unauthorized access
76
- 2. **Containment** — `speexor stop <session>` to halt affected agents
77
- 3. **Analysis** — Review logs in `.speexor/logs/` and session state
78
- 4. **Recovery** — Rotate affected credentials, clean up worktrees
79
- 5. **Postmortem** — Document findings, update security practices
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ |---------|-----------|
7
+ | 0.1.x | ✅ Active development |
8
+
9
+ ## Reporting a Vulnerability
10
+
11
+ Speexor handles API tokens (GitHub tokens, AI provider keys) and executes AI-generated code. Security is a top priority.
12
+
13
+ **To report a vulnerability:**
14
+ 1. **Do NOT** open a public GitHub issue
15
+ 2. Email: opensource@superdevids.com (or use GitHub's private vulnerability reporting)
16
+ 3. Include a detailed description, steps to reproduce, and potential impact
17
+
18
+ You can expect:
19
+ - **Acknowledgment** within 48 hours
20
+ - **Initial assessment** within 5 business days
21
+ - **Fix timeline** depending on severity
22
+
23
+ ## Security Practices
24
+
25
+ ### Credential Management
26
+ - API tokens are stored in environment variables or the system keychain
27
+ - Secrets are never logged in plaintext
28
+ - GitHub tokens are reused from `gh` CLI when possible
29
+ - `.speexor/` directory is git-ignored (add to `.gitignore`)
30
+
31
+ ### Code Execution
32
+ - AI agents generate and execute code in isolated git worktrees
33
+ - Each agent runs in its own process/tmux session
34
+ - Process runtime has resource limits (SIGTERM → SIGKILL after 5s)
35
+ - Worktrees are cleaned up on session stop or package destroy
36
+
37
+ ### Network Security
38
+ - Dashboard server listens on localhost by default
39
+ - CORS is enabled (configurable in production)
40
+ - No telemetry or tracking
41
+ - **No mandatory cloud dependency** — core orchestration is fully local-first
42
+ - **Known network exception:** The Extension Marketplace (`speexor ext search`, `speexor ext install`) makes outbound HTTPS calls to a public registry index (`marketplaceIndex` URL) to discover and download community extensions. This is optional: the marketplace can be fully disabled for air-gapped use by setting `extensions.marketplaceIndex: null` in config. User-configured LLM provider API calls and SCM (GitHub) API calls are the only other network dependencies — these are explicit user choices, not mandatory telemetry.
43
+
44
+ ### Data Protection
45
+ - Session state stored in `.speexor/state.json` (local only)
46
+ - Logs stored in `.speexor/logs/` (local only)
47
+ - No data sent to external servers except configured AI providers and GitHub API
48
+ - Users control which API endpoints are called
49
+
50
+ ### Supply Chain Security
51
+ - All dependencies are pinned with exact versions
52
+ - pnpm lockfile for deterministic installs
53
+ - Regular dependency audits via `pnpm audit`
54
+ - Minimal runtime dependencies (12 packages)
55
+
56
+ ## Security Checklist for Deployments
57
+
58
+ - [ ] AI provider API keys stored in environment variables (not config files)
59
+ - [ ] GitHub tokens use minimal scopes (repo, issues, pull requests)
60
+ - [ ] Dashboard port not exposed to public internet
61
+ - [ ] `.speexor/` directory added to `.gitignore`
62
+ - [ ] Regular dependency updates (`pnpm audit`)
63
+ - [ ] Agent output reviewed before auto-merge (auto-merge disabled by default)
64
+
65
+ ## Third-Party Security
66
+
67
+ Speexor relies on:
68
+ - **GitHub CLI (`gh`)** — user's existing authentication
69
+ - **AI agent CLIs** — OpenCode, Claude Code, Aider, Codex — each with their own security models
70
+ - **tmux** (Unix) / **Process** (Windows) — for runtime isolation
71
+
72
+ Review the security documentation for each tool you use.
73
+
74
+ ## Incident Response
75
+
76
+ 1. **Detection** — Monitoring for unusual agent behavior or unauthorized access
77
+ 2. **Containment** — `speexor stop <session>` to halt affected agents
78
+ 3. **Analysis** — Review logs in `.speexor/logs/` and session state
79
+ 4. **Recovery** — Rotate affected credentials, clean up worktrees
80
+ 5. **Postmortem** — Document findings, update security practices
package/SUMMARY.md CHANGED
@@ -3,19 +3,20 @@
3
3
  > Agent Orchestrator for multi-AI coding agent orchestration across repositories.
4
4
 
5
5
  ## Project Status
6
- - **Version:** 0.1.0 (pre-release)
6
+ - **Version:** 0.2.0 (pre-release)
7
7
  - **License:** MIT
8
8
  - **Language:** TypeScript (ESM, ES2022)
9
9
  - **Node.js:** >= 18.0.0
10
+ - **Test files:** 19 files, ~320 tests
10
11
 
11
12
  ## What It Does
12
13
  Speexor is an orchestration layer that spawns and manages multiple AI coding agents in parallel across one or more git repositories. Each agent runs in an isolated git worktree with its own runtime session, handles one task autonomously, and can automatically respond to CI failures and PR review comments.
13
14
 
14
15
  ## Scope
15
- - **Source files:** 31 TypeScript files (~2,430 lines)
16
- - **Plugins:** 10 implementations across 6 plugin slots
17
- - **CLI commands:** 6
18
- - **AI adapters:** OpenCode, Claude Code, Aider, Codex
16
+ - **Source files:** 60+ TypeScript files (~5,200+ lines)
17
+ - **Plugins:** 13+ implementations across 7 plugin slots
18
+ - **CLI commands:** 14 (6 original + 8 new)
19
+ - **AI adapters:** OpenCode, Claude Code, Aider, Codex (all with exponential retry)
19
20
  - **Runtime backends:** tmux (Unix), Process (Windows)
20
21
 
21
22
  ## Architecture Highlights
@@ -23,6 +24,13 @@ Speexor is an orchestration layer that spawns and manages multiple AI coding age
23
24
  - Agent-agnostic: no vendor lock to a single AI provider
24
25
  - Git-provider agnostic: GitHub as first implementation, interface open for GitLab/Gitea
25
26
  - Dashboard built-in: no external dependencies for monitoring
27
+ - Recursive Task Decomposition (DAG-based planner)
28
+ - Governance Engine (two-axis approval model)
29
+ - Cost Tracking (per-provider/project/node with budget guard)
30
+ - Extension Manager + Plugin SDK
31
+ - Secrets Vault (OS-keychain backend)
32
+ - Decision Quality Evaluation & Calibration
33
+ - Interactive Dashboard v2 (Task Tree, Fleet, Approvals, Cost)
26
34
 
27
35
  ## Milestones
28
36
 
@@ -31,16 +39,31 @@ Speexor is an orchestration layer that spawns and manages multiple AI coding age
31
39
  | M0 — Foundation | Monorepo, core types, CLI skeleton, plugin contracts | ✅ Done |
32
40
  | M1 — Single Agent E2E | OpenCode adapter + tmux + worktree, E2E flow | ✅ Done |
33
41
  | M2 — GitHub Integration | Tracker + SCM + reaction engine | ✅ Done |
34
- | M3 — Multi-Agent | Claude Code, Aider, Codex adapters | ✅ Done |
42
+ | M3 — Multi-Agent Adapters | Claude Code, Aider, Codex adapters | ✅ Done |
35
43
  | M4 — Dashboard MVP | REST API + HTML dashboard | ✅ Done |
36
- | M5 — Cost/Provider | Multi-provider routing config | 🔄 In Progress |
37
- | M6 — Polish & Docs | Documentation, examples, open-source readiness | 🔄 In Progress |
44
+ | M5 — Cost Tracking & Budget Guard | Multi-provider routing config | ✅ Done |
45
+ | M6 — Polish & Testing | Documentation, examples, open-source readiness | ✅ Done |
46
+ | M7 — Task Graph Core & Decomposition | DAG planner, recursive decomposition | ✅ Done |
47
+ | M8 — Parallel Scheduler | Resource-aware parallel execution | ✅ Done |
48
+ | M9 — Governance & Approval | Two-axis approval model, risk policy | ✅ Done |
49
+ | M10 — Extension Manager & SDK | Marketplace, manifest, permissions | ✅ Done |
50
+ | M11 — Security & Secrets Vault | OS-keychain credential storage | ✅ Done |
51
+ | M12 — Interactive Dashboard v2 | Task Tree, Fleet, Approvals, Cost panels | ✅ Done |
52
+ | M22a — Critical Fixes (v5) | Edge case hardening, crash fixes | ✅ Done |
53
+ | M22b — Major Fixes (v5) | Retry logic, race condition fixes | ✅ Done |
54
+ | M22c — Documentation Consolidation | Cross-reference, glossary, API ref | ✅ Done |
55
+ | M13 — Hardening & Cost Guard | Budget enforcement, guardrails, sandbox security, async store, live streaming | ✅ Done |
38
56
 
39
57
  ## Quick Links
40
58
  - [README](./README.md)
41
59
  - [Architecture](./ARCHITECTURE.md)
42
60
  - [Changelog](./CHANGELOG.md)
43
61
  - [Roadmap](./ROADMAP.md)
62
+ - [Glossary](./GLOSSARY.md)
63
+ - [API Reference](./API-REFERENCE.md)
64
+ - [Benchmarks](./BENCHMARKS.md)
65
+ - [Architecture Decisions](./docs/adr/)
66
+ - [Security Defaults](./SECURITY-DEFAULTS.md)
44
67
  - [Contributing](./CONTRIBUTING.md)
45
68
  - [Security](./SECURITY.md)
46
69
  - [Testing](./TESTING.md)
package/TESTING.md CHANGED
@@ -1,140 +1,140 @@
1
- # Testing Guide
2
-
3
- > Testing strategy and guidelines for Speexor — Agent Orchestrator.
4
-
5
- ## Test Framework
6
-
7
- Speexor uses [Vitest](https://vitest.dev/) as the test framework with `@vitest/coverage-v8` for coverage reporting.
8
-
9
- ```bash
10
- # Run all tests
11
- pnpm test
12
-
13
- # Watch mode
14
- pnpm test:watch
15
-
16
- # With coverage
17
- pnpm test:coverage
18
- ```
19
-
20
- ## Test Categories
21
-
22
- ### Unit Tests
23
- Test individual modules in isolation:
24
- - **Core types** — Verify interface contracts and type guards
25
- - **Config validation** — Test Zod schemas with valid/invalid YAML
26
- - **Event bus** — Test emit/on/off/once behavior
27
- - **Plugin contracts** — Verify plugins implement required interfaces
28
-
29
- ### Integration Tests
30
- Test module interactions:
31
- - **Lifecycle + plugins** — Verify plugin registration, agent spawn flow
32
- - **Config + lifecycle** — Verify config loading → lifecycle initialization
33
- - **Dashboard state** — Verify state mutations reflect correctly
34
- - **Session store** — Verify persistence round-trip
35
-
36
- ### Plugin Tests
37
- Each plugin should be tested against its interface contract:
38
- - **Agent plugins** — Test spawn/sendInput/getStatus/kill lifecycle
39
- - **Runtime plugins** — Test createSession/destroySession flow
40
- - **Workspace plugin** — Test worktree lifecycle with mock git repo
41
- - **Tracker/SCM** — Test API calls (mock gh CLI)
42
- - **Notifier** — Test notification dispatch
43
-
44
- ### E2E Tests (Future)
45
- Full end-to-end tests with real agent CLIs:
46
- - Requires installed agent CLI (opencode, claude-code, etc.)
47
- - Requires GitHub CLI authentication
48
- - Creates real worktrees and branches (clean up after)
49
-
50
- ## Coverage Requirements
51
-
52
- | Category | Target |
53
- |----------|--------|
54
- | Core types & config | ≥90% |
55
- | CLI commands | ≥80% |
56
- | Plugin loader | ≥90% |
57
- | Agent adapters | ≥75% |
58
- | Runtime adapters | ≥80% |
59
- | Dashboard | ≥70% |
60
- | Reaction engine | ≥85% |
61
- | Session store | ≥90% |
62
-
63
- ## Test Structure
64
-
65
- ```
66
- tests/
67
- ├── unit/
68
- │ ├── core/
69
- │ │ ├── config.test.ts
70
- │ │ ├── event-bus.test.ts
71
- │ │ └── lifecycle.test.ts
72
- │ ├── plugins/
73
- │ │ ├── opencode.test.ts
74
- │ │ ├── tmux.test.ts
75
- │ │ └── git-worktree.test.ts
76
- │ └── cli/
77
- │ └── commands.test.ts
78
- ├── integration/
79
- │ ├── lifecycle-plugins.test.ts
80
- │ └── config-lifecycle.test.ts
81
- └── fixtures/
82
- ├── valid-config.yaml
83
- ├── invalid-config.yaml
84
- └── mock-gh-responses/
85
- ```
86
-
87
- ## Writing Tests
88
-
89
- ### Vitest Setup
90
- ```typescript
91
- import { describe, it, expect, vi, beforeEach } from 'vitest'
92
- ```
93
-
94
- ### Example: Testing Config Validation
95
- ```typescript
96
- import { validateConfig } from '../src/core/config.js'
97
-
98
- describe('Config Validation', () => {
99
- it('accepts valid minimal config', () => {
100
- const config = {
101
- version: '1',
102
- projects: [
103
- {
104
- name: 'test',
105
- repository: 'https://github.com/user/repo',
106
- provider: { primary: 'opencode' },
107
- },
108
- ],
109
- }
110
- expect(() => validateConfig(config)).not.toThrow()
111
- })
112
-
113
- it('rejects config without version', () => {
114
- expect(() => validateConfig({ projects: [] })).toThrow()
115
- })
116
- })
117
- ```
118
-
119
- ### Example: Testing Plugin Contract
120
- ```typescript
121
- import { OpenCodeAgent } from '../src/plugins/agent/opencode.js'
122
- import { createEventBus } from '../src/core/event-bus.js'
123
-
124
- describe('OpenCodeAgent', () => {
125
- it('implements AgentPlugin interface', () => {
126
- const agent = new OpenCodeAgent()
127
- expect(agent.name).toBe('opencode-agent')
128
- expect(agent.type).toBe('agent')
129
- expect(agent.spawn).toBeInstanceOf(Function)
130
- expect(agent.kill).toBeInstanceOf(Function)
131
- })
132
- })
133
- ```
134
-
135
- ## Mocking Guidelines
136
-
137
- - **gh CLI** — Use `vi.mock('node:child_process')` to mock execSync
138
- - **tmux** — Mock process execution, don't require tmux installation
139
- - **File system** — Use `vi.mock('node:fs')` for config/store tests
140
- - **Event bus** — Use real EventBus (not mocked) for integration tests
1
+ # Testing Guide
2
+
3
+ > Testing strategy and guidelines for Speexor — Agent Orchestrator.
4
+
5
+ ## Test Framework
6
+
7
+ Speexor uses [Vitest](https://vitest.dev/) as the test framework with `@vitest/coverage-v8` for coverage reporting.
8
+
9
+ ```bash
10
+ # Run all tests
11
+ pnpm test
12
+
13
+ # Watch mode
14
+ pnpm test:watch
15
+
16
+ # With coverage
17
+ pnpm test:coverage
18
+ ```
19
+
20
+ ## Test Categories
21
+
22
+ ### Unit Tests
23
+ Test individual modules in isolation:
24
+ - **Core types** — Verify interface contracts and type guards
25
+ - **Config validation** — Test Zod schemas with valid/invalid YAML
26
+ - **Event bus** — Test emit/on/off/once behavior
27
+ - **Plugin contracts** — Verify plugins implement required interfaces
28
+
29
+ ### Integration Tests
30
+ Test module interactions:
31
+ - **Lifecycle + plugins** — Verify plugin registration, agent spawn flow
32
+ - **Config + lifecycle** — Verify config loading → lifecycle initialization
33
+ - **Dashboard state** — Verify state mutations reflect correctly
34
+ - **Session store** — Verify persistence round-trip
35
+
36
+ ### Plugin Tests
37
+ Each plugin should be tested against its interface contract:
38
+ - **Agent plugins** — Test spawn/sendInput/getStatus/kill lifecycle
39
+ - **Runtime plugins** — Test createSession/destroySession flow
40
+ - **Workspace plugin** — Test worktree lifecycle with mock git repo
41
+ - **Tracker/SCM** — Test API calls (mock gh CLI)
42
+ - **Notifier** — Test notification dispatch
43
+
44
+ ### E2E Tests (Future)
45
+ Full end-to-end tests with real agent CLIs:
46
+ - Requires installed agent CLI (opencode, claude-code, etc.)
47
+ - Requires GitHub CLI authentication
48
+ - Creates real worktrees and branches (clean up after)
49
+
50
+ ## Coverage Requirements
51
+
52
+ | Category | Target |
53
+ |----------|--------|
54
+ | Core types & config | ≥90% |
55
+ | CLI commands | ≥80% |
56
+ | Plugin loader | ≥90% |
57
+ | Agent adapters | ≥75% |
58
+ | Runtime adapters | ≥80% |
59
+ | Dashboard | ≥70% |
60
+ | Reaction engine | ≥85% |
61
+ | Session store | ≥90% |
62
+
63
+ ## Test Structure
64
+
65
+ ```
66
+ tests/
67
+ ├── unit/
68
+ │ ├── core/
69
+ │ │ ├── config.test.ts
70
+ │ │ ├── event-bus.test.ts
71
+ │ │ └── lifecycle.test.ts
72
+ │ ├── plugins/
73
+ │ │ ├── opencode.test.ts
74
+ │ │ ├── tmux.test.ts
75
+ │ │ └── git-worktree.test.ts
76
+ │ └── cli/
77
+ │ └── commands.test.ts
78
+ ├── integration/
79
+ │ ├── lifecycle-plugins.test.ts
80
+ │ └── config-lifecycle.test.ts
81
+ └── fixtures/
82
+ ├── valid-config.yaml
83
+ ├── invalid-config.yaml
84
+ └── mock-gh-responses/
85
+ ```
86
+
87
+ ## Writing Tests
88
+
89
+ ### Vitest Setup
90
+ ```typescript
91
+ import { describe, it, expect, vi, beforeEach } from 'vitest'
92
+ ```
93
+
94
+ ### Example: Testing Config Validation
95
+ ```typescript
96
+ import { validateConfig } from '../src/core/config.js'
97
+
98
+ describe('Config Validation', () => {
99
+ it('accepts valid minimal config', () => {
100
+ const config = {
101
+ version: '1',
102
+ projects: [
103
+ {
104
+ name: 'test',
105
+ repository: 'https://github.com/user/repo',
106
+ provider: { primary: 'opencode' },
107
+ },
108
+ ],
109
+ }
110
+ expect(() => validateConfig(config)).not.toThrow()
111
+ })
112
+
113
+ it('rejects config without version', () => {
114
+ expect(() => validateConfig({ projects: [] })).toThrow()
115
+ })
116
+ })
117
+ ```
118
+
119
+ ### Example: Testing Plugin Contract
120
+ ```typescript
121
+ import { OpenCodeAgent } from '../src/plugins/agent/opencode.js'
122
+ import { createEventBus } from '../src/core/event-bus.js'
123
+
124
+ describe('OpenCodeAgent', () => {
125
+ it('implements AgentPlugin interface', () => {
126
+ const agent = new OpenCodeAgent()
127
+ expect(agent.name).toBe('opencode-agent')
128
+ expect(agent.type).toBe('agent')
129
+ expect(agent.spawn).toBeInstanceOf(Function)
130
+ expect(agent.kill).toBeInstanceOf(Function)
131
+ })
132
+ })
133
+ ```
134
+
135
+ ## Mocking Guidelines
136
+
137
+ - **gh CLI** — Use `vi.mock('node:child_process')` to mock execSync
138
+ - **tmux** — Mock process execution, don't require tmux installation
139
+ - **File system** — Use `vi.mock('node:fs')` for config/store tests
140
+ - **Event bus** — Use real EventBus (not mocked) for integration tests
@@ -1,5 +1,5 @@
1
- import { loadConfig, SpeexorLifecycle } from './chunk-2F66BZYJ.js';
2
- import { loadAllPlugins } from './chunk-B7WLHC4W.js';
1
+ import { loadConfig, SpeexorLifecycle } from './chunk-VEZQT5SX.js';
2
+ import { loadAllPlugins } from './chunk-5OD5UWB5.js';
3
3
  import Debug from 'debug';
4
4
 
5
5
  var debug = Debug("speexor:agent");
@@ -33,5 +33,5 @@ async function agentSpawnCommand(options) {
33
33
  }
34
34
 
35
35
  export { agentSpawnCommand };
36
- //# sourceMappingURL=agent-5D3BVWNK.js.map
37
- //# sourceMappingURL=agent-5D3BVWNK.js.map
36
+ //# sourceMappingURL=agent-C64T66XT.js.map
37
+ //# sourceMappingURL=agent-C64T66XT.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/cli/agent.ts"],"names":[],"mappings":";;;;AAMA,IAAM,KAAA,GAAQ,MAAM,eAAe,CAAA;AAOnC,eAAsB,kBAAkB,OAAA,EAAuB;AAC7D,EAAA,MAAM,SAAS,UAAA,EAAW;AAC1B,EAAA,MAAM,SAAA,GAAY,IAAI,gBAAA,CAAiB,MAAM,CAAA;AAC7C,EAAA,MAAM,UAAU,UAAA,EAAW;AAE3B,EAAA,MAAM,UAAU,cAAA,EAAe;AAC/B,EAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,IAAA,SAAA,CAAU,eAAe,MAAM,CAAA;AAAA,EACjC;AAEA,EAAA,MAAM,OAAA,GAAU,MAAA,CAAO,QAAA,CAAS,CAAC,CAAA;AACjC,EAAA,IAAI,CAAC,OAAA,EAAS;AACZ,IAAA,MAAM,IAAI,MAAM,uBAAuB,CAAA;AAAA,EACzC;AAEA,EAAA,MAAM,IAAA,GAAkB;AAAA,IACtB,IAAI,OAAA,CAAQ,IAAA;AAAA,IACZ,OAAO,OAAA,CAAQ,IAAA;AAAA,IACf,WAAA,EAAa,CAAA,KAAA,EAAQ,OAAA,CAAQ,IAAI,CAAA,CAAA;AAAA,IACjC,YAAY,OAAA,CAAQ,UAAA;AAAA,IACpB,MAAA,EAAQ,CAAA,QAAA,EAAW,OAAA,CAAQ,IAAI,CAAA,CAAA;AAAA,IAC/B,QAAA,EAAW,OAAA,CAAQ,KAAA,IAA2B,OAAA,CAAQ,QAAA,CAAS;AAAA,GACjE;AAEA,EAAA,KAAA,CAAM,2BAA2B,IAAA,CAAK,EAAE,CAAA,OAAA,EAAU,IAAA,CAAK,QAAQ,CAAA,CAAE,CAAA;AAEjE,EAAA,MAAM,OAAA,GAAU,MAAM,SAAA,CAAU,UAAA,CAAW,IAAI,CAAA;AAC/C,EAAA,OAAA,CAAQ,GAAA,CAAI;AAAA,wBAAA,EAAwB,OAAA,CAAQ,EAAE,CAAA,CAAE,CAAA;AAChD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,sBAAA,EAAkB,OAAA,CAAQ,QAAQ,CAAA,CAAE,CAAA;AAChD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,kBAAA,EAAc,IAAA,CAAK,EAAE;AAAA,CAAI,CAAA;AACvC","file":"agent-C64T66XT.js","sourcesContent":["import { loadConfig } from '../core/config.js'\r\nimport { SpeexorLifecycle } from '../core/lifecycle.js'\r\nimport { loadAllPlugins } from '../plugins/index.js'\r\nimport type { AgentTask, AgentProvider } from '../core/types.js'\r\nimport Debug from 'debug'\r\n\r\nconst debug = Debug('speexor:agent')\r\n\r\ninterface SpawnOptions {\r\n task: string\r\n agent: string\r\n}\r\n\r\nexport async function agentSpawnCommand(options: SpawnOptions) {\r\n const config = loadConfig()\r\n const lifecycle = new SpeexorLifecycle(config)\r\n await lifecycle.initialize()\r\n\r\n const plugins = loadAllPlugins()\r\n for (const plugin of plugins) {\r\n lifecycle.registerPlugin(plugin)\r\n }\r\n\r\n const project = config.projects[0]\r\n if (!project) {\r\n 
throw new Error('No project configured')\r\n }\r\n\r\n const task: AgentTask = {\r\n id: options.task,\r\n title: options.task,\r\n description: `Task ${options.task}`,\r\n repository: project.repository,\r\n branch: `speexor/${options.task}`,\r\n provider: (options.agent as AgentProvider) || project.provider.primary,\r\n }\r\n\r\n debug(`Spawning agent for task ${task.id} using ${task.provider}`)\r\n\r\n const session = await lifecycle.spawnAgent(task)\r\n console.log(`\\n ✅ Agent spawned: ${session.id}`)\r\n console.log(` 🤖 Provider: ${session.provider}`)\r\n console.log(` 📋 Task: ${task.id}\\n`)\r\n}\r\n"]}