agentic-orchestrator 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/CLAUDE.md +2 -2
- package/README.md +47 -14
- package/agentic/orchestrator/agents.yaml +13 -0
- package/agentic/orchestrator/policy.yaml +3 -0
- package/agentic/orchestrator/schemas/agents.schema.json +76 -0
- package/agentic/orchestrator/schemas/policy.schema.json +16 -0
- package/agentic/orchestrator/schemas/policy.user.schema.json +16 -0
- package/agentic/orchestrator/schemas/state.schema.json +53 -0
- package/apps/control-plane/src/application/configuration-service.ts +181 -0
- package/apps/control-plane/src/application/kernel-tool-wiring.ts +292 -0
- package/apps/control-plane/src/application/services/checkpoint-service.ts +523 -0
- package/apps/control-plane/src/application/services/feature-send-message-service.ts +132 -0
- package/apps/control-plane/src/application/services/patch-service.ts +29 -5
- package/apps/control-plane/src/application/services/repo-operations-service.ts +276 -0
- package/apps/control-plane/src/application/services/worktree-watchdog-service.ts +156 -0
- package/apps/control-plane/src/cli/cli-argument-parser.ts +12 -0
- package/apps/control-plane/src/cli/help-command-handler.ts +17 -0
- package/apps/control-plane/src/cli/init-command-handler.ts +31 -0
- package/apps/control-plane/src/cli/resume-command-handler.ts +31 -4
- package/apps/control-plane/src/cli/rollback-command-handler.ts +217 -0
- package/apps/control-plane/src/cli/run-command-handler.ts +8 -0
- package/apps/control-plane/src/cli/types.ts +3 -0
- package/apps/control-plane/src/core/kernel-types.ts +55 -0
- package/apps/control-plane/src/core/kernel.ts +61 -878
- package/apps/control-plane/src/core/tool-caller.ts +10 -0
- package/apps/control-plane/src/core/utils/field-readers.ts +38 -0
- package/apps/control-plane/src/core/utils/index-normalizer.ts +119 -0
- package/apps/control-plane/src/core/utils/path-normalizers.ts +22 -0
- package/apps/control-plane/src/interfaces/cli/bootstrap.ts +15 -0
- package/apps/control-plane/src/providers/api-worker-provider.ts +14 -12
- package/apps/control-plane/src/providers/cli-worker-provider.ts +82 -12
- package/apps/control-plane/src/providers/providers.ts +45 -24
- package/apps/control-plane/src/providers/worker-provider-factory.ts +36 -1
- package/apps/control-plane/src/supervisor/run-coordinator.ts +91 -36
- package/apps/control-plane/src/supervisor/runtime.ts +107 -1
- package/apps/control-plane/src/supervisor/types.ts +9 -0
- package/apps/control-plane/src/supervisor/worker-decision-loop.ts +253 -14
- package/apps/control-plane/test/checkpoint-service.spec.ts +537 -0
- package/apps/control-plane/test/cli-helpers.spec.ts +28 -0
- package/apps/control-plane/test/cli.unit.spec.ts +52 -0
- package/apps/control-plane/test/configuration-service.spec.ts +466 -0
- package/apps/control-plane/test/dashboard-api.integration.spec.ts +537 -0
- package/apps/control-plane/test/dashboard-client.spec.ts +233 -0
- package/apps/control-plane/test/feature-send-message-service.spec.ts +314 -0
- package/apps/control-plane/test/init-wizard.spec.ts +35 -0
- package/apps/control-plane/test/path-normalizers.spec.ts +41 -0
- package/apps/control-plane/test/repo-operations-service.spec.ts +339 -0
- package/apps/control-plane/test/resume-command.spec.ts +33 -0
- package/apps/control-plane/test/review-workspace-logic.spec.ts +130 -0
- package/apps/control-plane/test/rollback-command.spec.ts +208 -0
- package/apps/control-plane/test/run-coordinator.spec.ts +119 -0
- package/apps/control-plane/test/worker-decision-loop.spec.ts +209 -0
- package/apps/control-plane/test/worker-provider-adapters.spec.ts +102 -0
- package/apps/control-plane/test/worker-provider-factory.spec.ts +14 -0
- package/apps/control-plane/test/worktree-watchdog-service.spec.ts +147 -0
- package/config/agentic/orchestrator/agents.yaml +13 -0
- package/dist/apps/control-plane/application/configuration-service.d.ts +19 -0
- package/dist/apps/control-plane/application/configuration-service.js +123 -0
- package/dist/apps/control-plane/application/configuration-service.js.map +1 -0
- package/dist/apps/control-plane/application/kernel-tool-wiring.d.ts +39 -0
- package/dist/apps/control-plane/application/kernel-tool-wiring.js +38 -0
- package/dist/apps/control-plane/application/kernel-tool-wiring.js.map +1 -0
- package/dist/apps/control-plane/application/services/checkpoint-service.d.ts +84 -0
- package/dist/apps/control-plane/application/services/checkpoint-service.js +367 -0
- package/dist/apps/control-plane/application/services/checkpoint-service.js.map +1 -0
- package/dist/apps/control-plane/application/services/feature-send-message-service.d.ts +25 -0
- package/dist/apps/control-plane/application/services/feature-send-message-service.js +105 -0
- package/dist/apps/control-plane/application/services/feature-send-message-service.js.map +1 -0
- package/dist/apps/control-plane/application/services/patch-service.d.ts +6 -0
- package/dist/apps/control-plane/application/services/patch-service.js +11 -2
- package/dist/apps/control-plane/application/services/patch-service.js.map +1 -1
- package/dist/apps/control-plane/application/services/repo-operations-service.d.ts +70 -0
- package/dist/apps/control-plane/application/services/repo-operations-service.js +213 -0
- package/dist/apps/control-plane/application/services/repo-operations-service.js.map +1 -0
- package/dist/apps/control-plane/application/services/worktree-watchdog-service.d.ts +23 -0
- package/dist/apps/control-plane/application/services/worktree-watchdog-service.js +119 -0
- package/dist/apps/control-plane/application/services/worktree-watchdog-service.js.map +1 -0
- package/dist/apps/control-plane/cli/cli-argument-parser.js +12 -0
- package/dist/apps/control-plane/cli/cli-argument-parser.js.map +1 -1
- package/dist/apps/control-plane/cli/help-command-handler.js +17 -0
- package/dist/apps/control-plane/cli/help-command-handler.js.map +1 -1
- package/dist/apps/control-plane/cli/init-command-handler.js +23 -0
- package/dist/apps/control-plane/cli/init-command-handler.js.map +1 -1
- package/dist/apps/control-plane/cli/resume-command-handler.js +25 -5
- package/dist/apps/control-plane/cli/resume-command-handler.js.map +1 -1
- package/dist/apps/control-plane/cli/rollback-command-handler.d.ts +6 -0
- package/dist/apps/control-plane/cli/rollback-command-handler.js +177 -0
- package/dist/apps/control-plane/cli/rollback-command-handler.js.map +1 -0
- package/dist/apps/control-plane/cli/run-command-handler.js +7 -1
- package/dist/apps/control-plane/cli/run-command-handler.js.map +1 -1
- package/dist/apps/control-plane/cli/types.d.ts +3 -0
- package/dist/apps/control-plane/cli/types.js +1 -0
- package/dist/apps/control-plane/cli/types.js.map +1 -1
- package/dist/apps/control-plane/core/configuration-service.d.ts +25 -0
- package/dist/apps/control-plane/core/configuration-service.js +130 -0
- package/dist/apps/control-plane/core/configuration-service.js.map +1 -0
- package/dist/apps/control-plane/core/kernel-tool-wiring.d.ts +50 -0
- package/dist/apps/control-plane/core/kernel-tool-wiring.js +44 -0
- package/dist/apps/control-plane/core/kernel-tool-wiring.js.map +1 -0
- package/dist/apps/control-plane/core/kernel-types.d.ts +48 -0
- package/dist/apps/control-plane/core/kernel-types.js +2 -0
- package/dist/apps/control-plane/core/kernel-types.js.map +1 -0
- package/dist/apps/control-plane/core/kernel.d.ts +17 -48
- package/dist/apps/control-plane/core/kernel.js +44 -539
- package/dist/apps/control-plane/core/kernel.js.map +1 -1
- package/dist/apps/control-plane/core/tool-caller.d.ts +10 -0
- package/dist/apps/control-plane/core/utils/error-normalizer.d.ts +2 -0
- package/dist/apps/control-plane/core/utils/error-normalizer.js +51 -0
- package/dist/apps/control-plane/core/utils/error-normalizer.js.map +1 -0
- package/dist/apps/control-plane/core/utils/field-readers.d.ts +9 -0
- package/dist/apps/control-plane/core/utils/field-readers.js +30 -0
- package/dist/apps/control-plane/core/utils/field-readers.js.map +1 -0
- package/dist/apps/control-plane/core/utils/index-normalizer.d.ts +7 -0
- package/dist/apps/control-plane/core/utils/index-normalizer.js +92 -0
- package/dist/apps/control-plane/core/utils/index-normalizer.js.map +1 -0
- package/dist/apps/control-plane/core/utils/path-normalizers.d.ts +2 -0
- package/dist/apps/control-plane/core/utils/path-normalizers.js +17 -0
- package/dist/apps/control-plane/core/utils/path-normalizers.js.map +1 -0
- package/dist/apps/control-plane/interfaces/cli/bootstrap.js +13 -1
- package/dist/apps/control-plane/interfaces/cli/bootstrap.js.map +1 -1
- package/dist/apps/control-plane/providers/api-worker-provider.d.ts +4 -13
- package/dist/apps/control-plane/providers/api-worker-provider.js +10 -0
- package/dist/apps/control-plane/providers/api-worker-provider.js.map +1 -1
- package/dist/apps/control-plane/providers/cli-worker-provider.d.ts +11 -13
- package/dist/apps/control-plane/providers/cli-worker-provider.js +64 -0
- package/dist/apps/control-plane/providers/cli-worker-provider.js.map +1 -1
- package/dist/apps/control-plane/providers/providers.d.ts +31 -24
- package/dist/apps/control-plane/providers/providers.js +10 -0
- package/dist/apps/control-plane/providers/providers.js.map +1 -1
- package/dist/apps/control-plane/providers/worker-provider-factory.d.ts +11 -0
- package/dist/apps/control-plane/providers/worker-provider-factory.js +20 -1
- package/dist/apps/control-plane/providers/worker-provider-factory.js.map +1 -1
- package/dist/apps/control-plane/supervisor/run-coordinator.d.ts +3 -0
- package/dist/apps/control-plane/supervisor/run-coordinator.js +81 -33
- package/dist/apps/control-plane/supervisor/run-coordinator.js.map +1 -1
- package/dist/apps/control-plane/supervisor/runtime.d.ts +8 -1
- package/dist/apps/control-plane/supervisor/runtime.js +90 -0
- package/dist/apps/control-plane/supervisor/runtime.js.map +1 -1
- package/dist/apps/control-plane/supervisor/types.d.ts +11 -0
- package/dist/apps/control-plane/supervisor/types.js.map +1 -1
- package/dist/apps/control-plane/supervisor/worker-decision-loop.d.ts +21 -1
- package/dist/apps/control-plane/supervisor/worker-decision-loop.js +207 -13
- package/dist/apps/control-plane/supervisor/worker-decision-loop.js.map +1 -1
- package/package.json +1 -1
- package/packages/web-dashboard/package.json +2 -0
- package/packages/web-dashboard/src/app/analytics/page.tsx +83 -2
- package/packages/web-dashboard/src/app/api/actions/route.ts +92 -1
- package/packages/web-dashboard/src/app/api/analytics/route.ts +5 -2
- package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/[checkpointId]/diff/route.ts +43 -0
- package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/compare/route.ts +45 -0
- package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/stream/route.ts +170 -0
- package/packages/web-dashboard/src/app/api/features/[id]/file-diff/route.ts +144 -0
- package/packages/web-dashboard/src/app/api/features/[id]/log-stream/route.ts +167 -0
- package/packages/web-dashboard/src/app/api/features/[id]/raw-logs/[filename]/route.ts +65 -0
- package/packages/web-dashboard/src/app/api/features/[id]/raw-logs/route.ts +63 -0
- package/packages/web-dashboard/src/app/api/features/[id]/timeline/route.ts +60 -0
- package/packages/web-dashboard/src/app/feature/[id]/page.tsx +32 -11
- package/packages/web-dashboard/src/app/globals.css +2 -0
- package/packages/web-dashboard/src/components/detail-panel.tsx +483 -0
- package/packages/web-dashboard/src/components/review-workspace.tsx +1162 -0
- package/packages/web-dashboard/src/lib/aop-client.ts +725 -0
- package/packages/web-dashboard/src/lib/review-contracts.ts +182 -0
- package/packages/web-dashboard/src/lib/review-workspace-logic.ts +64 -0
- package/packages/web-dashboard/src/lib/types.ts +131 -0
- package/packages/web-dashboard/src/styles/dashboard.module.css +333 -0
- package/spec-files/completed/agentic_orchestrator_execution_mode_spec.md +1905 -0
- package/spec-files/outstanding/agentic_orchestrator_runtime_inspection_spec.md +940 -0
- package/spec-files/outstanding/execution_mode_critical_review.md +355 -0
- package/spec-files/outstanding/shadow_workspace_implementation_spec.md +1271 -0
- package/spec-files/outstanding/shadow_workspace_spec_summary.md +222 -0
- package/spec-files/progress.md +269 -1
|
@@ -0,0 +1,1905 @@
|
|
|
1
|
+
# Agentic Orchestrator Execution Mode Specification
|
|
2
|
+
|
|
3
|
+
**Status:** Completed
|
|
4
|
+
**Created:** 2026-03-05
|
|
5
|
+
**Author:** System
|
|
6
|
+
**Scope:** Architectural refactor to support configurable execution modes
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 0. Executive Summary
|
|
11
|
+
|
|
12
|
+
### 0.1 Problem Statement
|
|
13
|
+
|
|
14
|
+
The current architecture enforces a **deterministic, API-first execution model** where agents:
|
|
15
|
+
|
|
16
|
+
- Receive context bundles via tool calls (`feature.get_context`, `repo.read_file`, `repo.search`)
|
|
17
|
+
- Return structured patches via `repo.apply_patch` tool calls
|
|
18
|
+
- Cannot directly modify files in the worktree
|
|
19
|
+
- Must keep all in-progress changes confined to their context window
|
|
20
|
+
|
|
21
|
+
This approach guarantees determinism and auditability but **handicaps modern coding agents** (Claude, Codex, Copilot, Kiro-CLI) that work best when they can:
|
|
22
|
+
|
|
23
|
+
- Directly edit files in their working directory
|
|
24
|
+
- Use native file system operations
|
|
25
|
+
- Iterate rapidly without context window constraints
|
|
26
|
+
- Leverage their built-in file editing capabilities
|
|
27
|
+
|
|
28
|
+
### 0.2 Proposed Solution
|
|
29
|
+
|
|
30
|
+
Refactor the orchestrator to support **two configurable execution modes**:
|
|
31
|
+
|
|
32
|
+
1. **Deterministic Mode (API-first)** - Current behavior
|
|
33
|
+
- Agent receives context via tool calls
|
|
34
|
+
- Agent returns patches via `repo.apply_patch`
|
|
35
|
+
- Supervisor validates patches against plan/policy before applying
|
|
36
|
+
- Full audit trail of every change
|
|
37
|
+
|
|
38
|
+
2. **Interactive Mode (Direct worktree access)** - New behavior
|
|
39
|
+
- Agent works directly in feature worktree (or shadow workspace)
|
|
40
|
+
- Agent modifies files using native file operations
|
|
41
|
+
- Supervisor monitors worktree with file system watchdog
|
|
42
|
+
- Periodic validation checkpoints instead of per-patch validation
|
|
43
|
+
- Supervisor captures diffs at checkpoints for audit trail
|
|
44
|
+
- **Two strategies:** Direct worktree (simple) or Shadow workspace (safe)
|
|
45
|
+
|
|
46
|
+
### 0.3 Key Design Principles
|
|
47
|
+
|
|
48
|
+
- **Configurable, not exclusive:** Users choose mode per feature or globally
|
|
49
|
+
- **Preserve determinism guarantees:** Both modes enforce plan/policy/lock constraints
|
|
50
|
+
- **Backward compatible:** Existing deterministic workflows unchanged
|
|
51
|
+
- **Provider-agnostic:** Mode selection independent of provider (Claude, Codex, etc.)
|
|
52
|
+
- **Audit trail maintained:** Both modes produce complete change history
|
|
53
|
+
- **Safety first:** Shadow workspace strategy available for high-risk features
|
|
54
|
+
- **Graceful degradation:** Circuit breakers, fallbacks, and health checks throughout
|
|
55
|
+
- **Performance budgets:** Explicit latency/throughput targets with degradation strategies
|
|
56
|
+
|
|
57
|
+
### 0.4 User Benefits
|
|
58
|
+
|
|
59
|
+
**For API-first providers (Codex API, Claude API):**
|
|
60
|
+
|
|
61
|
+
- Continue using deterministic mode
|
|
62
|
+
- No changes required
|
|
63
|
+
|
|
64
|
+
**For interactive CLI providers (Kiro-CLI, local Claude, Copilot):**
|
|
65
|
+
|
|
66
|
+
- Enable interactive mode
|
|
67
|
+
- Agent works naturally in worktree
|
|
68
|
+
- Higher quality output (agent uses native capabilities)
|
|
69
|
+
- Faster iteration (no context window bottleneck)
|
|
70
|
+
- Choose safety level (direct worktree vs shadow workspace)
|
|
71
|
+
|
|
72
|
+
### 0.5 Critical Design Improvements (Post-Review)
|
|
73
|
+
|
|
74
|
+
This spec has been enhanced with critical mitigations for production readiness:
|
|
75
|
+
|
|
76
|
+
1. **Race condition handling:** Agent pause/resume protocol with acknowledgment
|
|
77
|
+
2. **Validation timing:** Shadow workspace strategy for validation-before-write
|
|
78
|
+
3. **Rollback strategies:** Full, partial, and smart rollback capabilities
|
|
79
|
+
4. **Security hardening:** Path validation, symlink blocking, disk quotas, `.git` protection
|
|
80
|
+
5. **Performance budgets:** Explicit latency targets with timeout and degradation strategies
|
|
81
|
+
6. **Graceful degradation:** Circuit breakers, health checks, automatic fallback to deterministic mode
|
|
82
|
+
7. **Audit trail completeness:** Continuous event log, checkpoint chains, incremental + cumulative diffs
|
|
83
|
+
8. **Concurrency control:** Per-feature service instances, validation queues, backpressure
|
|
84
|
+
9. **Agent communication:** Structured message protocol with acknowledgment and retry
|
|
85
|
+
10. **Monitoring and alerts:** Comprehensive metrics and alert thresholds
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## 1. Current Architecture Analysis
|
|
90
|
+
|
|
91
|
+
### 1.1 Current Execution Flow
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
SupervisorRuntime
|
|
95
|
+
└─> WorkerDecisionLoop
|
|
96
|
+
└─> WorkerProvider.runWorker(context_bundle, instructions)
|
|
97
|
+
└─> Agent receives:
|
|
98
|
+
- feature.get_context (spec, state, plan, evidence)
|
|
99
|
+
- repo.read_file (specific files)
|
|
100
|
+
- repo.search (grep results)
|
|
101
|
+
└─> Agent returns:
|
|
102
|
+
- tool_calls: [{ name: "repo.apply_patch", args: { unified_diff: "..." } }]
|
|
103
|
+
└─> Supervisor validates patch:
|
|
104
|
+
- Plan enforcement (allowed_areas, forbidden_areas)
|
|
105
|
+
- Policy enforcement (patch_policy.enforce_allowed_areas)
|
|
106
|
+
- Lock enforcement (contracts require held locks)
|
|
107
|
+
- Path rules (protected_areas)
|
|
108
|
+
└─> Supervisor applies patch:
|
|
109
|
+
- git apply in feature worktree
|
|
110
|
+
- Update QA index
|
|
111
|
+
- Update feature state
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 1.2 Key Components
|
|
115
|
+
|
|
116
|
+
**WorkerProvider Interface** (`apps/control-plane/src/providers/providers.ts`)
|
|
117
|
+
|
|
118
|
+
- `runWorker(input)` - Executes agent with context bundle
|
|
119
|
+
- Returns structured output with `tool_calls` array
|
|
120
|
+
- Implementations: `CliWorkerProvider`, `ApiWorkerProvider`, `NullWorkerProvider`
|
|
121
|
+
|
|
122
|
+
**PatchService** (`apps/control-plane/src/application/services/patch-service.ts`)
|
|
123
|
+
|
|
124
|
+
- `repoApplyPatch(featureId, unifiedDiff)` - Validates and applies patches
|
|
125
|
+
- Enforces plan, policy, locks, path rules
|
|
126
|
+
- Updates QA index after successful apply
|
|
127
|
+
|
|
128
|
+
**WorkerDecisionLoop** (`apps/control-plane/src/supervisor/worker-decision-loop.ts`)
|
|
129
|
+
|
|
130
|
+
- Orchestrates agent execution cycles
|
|
131
|
+
- Handles tool call dispatch
|
|
132
|
+
- Detects stalls, loops, no-progress conditions
|
|
133
|
+
|
|
134
|
+
**ToolRuntime** (`apps/control-plane/src/mcp/tool-runtime.ts`)
|
|
135
|
+
|
|
136
|
+
- Validates tool inputs/outputs against schemas
|
|
137
|
+
- Enforces RBAC (role-based access control)
|
|
138
|
+
- Tracks operation_id for idempotency
|
|
139
|
+
|
|
140
|
+
### 1.3 Constraints Enforced
|
|
141
|
+
|
|
142
|
+
**Plan Constraints:**
|
|
143
|
+
|
|
144
|
+
- `allowed_areas` - Paths agent may modify
|
|
145
|
+
- `forbidden_areas` - Paths agent must not touch
|
|
146
|
+
- `contracts` - Locks required (openapi, events, db)
|
|
147
|
+
|
|
148
|
+
**Policy Constraints:**
|
|
149
|
+
|
|
150
|
+
- `patch_policy.enforce_plan` - Require accepted plan before patches
|
|
151
|
+
- `patch_policy.enforce_allowed_areas` - Validate patch paths against plan
|
|
152
|
+
- `protected_areas` - Global read-only paths
|
|
153
|
+
- `path_rules` - Per-path RBAC rules
|
|
154
|
+
|
|
155
|
+
**Lock Constraints:**
|
|
156
|
+
|
|
157
|
+
- Features must hold locks for contract resources before modifying them
|
|
158
|
+
- Lock leases expire after TTL (default 300s)
|
|
159
|
+
- Heartbeat service renews leases during execution
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## 2. Proposed Architecture
|
|
164
|
+
|
|
165
|
+
### 2.1 Execution Mode Enum
|
|
166
|
+
|
|
167
|
+
**New type:** `ExecutionMode`
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
type ExecutionMode = 'deterministic' | 'interactive';
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Configuration location:** `agents.yaml`
|
|
174
|
+
|
|
175
|
+
```yaml
|
|
176
|
+
runtime:
|
|
177
|
+
execution_mode: deterministic # or 'interactive'
|
|
178
|
+
interactive:
|
|
179
|
+
strategy: direct_worktree # or 'shadow_workspace'
|
|
180
|
+
watchdog_poll_interval_ms: 2000
|
|
181
|
+
checkpoint_interval_ms: 30000 # Hybrid trigger: time-based
|
|
182
|
+
max_uncommitted_changes: 50 # Hybrid trigger: change-based
|
|
183
|
+
validation_on_checkpoint: true
|
|
184
|
+
revert_on_violation: false
|
|
185
|
+
violation_severity: warning # info | warning | error | critical
|
|
186
|
+
shadow_workspace:
|
|
187
|
+
enabled: false # Enable shadow workspace strategy
|
|
188
|
+
promotion_strategy: atomic # atomic | incremental
|
|
189
|
+
cleanup_on_failure: true # Delete shadow after validation failure
|
|
190
|
+
max_shadow_size_mb: 2048 # Disk quota for shadow workspace
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
**CLI override:** `--execution-mode <deterministic|interactive>`
|
|
194
|
+
|
|
195
|
+
### 2.2 Mode Selection Precedence
|
|
196
|
+
|
|
197
|
+
1. CLI flag `--execution-mode`
|
|
198
|
+
2. Feature-level override in `state.md` frontmatter (`execution_mode`)
|
|
199
|
+
3. `runtime.execution_mode` in `agents.yaml`
|
|
200
|
+
4. Default: `deterministic`
|
|
201
|
+
|
|
202
|
+
### 2.3 Deterministic Mode (Current Behavior)
|
|
203
|
+
|
|
204
|
+
**No changes required.** This is the existing implementation.
|
|
205
|
+
|
|
206
|
+
**Flow:**
|
|
207
|
+
|
|
208
|
+
1. Agent receives context via tool calls
|
|
209
|
+
2. Agent returns `repo.apply_patch` tool calls
|
|
210
|
+
3. Supervisor validates patch before applying
|
|
211
|
+
4. Supervisor applies patch with `git apply`
|
|
212
|
+
5. Supervisor updates QA index and state
|
|
213
|
+
|
|
214
|
+
**Guarantees:**
|
|
215
|
+
|
|
216
|
+
- Every change validated before application
|
|
217
|
+
- Complete audit trail in operation ledger
|
|
218
|
+
- Atomic patch application
|
|
219
|
+
- Immediate policy/plan enforcement
|
|
220
|
+
|
|
221
|
+
### 2.4 Interactive Mode (New Behavior)
|
|
222
|
+
|
|
223
|
+
**Two implementation strategies:**
|
|
224
|
+
|
|
225
|
+
#### Strategy A: Direct Worktree with Checkpoints (Proposed)
|
|
226
|
+
|
|
227
|
+
**Flow:**
|
|
228
|
+
|
|
229
|
+
1. Supervisor spawns agent with `cwd` set to feature worktree
|
|
230
|
+
2. Agent works directly in worktree (native file operations)
|
|
231
|
+
3. Supervisor monitors worktree with file system watchdog
|
|
232
|
+
4. Watchdog detects file changes in near real time (polling every `watchdog_poll_interval_ms`)
|
|
233
|
+
5. At checkpoints (time-based or change-count-based):
|
|
234
|
+
- Supervisor pauses agent
|
|
235
|
+
- Supervisor captures `git diff` snapshot
|
|
236
|
+
- Supervisor validates diff against plan/policy/locks
|
|
237
|
+
- If validation fails: notify agent, optionally revert changes
|
|
238
|
+
- If validation passes: record checkpoint in audit log
|
|
239
|
+
- Supervisor resumes agent
|
|
240
|
+
6. On agent completion:
|
|
241
|
+
- Final checkpoint validation
|
|
242
|
+
- Commit changes if valid
|
|
243
|
+
- Update QA index and state
|
|
244
|
+
|
|
245
|
+
**Pros:**
|
|
246
|
+
|
|
247
|
+
- Agent works in real worktree (no sync overhead)
|
|
248
|
+
- Simple implementation
|
|
249
|
+
- Fast iteration for agent
|
|
250
|
+
|
|
251
|
+
**Cons:**
|
|
252
|
+
|
|
253
|
+
- Validation happens after writes (risky)
|
|
254
|
+
- Revert is destructive
|
|
255
|
+
- Race conditions during checkpoint
|
|
256
|
+
- Agent could corrupt worktree before validation
|
|
257
|
+
|
|
258
|
+
#### Strategy B: Shadow Workspace with Promotion (Alternative - recommended for high-risk features)
|
|
259
|
+
|
|
260
|
+
**See detailed specification:** [Shadow Workspace Implementation Specification](../outstanding/shadow_workspace_implementation_spec.md)
|
|
261
|
+
|
|
262
|
+
**Flow:**
|
|
263
|
+
|
|
264
|
+
1. Supervisor creates shadow workspace (copy of feature worktree)
|
|
265
|
+
2. Supervisor spawns agent with `cwd` set to shadow workspace
|
|
266
|
+
3. Agent works in shadow (isolated from real worktree)
|
|
267
|
+
4. Supervisor monitors shadow with file system watchdog
|
|
268
|
+
5. At checkpoints:
|
|
269
|
+
- Supervisor pauses agent
|
|
270
|
+
- Supervisor captures diff between real worktree and shadow
|
|
271
|
+
- Supervisor validates diff
|
|
272
|
+
- If validation passes: promote shadow changes to real worktree (atomic)
|
|
273
|
+
- If validation fails: discard shadow changes, notify agent
|
|
274
|
+
- Supervisor resumes agent in fresh shadow
|
|
275
|
+
6. On agent completion:
|
|
276
|
+
- Final validation
|
|
277
|
+
- Promote shadow to real worktree if valid
|
|
278
|
+
- Commit changes
|
|
279
|
+
- Update QA index and state
|
|
280
|
+
|
|
281
|
+
**Pros:**
|
|
282
|
+
|
|
283
|
+
- Validation before promotion (safe)
|
|
284
|
+
- Real worktree never corrupted
|
|
285
|
+
- Easy rollback (just discard shadow)
|
|
286
|
+
- No race conditions (shadow is isolated)
|
|
287
|
+
|
|
288
|
+
**Cons:**
|
|
289
|
+
|
|
290
|
+
- Disk space overhead (2x worktree size)
|
|
291
|
+
- Promotion overhead (copy operation)
|
|
292
|
+
- More complex implementation
|
|
293
|
+
- Agent must restart in a fresh shadow after its changes are discarded
|
|
294
|
+
|
|
295
|
+
**Recommendation:** Start with Strategy A (simpler), migrate to Strategy B if validation-after-write proves problematic in practice.
|
|
296
|
+
|
|
297
|
+
**Hybrid approach:** Use Strategy A by default, Strategy B for high-risk features (e.g., features modifying contracts or schemas).
|
|
298
|
+
|
|
299
|
+
**Key Differences from Deterministic Mode:**
|
|
300
|
+
|
|
301
|
+
- Agent has direct file system access
|
|
302
|
+
- Validation happens at checkpoints, not per-patch
|
|
303
|
+
- Supervisor is reactive (watchdog) instead of proactive (gate-keeping)
|
|
304
|
+
- Agent can iterate freely between checkpoints
|
|
305
|
+
|
|
306
|
+
**Guarantees:**
|
|
307
|
+
|
|
308
|
+
- Plan/policy/lock constraints enforced at checkpoints
|
|
309
|
+
- Audit trail captured at checkpoints (not per-change)
|
|
310
|
+
- Rollback capability if validation fails
|
|
311
|
+
- Final state is deterministic (validated before commit)
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## 3. Implementation Plan
|
|
316
|
+
|
|
317
|
+
### 3.1 Schema Changes
|
|
318
|
+
|
|
319
|
+
**File:** `agentic/orchestrator/schemas/agents.schema.json`
|
|
320
|
+
|
|
321
|
+
Add to `runtime` properties:
|
|
322
|
+
|
|
323
|
+
```json
|
|
324
|
+
{
|
|
325
|
+
"execution_mode": {
|
|
326
|
+
"type": "string",
|
|
327
|
+
"enum": ["deterministic", "interactive"],
|
|
328
|
+
"default": "deterministic",
|
|
329
|
+
"description": "Execution mode for agent workers"
|
|
330
|
+
},
|
|
331
|
+
"interactive": {
|
|
332
|
+
"type": "object",
|
|
333
|
+
"description": "Configuration for interactive execution mode",
|
|
334
|
+
"properties": {
|
|
335
|
+
"watchdog_poll_interval_ms": {
|
|
336
|
+
"type": "number",
|
|
337
|
+
"default": 2000,
|
|
338
|
+
"description": "File system watchdog polling interval"
|
|
339
|
+
},
|
|
340
|
+
"checkpoint_interval_ms": {
|
|
341
|
+
"type": "number",
|
|
342
|
+
"default": 30000,
|
|
343
|
+
"description": "Time between validation checkpoints (hybrid trigger)"
|
|
344
|
+
},
|
|
345
|
+
"max_uncommitted_changes": {
|
|
346
|
+
"type": "number",
|
|
347
|
+
"default": 50,
|
|
348
|
+
"description": "Trigger checkpoint after N file changes (hybrid trigger)"
|
|
349
|
+
},
|
|
350
|
+
"validation_on_checkpoint": {
|
|
351
|
+
"type": "boolean",
|
|
352
|
+
"default": true,
|
|
353
|
+
"description": "Validate diffs at checkpoints"
|
|
354
|
+
},
|
|
355
|
+
"revert_on_violation": {
|
|
356
|
+
"type": "boolean",
|
|
357
|
+
"default": false,
|
|
358
|
+
"description": "Auto-revert changes that violate plan/policy"
|
|
359
|
+
},
|
|
360
|
+
"violation_severity": {
|
|
361
|
+
"type": "string",
|
|
362
|
+
"enum": ["info", "warning", "error", "critical"],
|
|
363
|
+
"default": "warning",
|
|
364
|
+
"description": "Severity level for violation notifications sent to agent"
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
**File:** `agentic/orchestrator/schemas/state.schema.json`
|
|
372
|
+
|
|
373
|
+
Add to properties:
|
|
374
|
+
|
|
375
|
+
```json
|
|
376
|
+
{
|
|
377
|
+
"execution_mode": {
|
|
378
|
+
"type": "string",
|
|
379
|
+
"enum": ["deterministic", "interactive"],
|
|
380
|
+
"description": "Execution mode override for this feature (optional)"
|
|
381
|
+
},
|
|
382
|
+
"checkpoints": {
|
|
383
|
+
"type": "array",
|
|
384
|
+
"description": "Validation checkpoints recorded during interactive mode",
|
|
385
|
+
"items": {
|
|
386
|
+
"type": "object",
|
|
387
|
+
"required": ["checkpoint_id", "timestamp", "files_changed", "validation_status"],
|
|
388
|
+
"properties": {
|
|
389
|
+
"checkpoint_id": { "type": "string" },
|
|
390
|
+
"timestamp": { "type": "string", "format": "date-time" },
|
|
391
|
+
"files_changed": { "type": "array", "items": { "type": "string" } },
|
|
392
|
+
"validation_status": { "type": "string", "enum": ["valid", "invalid", "skipped"] },
|
|
393
|
+
"violations": { "type": "array", "items": { "type": "string" } },
|
|
394
|
+
"severity": { "type": "string", "enum": ["info", "warning", "error", "critical"] },
|
|
395
|
+
"diff_snapshot": { "type": "string", "description": "Path to diff file" }
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### 3.2 New Services
|
|
403
|
+
|
|
404
|
+
**File:** `apps/control-plane/src/application/services/worktree-watchdog-service.ts`
|
|
405
|
+
|
|
406
|
+
```typescript
|
|
407
|
+
interface WorktreeWatchdogService {
|
|
408
|
+
startWatching(featureId: string): Promise<void>;
|
|
409
|
+
stopWatching(featureId: string): Promise<void>;
|
|
410
|
+
getChangedFiles(featureId: string): Promise<string[]>;
|
|
411
|
+
getChangeCount(featureId: string): number;
|
|
412
|
+
resetChangeCount(featureId: string): void;
|
|
413
|
+
on(event: 'changeThreshold', handler: (featureId: string) => void): void;
|
|
414
|
+
}
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
**Responsibilities:**
|
|
418
|
+
|
|
419
|
+
- Monitor feature worktree for file changes
|
|
420
|
+
- Track changed file paths
|
|
421
|
+
- Emit a `changeThreshold` event when `max_uncommitted_changes` is reached
|
|
422
|
+
- Use `chokidar` for cross-platform file system monitoring
|
|
423
|
+
- Support concurrent monitoring of multiple feature worktrees
|
|
424
|
+
|
|
425
|
+
**File:** `apps/control-plane/src/application/services/checkpoint-service.ts`
|
|
426
|
+
|
|
427
|
+
```typescript
|
|
428
|
+
interface CheckpointService {
|
|
429
|
+
createCheckpoint(featureId: string): Promise<CheckpointResult>;
|
|
430
|
+
validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult>;
|
|
431
|
+
recordCheckpoint(featureId: string, checkpoint: Checkpoint): Promise<void>;
|
|
432
|
+
getCheckpoints(featureId: string): Promise<Checkpoint[]>;
|
|
433
|
+
notifyAgent(featureId: string, violations: string[], severity: ViolationSeverity): Promise<void>;
|
|
434
|
+
rollbackToCheckpoint(featureId: string, checkpointId: string): Promise<void>;
|
|
435
|
+
rollbackFiles(featureId: string, filePaths: string[]): Promise<void>;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
type ViolationSeverity = 'info' | 'warning' | 'error' | 'critical';
|
|
439
|
+
|
|
440
|
+
interface CheckpointResult {
|
|
441
|
+
checkpoint_id: string;
|
|
442
|
+
timestamp: string;
|
|
443
|
+
files_changed: string[];
|
|
444
|
+
diff_snapshot: string;
|
|
445
|
+
validation_status: 'valid' | 'invalid' | 'skipped';
|
|
446
|
+
violations: string[];
|
|
447
|
+
severity?: ViolationSeverity;
|
|
448
|
+
}
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
**Responsibilities:**
|
|
452
|
+
|
|
453
|
+
- Capture `git diff` snapshots at checkpoints (triggered by time OR change count)
|
|
454
|
+
- Validate diffs against plan/policy/locks (reuse PatchService validation logic)
|
|
455
|
+
- Record checkpoint metadata in feature state
|
|
456
|
+
- Store diff snapshots in `.aop/features/<id>/checkpoints/`
|
|
457
|
+
- Notify agent of violations with configurable severity level
|
|
458
|
+
- Optionally revert changes if `revert_on_violation: true`
|
|
459
|
+
- **Rollback to specific checkpoint (full or partial)**
|
|
460
|
+
- **Rollback specific files only (surgical revert)**
|
|
461
|
+
|
|
462
|
+
**Rollback strategies:**
|
|
463
|
+
|
|
464
|
+
1. **Full checkpoint rollback:** Restore worktree to exact state at checkpoint
|
|
465
|
+
|
|
466
|
+
```typescript
|
|
467
|
+
async rollbackToCheckpoint(featureId: string, checkpointId: string): Promise<void> {
|
|
468
|
+
const checkpoint = await this.getCheckpoint(featureId, checkpointId);
|
|
469
|
+
const worktreePath = this.kernel.worktreePath(featureId);
|
|
470
|
+
|
|
471
|
+
// Apply inverse diff to restore state
|
|
472
|
+
const inverseDiff = this.invertDiff(checkpoint.diff_snapshot);
|
|
473
|
+
await this.git.apply(worktreePath, inverseDiff);
|
|
474
|
+
|
|
475
|
+
// Update state to reflect rollback
|
|
476
|
+
await this.kernel.updateFeatureState(featureId, {
|
|
477
|
+
last_checkpoint_id: checkpointId,
|
|
478
|
+
rollback_count: (state.rollback_count || 0) + 1,
|
|
479
|
+
});
|
|
480
|
+
}
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
2. **Partial file rollback:** Revert only specific files that violated constraints
|
|
484
|
+
|
|
485
|
+
```typescript
|
|
486
|
+
async rollbackFiles(featureId: string, filePaths: string[]): Promise<void> {
|
|
487
|
+
const worktreePath = this.kernel.worktreePath(featureId);
|
|
488
|
+
|
|
489
|
+
// Get last valid checkpoint
|
|
490
|
+
const lastValidCheckpoint = await this.getLastValidCheckpoint(featureId);
|
|
491
|
+
|
|
492
|
+
for (const filePath of filePaths) {
|
|
493
|
+
// Extract file content from checkpoint diff
|
|
494
|
+
const fileContent = this.extractFileFromDiff(
|
|
495
|
+
lastValidCheckpoint.diff_snapshot,
|
|
496
|
+
filePath
|
|
497
|
+
);
|
|
498
|
+
|
|
499
|
+
// Restore file
|
|
500
|
+
await fs.writeFile(path.join(worktreePath, filePath), fileContent);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
// Notify agent which files were reverted
|
|
504
|
+
await this.notifyAgent(featureId, [
|
|
505
|
+
`Reverted ${filePaths.length} files: ${filePaths.join(', ')}`
|
|
506
|
+
], 'warning');
|
|
507
|
+
}
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
3. **Smart rollback:** Keep valid changes, revert only violations
|
|
511
|
+
|
|
512
|
+
```typescript
|
|
513
|
+
async smartRollback(featureId: string, violations: ValidationViolation[]): Promise<void> {
|
|
514
|
+
// Group violations by file
|
|
515
|
+
const violatedFiles = new Set(violations.map(v => v.file_path));
|
|
516
|
+
|
|
517
|
+
// Get all changed files
|
|
518
|
+
const allChangedFiles = await this.watchdog.getChangedFiles(featureId);
|
|
519
|
+
|
|
520
|
+
// Rollback only violated files, keep valid changes
|
|
521
|
+
await this.rollbackFiles(featureId, Array.from(violatedFiles));
|
|
522
|
+
|
|
523
|
+
// Notify agent
|
|
524
|
+
await this.notifyAgent(featureId, [
|
|
525
|
+
`Reverted ${violatedFiles.size} files with violations`,
|
|
526
|
+
`Kept ${allChangedFiles.length - violatedFiles.size} valid changes`
|
|
527
|
+
], 'warning');
|
|
528
|
+
}
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
**Notification message format:**
|
|
532
|
+
|
|
533
|
+
```jsonc
|
|
534
|
+
{
|
|
535
|
+
"type": "checkpoint_violation",
|
|
536
|
+
"severity": "warning",
|
|
537
|
+
"checkpoint_id": "ckpt-002-b7e4c1d9",
|
|
538
|
+
"timestamp": "2026-03-05T16:29:05.127Z",
|
|
539
|
+
"violations": [
|
|
540
|
+
{
|
|
541
|
+
"file": "src/config.ts",
|
|
542
|
+
"constraint": "allowed_areas",
|
|
543
|
+
"message": "Path 'src/config.ts' not in allowed_areas",
|
|
544
|
+
"suggestion": "Add 'src/config.ts' to plan.allowed_areas or move changes to allowed path"
|
|
545
|
+
}
|
|
546
|
+
],
|
|
547
|
+
"files_changed": ["src/config.ts"],
|
|
548
|
+
"action_taken": "none" // or "reverted" if revert_on_violation: true
|
|
549
|
+
}
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
### 3.3 Modified Services
|
|
553
|
+
|
|
554
|
+
**File:** `apps/control-plane/src/supervisor/worker-decision-loop.ts`
|
|
555
|
+
|
|
556
|
+
**Changes:**
|
|
557
|
+
|
|
558
|
+
- Add `executionMode` parameter to constructor
|
|
559
|
+
- Branch execution logic based on mode:
|
|
560
|
+
- **Deterministic:** Current behavior (tool call loop)
|
|
561
|
+
- **Interactive:** New behavior (spawn agent, start watchdog, checkpoint loop)
|
|
562
|
+
|
|
563
|
+
**New method:** `runInteractiveWorker(input: WorkerDecisionInput)`
|
|
564
|
+
|
|
565
|
+
```typescript
|
|
566
|
+
async runInteractiveWorker(input: WorkerDecisionInput): Promise<WorkerDecisionResult> {
|
|
567
|
+
const { featureId, role, instructions } = input;
|
|
568
|
+
|
|
569
|
+
// 1. Start watchdog
|
|
570
|
+
await this.watchdog.startWatching(featureId);
|
|
571
|
+
|
|
572
|
+
// 2. Spawn agent with cwd = worktree
|
|
573
|
+
const worktreePath = this.kernel.worktreePath(featureId);
|
|
574
|
+
const session = await this.provider.createSession(role, featureId, systemPrompt);
|
|
575
|
+
|
|
576
|
+
// 3. Start checkpoint loop (hybrid: time-based OR change-based)
|
|
577
|
+
const checkpointTimer = setInterval(async () => {
|
|
578
|
+
await this.maybeCreateCheckpoint(featureId);
|
|
579
|
+
}, this.checkpointIntervalMs);
|
|
580
|
+
|
|
581
|
+
// Also trigger on change threshold
|
|
582
|
+
this.watchdog.on('changeThreshold', async (fid) => {
|
|
583
|
+
if (fid === featureId) {
|
|
584
|
+
await this.maybeCreateCheckpoint(featureId);
|
|
585
|
+
}
|
|
586
|
+
});
|
|
587
|
+
|
|
588
|
+
// 4. Run agent in the worktree (awaited until the agent finishes; the timer and watchdog handlers above trigger checkpoints in the meantime)
|
|
589
|
+
const result = await this.provider.runWorker({
|
|
590
|
+
role,
|
|
591
|
+
feature_id: featureId,
|
|
592
|
+
working_directory: worktreePath,
|
|
593
|
+
instructions,
|
|
594
|
+
execution_mode: 'interactive',
|
|
595
|
+
});
|
|
596
|
+
|
|
597
|
+
// 5. Stop checkpoint loop
|
|
598
|
+
clearInterval(checkpointTimer);
|
|
599
|
+
|
|
600
|
+
// 6. Final checkpoint
|
|
601
|
+
await this.checkpointService.createCheckpoint(featureId);
|
|
602
|
+
|
|
603
|
+
// 7. Stop watchdog
|
|
604
|
+
await this.watchdog.stopWatching(featureId);
|
|
605
|
+
|
|
606
|
+
return result;
|
|
607
|
+
}
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
**File:** `apps/control-plane/src/application/services/patch-service.ts`
|
|
611
|
+
|
|
612
|
+
**Changes:**
|
|
613
|
+
|
|
614
|
+
- Extract validation logic into reusable method: `validateDiff(featureId, parsedDiff)`
|
|
615
|
+
- Used by both `repoApplyPatch` (deterministic) and `CheckpointService.validateCheckpoint` (interactive)
|
|
616
|
+
|
|
617
|
+
**New method:** `validateDiff(featureId: string, parsedDiff: ParsedDiff): Promise<ValidationResult>`
|
|
618
|
+
|
|
619
|
+
```typescript
|
|
620
|
+
async validateDiff(featureId: string, parsedDiff: ParsedDiff): Promise<ValidationResult> {
|
|
621
|
+
const plan = await this.loadAcceptedPlan(featureId);
|
|
622
|
+
|
|
623
|
+
// Validate paths against plan
|
|
624
|
+
await this.validatePatchPaths(featureId, parsedDiff, plan);
|
|
625
|
+
|
|
626
|
+
// Validate locks held
|
|
627
|
+
await this.assertPlanLocksHeld(featureId, plan);
|
|
628
|
+
|
|
629
|
+
return { valid: true, violations: [] };
|
|
630
|
+
}
|
|
631
|
+
```
|
|
632
|
+
|
|
633
|
+
### 3.4 Provider Interface Changes
|
|
634
|
+
|
|
635
|
+
**File:** `apps/control-plane/src/providers/providers.ts`
|
|
636
|
+
|
|
637
|
+
**Extend `WorkerProvider` interface:**
|
|
638
|
+
|
|
639
|
+
```typescript
|
|
640
|
+
interface WorkerProvider {
|
|
641
|
+
// Existing methods
|
|
642
|
+
runWorker(input: WorkerRunInput): Promise<WorkerRunOutput>;
|
|
643
|
+
createSession(role: string, featureId: string, systemPrompt: string): Promise<SessionInfo>;
|
|
644
|
+
|
|
645
|
+
// NEW: Interactive mode support
|
|
646
|
+
pauseAgent(featureId: string, timeout?: number): Promise<PauseAckResult>;
|
|
647
|
+
resumeAgent(featureId: string): Promise<void>;
|
|
648
|
+
sendMessage(featureId: string, message: AgentMessage): Promise<MessageAckResult>;
|
|
649
|
+
getCapabilities(): WorkerProviderCapabilities;
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
interface PauseAckResult {
|
|
653
|
+
acknowledged: boolean;
|
|
654
|
+
timeout: boolean;
|
|
655
|
+
latency_ms: number;
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
interface MessageAckResult {
|
|
659
|
+
delivered: boolean;
|
|
660
|
+
acknowledged: boolean;
|
|
661
|
+
agent_response?: string;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
interface AgentMessage {
|
|
665
|
+
type: 'checkpoint_violation' | 'info' | 'warning' | 'error';
|
|
666
|
+
severity: ViolationSeverity;
|
|
667
|
+
content: string;
|
|
668
|
+
structured_data?: Record<string, unknown>;
|
|
669
|
+
requires_acknowledgment: boolean;
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
interface WorkerProviderCapabilities {
|
|
673
|
+
supportsInteractiveMode: boolean;
|
|
674
|
+
supportsWorkingDirectory: boolean;
|
|
675
|
+
supportsPauseResume: boolean;
|
|
676
|
+
supportsMessagePassing: boolean;
|
|
677
|
+
supportsAcknowledgment: boolean;
|
|
678
|
+
}
|
|
679
|
+
```
|
|
680
|
+
|
|
681
|
+
**Extend `WorkerRunInput`:**
|
|
682
|
+
|
|
683
|
+
```typescript
|
|
684
|
+
interface WorkerRunInput {
|
|
685
|
+
role: string;
|
|
686
|
+
feature_id: string;
|
|
687
|
+
context_bundle?: Record<string, unknown>;
|
|
688
|
+
instructions?: string;
|
|
689
|
+
last_tool_results?: Array<Record<string, unknown>>;
|
|
690
|
+
runtime_selection?: {
|
|
691
|
+
provider: string;
|
|
692
|
+
model: string;
|
|
693
|
+
provider_config_ref: string | null;
|
|
694
|
+
};
|
|
695
|
+
// NEW: Interactive mode fields
|
|
696
|
+
execution_mode?: 'deterministic' | 'interactive';
|
|
697
|
+
working_directory?: string; // Set to worktree path in interactive mode
|
|
698
|
+
pause_resume_protocol?: 'signal' | 'message' | 'none'; // How to pause agent
|
|
699
|
+
}
|
|
700
|
+
```
|
|
701
|
+
|
|
702
|
+
**Provider implementations:**
|
|
703
|
+
|
|
704
|
+
- **CliWorkerProvider:**
|
|
705
|
+
- Set `cwd` to `working_directory` when spawning agent process
|
|
706
|
+
- Implement `pauseAgent` via SIGSTOP/SIGCONT or stdin message
|
|
707
|
+
- Implement `sendMessage` via stdin JSON messages
|
|
708
|
+
- Implement `getCapabilities` returning full support
|
|
709
|
+
- **ApiWorkerProvider:**
|
|
710
|
+
- Include `working_directory` in context (API may not support it)
|
|
711
|
+
- `pauseAgent` not supported (return `{ acknowledged: false, timeout: true, latency_ms: 0 }`)
|
|
712
|
+
- `sendMessage` via API message endpoint (if available)
|
|
713
|
+
- `getCapabilities` returning limited support
|
|
714
|
+
- **NullWorkerProvider:**
|
|
715
|
+
- No changes (stub)
|
|
716
|
+
- `getCapabilities` returning no support
|
|
717
|
+
|
|
718
|
+
**Agent-side protocol (for CLI agents):**
|
|
719
|
+
|
|
720
|
+
Agents must implement stdin message handling:
|
|
721
|
+
|
|
722
|
+
```jsonc
|
|
723
|
+
// Supervisor -> Agent: Pause request
|
|
724
|
+
{
|
|
725
|
+
"type": "pause_request",
|
|
726
|
+
"checkpoint_id": "ckpt-003-xyz",
|
|
727
|
+
"reason": "checkpoint_validation",
|
|
728
|
+
"timeout_ms": 10000
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// Agent -> Supervisor: Pause acknowledgment
|
|
732
|
+
{
|
|
733
|
+
"type": "pause_ack",
|
|
734
|
+
"checkpoint_id": "ckpt-003-xyz",
|
|
735
|
+
"status": "paused",
|
|
736
|
+
"pending_operations": []
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
// Supervisor -> Agent: Resume
|
|
740
|
+
{
|
|
741
|
+
"type": "resume",
|
|
742
|
+
"checkpoint_id": "ckpt-003-xyz"
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
// Supervisor -> Agent: Violation notification
|
|
746
|
+
{
|
|
747
|
+
"type": "checkpoint_violation",
|
|
748
|
+
"severity": "warning",
|
|
749
|
+
"checkpoint_id": "ckpt-003-xyz",
|
|
750
|
+
"violations": [
|
|
751
|
+
{
|
|
752
|
+
"file": "src/config.ts",
|
|
753
|
+
"constraint": "allowed_areas",
|
|
754
|
+
"message": "Path not in allowed_areas",
|
|
755
|
+
"suggestion": "Add to plan.allowed_areas or move to allowed path"
|
|
756
|
+
}
|
|
757
|
+
],
|
|
758
|
+
"action_taken": "none",
|
|
759
|
+
"requires_acknowledgment": true
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
// Agent -> Supervisor: Violation acknowledgment
|
|
763
|
+
{
|
|
764
|
+
"type": "violation_ack",
|
|
765
|
+
"checkpoint_id": "ckpt-003-xyz",
|
|
766
|
+
"understood": true,
|
|
767
|
+
"corrective_action": "will_move_file_to_allowed_path"
|
|
768
|
+
}
|
|
769
|
+
```
|
|
770
|
+
|
|
771
|
+
### 3.5 Supervisor Integration
|
|
772
|
+
|
|
773
|
+
**File:** `apps/control-plane/src/supervisor/runtime.ts`
|
|
774
|
+
|
|
775
|
+
**Changes:**
|
|
776
|
+
|
|
777
|
+
- Resolve `execution_mode` from config/CLI/state
|
|
778
|
+
- Pass `execution_mode` to `WorkerDecisionLoop`
|
|
779
|
+
- Pass `interactive` config to `CheckpointService`
|
|
780
|
+
|
|
781
|
+
**New method:** `resolveExecutionMode(featureId: string): Promise<ExecutionMode>`
|
|
782
|
+
|
|
783
|
+
```typescript
|
|
784
|
+
private async resolveExecutionMode(featureId: string): Promise<ExecutionMode> {
|
|
785
|
+
// 1. Check CLI override
|
|
786
|
+
if (this.cliExecutionMode) {
|
|
787
|
+
return this.cliExecutionMode;
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
// 2. Check feature state override
|
|
791
|
+
const state = await this.kernel.readState(featureId);
|
|
792
|
+
if (state.frontMatter.execution_mode) {
|
|
793
|
+
return state.frontMatter.execution_mode;
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
// 3. Check agents.yaml
|
|
797
|
+
const agentsConfig = this.kernel.getAgentsConfig();
|
|
798
|
+
if (agentsConfig.runtime?.execution_mode) {
|
|
799
|
+
return agentsConfig.runtime.execution_mode;
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
// 4. Default
|
|
803
|
+
return 'deterministic';
|
|
804
|
+
}
|
|
805
|
+
```
|
|
806
|
+
|
|
807
|
+
### 3.6 CLI Changes
|
|
808
|
+
|
|
809
|
+
**File:** `apps/control-plane/src/cli/types.ts`
|
|
810
|
+
|
|
811
|
+
Add to `CliArgs`:
|
|
812
|
+
|
|
813
|
+
```typescript
|
|
814
|
+
interface CliArgs {
|
|
815
|
+
// ... existing fields
|
|
816
|
+
execution_mode?: 'deterministic' | 'interactive';
|
|
817
|
+
}
|
|
818
|
+
```
|
|
819
|
+
|
|
820
|
+
**File:** `apps/control-plane/src/cli/cli-argument-parser.ts`
|
|
821
|
+
|
|
822
|
+
Parse `--execution-mode` flag.
|
|
823
|
+
|
|
824
|
+
**File:** `apps/control-plane/src/cli/help-command-handler.ts`
|
|
825
|
+
|
|
826
|
+
Add to `run` command help:
|
|
827
|
+
|
|
828
|
+
```
|
|
829
|
+
--execution-mode <deterministic|interactive>
|
|
830
|
+
Execution mode for agent workers (default: deterministic)
|
|
831
|
+
```
|
|
832
|
+
|
|
833
|
+
---
|
|
834
|
+
|
|
835
|
+
## 4. Validation and Audit Trail
|
|
836
|
+
|
|
837
|
+
### 4.1 Deterministic Mode Audit Trail
|
|
838
|
+
|
|
839
|
+
**Current behavior (unchanged):**
|
|
840
|
+
|
|
841
|
+
- Every `repo.apply_patch` call logged in operation ledger
|
|
842
|
+
- Full unified diff captured per operation
|
|
843
|
+
- Validation errors logged with rejection reason
|
|
844
|
+
- Complete history in `.aop/runtime/operation-ledger/run:<run_id>.json`
|
|
845
|
+
|
|
846
|
+
### 4.2 Interactive Mode Audit Trail
|
|
847
|
+
|
|
848
|
+
**New behavior:**
|
|
849
|
+
|
|
850
|
+
- Checkpoints logged in feature state (`checkpoints` array)
|
|
851
|
+
- Diff snapshots stored in `.aop/features/<id>/checkpoints/<checkpoint_id>.diff`
|
|
852
|
+
- Validation results recorded per checkpoint
|
|
853
|
+
- Final diff captured on agent completion
|
|
854
|
+
- Checkpoint metadata includes:
|
|
855
|
+
- `checkpoint_id` (unique identifier, e.g. `ckpt-002-b7e4c1d9`)
|
|
856
|
+
- `timestamp` (ISO 8601)
|
|
857
|
+
- `files_changed` (array of paths)
|
|
858
|
+
- `validation_status` (valid/invalid/skipped)
|
|
859
|
+
- `violations` (array of error messages)
|
|
860
|
+
- `severity` (info/warning/error/critical)
|
|
861
|
+
- `diff_snapshot` (path to diff file)
|
|
862
|
+
|
|
863
|
+
**Audit trail location:**
|
|
864
|
+
|
|
865
|
+
```
|
|
866
|
+
.aop/features/<feature_id>/
|
|
867
|
+
├── state.md (includes checkpoints array)
|
|
868
|
+
└── checkpoints/
|
|
869
|
+
├── checkpoint-001.diff
|
|
870
|
+
├── checkpoint-002.diff
|
|
871
|
+
└── checkpoint-003.diff
|
|
872
|
+
```
|
|
873
|
+
|
|
874
|
+
### 4.3 Validation Enforcement
|
|
875
|
+
|
|
876
|
+
**Both modes enforce:**
|
|
877
|
+
|
|
878
|
+
- Plan constraints (allowed_areas, forbidden_areas)
|
|
879
|
+
- Policy constraints (patch_policy, protected_areas)
|
|
880
|
+
- Lock constraints (contracts require held locks)
|
|
881
|
+
- Path rules (RBAC per path)
|
|
882
|
+
|
|
883
|
+
**Difference:**
|
|
884
|
+
|
|
885
|
+
- **Deterministic:** Validation before application (gate-keeping)
|
|
886
|
+
- **Interactive:** Validation at checkpoints (monitoring)
|
|
887
|
+
|
|
888
|
+
**Interactive mode violation handling:**
|
|
889
|
+
|
|
890
|
+
1. **Notify agent:** Send message via `WorkerProvider.sendMessage` with:
|
|
891
|
+
- Violation details (which files, which constraints violated)
|
|
892
|
+
- Severity level (info/warning/error/critical) from `violation_severity` config
|
|
893
|
+
- Suggested remediation actions
|
|
894
|
+
2. **Optional revert:** If `revert_on_violation: true`, run `git checkout -- <files>` to undo changes
|
|
895
|
+
3. **Block merge:** Feature cannot reach `ready_to_merge` if any checkpoint has `validation_status: invalid` with severity >= `error`
|
|
896
|
+
|
|
897
|
+
**Severity level behavior:**
|
|
898
|
+
|
|
899
|
+
- `info`: Log only, no blocking
|
|
900
|
+
- `warning`: Notify agent, no blocking (default)
|
|
901
|
+
- `error`: Notify agent, block merge
|
|
902
|
+
- `critical`: Notify agent, block merge, optionally revert
|
|
903
|
+
|
|
904
|
+
---
|
|
905
|
+
|
|
906
|
+
## 5. Migration Path
|
|
907
|
+
|
|
908
|
+
### 5.1 Phase 1: Schema and Config (Week 1)
|
|
909
|
+
|
|
910
|
+
- Add `execution_mode` to `agents.schema.json`
|
|
911
|
+
- Add `interactive` config to `agents.schema.json`
|
|
912
|
+
- Add `execution_mode` and `checkpoints` to `state.schema.json`
|
|
913
|
+
- Update `agents.yaml` with default `execution_mode: deterministic`
|
|
914
|
+
- Add CLI flag `--execution-mode`
|
|
915
|
+
|
|
916
|
+
### 5.2 Phase 2: Watchdog and Checkpoint Services (Week 2)
|
|
917
|
+
|
|
918
|
+
- Implement `WorktreeWatchdogService`
|
|
919
|
+
- Implement `CheckpointService`
|
|
920
|
+
- Extract `PatchService.validateDiff` for reuse
|
|
921
|
+
- Add unit tests (>= 90% coverage)
|
|
922
|
+
|
|
923
|
+
### 5.3 Phase 3: Interactive Mode Execution (Week 3)
|
|
924
|
+
|
|
925
|
+
- Add `runInteractiveWorker` to `WorkerDecisionLoop`
|
|
926
|
+
- Update `SupervisorRuntime.resolveExecutionMode`
|
|
927
|
+
- Update provider implementations to support `working_directory`
|
|
928
|
+
- Add integration tests
|
|
929
|
+
|
|
930
|
+
### 5.4 Phase 4: Audit Trail and Dashboard (Week 4)
|
|
931
|
+
|
|
932
|
+
- Store checkpoint diffs in `.aop/features/<id>/checkpoints/`
|
|
933
|
+
- Update dashboard to display checkpoints (new tab in RuntimeInspector)
|
|
934
|
+
- Add checkpoint timeline view
|
|
935
|
+
- Add validation status indicators
|
|
936
|
+
|
|
937
|
+
### 5.5 Phase 5: Documentation and Rollout (Week 5)
|
|
938
|
+
|
|
939
|
+
- Update README with execution mode documentation
|
|
940
|
+
- Update AGENTS.md and CLAUDE.md
|
|
941
|
+
- Add example configurations for interactive mode
|
|
942
|
+
- Update `aop init` wizard to ask about execution mode preference
|
|
943
|
+
|
|
944
|
+
---
|
|
945
|
+
|
|
946
|
+
## 6. Testing Strategy
|
|
947
|
+
|
|
948
|
+
### 6.1 Unit Tests
|
|
949
|
+
|
|
950
|
+
**WorktreeWatchdogService:**
|
|
951
|
+
|
|
952
|
+
- Detects file creation
|
|
953
|
+
- Detects file modification
|
|
954
|
+
- Detects file deletion
|
|
955
|
+
- Tracks change count correctly
|
|
956
|
+
- Resets change count on demand
|
|
957
|
+
|
|
958
|
+
**CheckpointService:**
|
|
959
|
+
|
|
960
|
+
- Creates checkpoint with valid diff
|
|
961
|
+
- Validates diff against plan
|
|
962
|
+
- Records checkpoint in state
|
|
963
|
+
- Stores diff snapshot in correct location
|
|
964
|
+
- Handles validation failures
|
|
965
|
+
|
|
966
|
+
**PatchService.validateDiff:**
|
|
967
|
+
|
|
968
|
+
- Rejects paths outside allowed_areas
|
|
969
|
+
- Rejects paths in forbidden_areas
|
|
970
|
+
- Requires locks for contract modifications
|
|
971
|
+
- Enforces protected_areas
|
|
972
|
+
|
|
973
|
+
### 6.2 Integration Tests
|
|
974
|
+
|
|
975
|
+
**Deterministic Mode (Regression):**
|
|
976
|
+
|
|
977
|
+
- Existing tests continue to pass
|
|
978
|
+
- No behavior changes
|
|
979
|
+
|
|
980
|
+
**Interactive Mode:**
|
|
981
|
+
|
|
982
|
+
- Agent spawns with correct working directory
|
|
983
|
+
- Watchdog detects agent file changes
|
|
984
|
+
- Checkpoints created at intervals
|
|
985
|
+
- Validation runs at checkpoints
|
|
986
|
+
- Violations notify agent
|
|
987
|
+
- Final checkpoint validates before commit
|
|
988
|
+
- Audit trail captured correctly
|
|
989
|
+
|
|
990
|
+
### 6.3 End-to-End Tests
|
|
991
|
+
|
|
992
|
+
**Scenario 1: Interactive mode with valid changes**
|
|
993
|
+
|
|
994
|
+
1. Start feature with `--execution-mode interactive`
|
|
995
|
+
2. Agent modifies files in worktree
|
|
996
|
+
3. Checkpoints validate successfully
|
|
997
|
+
4. Feature completes with all changes committed
|
|
998
|
+
|
|
999
|
+
**Scenario 2: Interactive mode with policy violation**
|
|
1000
|
+
|
|
1001
|
+
1. Start feature with `--execution-mode interactive`
|
|
1002
|
+
2. Agent modifies file outside allowed_areas
|
|
1003
|
+
3. Checkpoint validation fails
|
|
1004
|
+
4. Agent receives violation notification
|
|
1005
|
+
5. Agent corrects violation
|
|
1006
|
+
6. Next checkpoint validates successfully
|
|
1007
|
+
|
|
1008
|
+
**Scenario 3: Mode switching**
|
|
1009
|
+
|
|
1010
|
+
1. Start feature in deterministic mode
|
|
1011
|
+
2. Update state with `execution_mode: interactive`
|
|
1012
|
+
3. Resume feature
|
|
1013
|
+
4. Execution switches to interactive mode
|
|
1014
|
+
|
|
1015
|
+
---
|
|
1016
|
+
|
|
1017
|
+
## 7. Acceptance Criteria
|
|
1018
|
+
|
|
1019
|
+
### 7.1 Must Have
|
|
1020
|
+
|
|
1021
|
+
- [x] Schema changes for `execution_mode` and `checkpoints`
|
|
1022
|
+
- [x] CLI flag `--execution-mode` implemented
|
|
1023
|
+
- [x] `WorktreeWatchdogService` implemented
|
|
1024
|
+
- [x] `CheckpointService` implemented
|
|
1025
|
+
- [x] `PatchService.validateDiff` extracted and reused
|
|
1026
|
+
- [x] `WorkerDecisionLoop.runInteractiveWorker` implemented
|
|
1027
|
+
- [x] `SupervisorRuntime.resolveExecutionMode` implemented
|
|
1028
|
+
- [x] Provider interface supports `working_directory`
|
|
1029
|
+
- [x] Checkpoint diffs stored in `.aop/features/<id>/checkpoints/`
|
|
1030
|
+
- [x] Validation enforced at checkpoints
|
|
1031
|
+
- [x] Audit trail complete for both modes
|
|
1032
|
+
- [x] All existing tests pass (deterministic mode regression)
|
|
1033
|
+
- [x] New tests for interactive mode (>= 90% coverage)
|
|
1034
|
+
- [x] TypeScript strict mode passes
|
|
1035
|
+
- [x] ESLint zero warnings
|
|
1036
|
+
|
|
1037
|
+
### 7.2 Should Have
|
|
1038
|
+
|
|
1039
|
+
- [x] Dashboard displays checkpoints in RuntimeInspector
|
|
1040
|
+
- [x] Checkpoint timeline view
|
|
1041
|
+
- [x] Validation status indicators
|
|
1042
|
+
- [x] Agent notification on violation
|
|
1043
|
+
- [x] Optional auto-revert on violation
|
|
1044
|
+
- [x] Documentation in README
|
|
1045
|
+
- [x] Example configurations
|
|
1046
|
+
|
|
1047
|
+
### 7.3 Nice to Have
|
|
1048
|
+
|
|
1049
|
+
- [x] Real-time checkpoint streaming to dashboard
|
|
1050
|
+
- [x] Diff viewer for checkpoint snapshots
|
|
1051
|
+
- [x] Checkpoint comparison (diff between checkpoints)
|
|
1052
|
+
- [x] Checkpoint rollback command (`aop rollback --checkpoint <id>`)
|
|
1053
|
+
- [x] Interactive mode performance metrics
|
|
1054
|
+
|
|
1055
|
+
---
|
|
1056
|
+
|
|
1057
|
+
## 8. Open Questions
|
|
1058
|
+
|
|
1059
|
+
### 8.1 Checkpoint Frequency ✅ RESOLVED
|
|
1060
|
+
|
|
1061
|
+
**Question:** What is the optimal checkpoint interval?
|
|
1062
|
+
|
|
1063
|
+
**Options:**
|
|
1064
|
+
|
|
1065
|
+
- Time-based: Every 30 seconds (default)
|
|
1066
|
+
- Change-based: Every 50 file changes (default)
|
|
1067
|
+
- Hybrid: Whichever comes first
|
|
1068
|
+
|
|
1069
|
+
**Decision:** Hybrid approach with configurable thresholds.
|
|
1070
|
+
|
|
1071
|
+
- `checkpoint_interval_ms: 30000` (time trigger)
|
|
1072
|
+
- `max_uncommitted_changes: 50` (change trigger)
|
|
1073
|
+
- Checkpoint created when either threshold is reached
|
|
1074
|
+
|
|
1075
|
+
### 8.2 Violation Handling ✅ RESOLVED
|
|
1076
|
+
|
|
1077
|
+
**Question:** Should violations auto-revert or just notify?
|
|
1078
|
+
|
|
1079
|
+
**Options:**
|
|
1080
|
+
|
|
1081
|
+
- **Notify only:** Agent receives message, decides how to fix
|
|
1082
|
+
- **Auto-revert:** Supervisor reverts changes, agent starts fresh
|
|
1083
|
+
- **Configurable:** User chooses via `revert_on_violation` flag
|
|
1084
|
+
|
|
1085
|
+
**Decision:** Configurable with default `false` (notify only).
|
|
1086
|
+
|
|
1087
|
+
**Severity levels added:**
|
|
1088
|
+
|
|
1089
|
+
- `violation_severity: info | warning | error | critical`
|
|
1090
|
+
- Default: `warning`
|
|
1091
|
+
- Behavior varies by severity (see Section 4.3)
|
|
1092
|
+
|
|
1093
|
+
### 8.3 Provider Compatibility ✅ RESOLVED
|
|
1094
|
+
|
|
1095
|
+
**Question:** Which providers support interactive mode?
|
|
1096
|
+
|
|
1097
|
+
**Analysis:**
|
|
1098
|
+
|
|
1099
|
+
- **CliWorkerProvider:** Full support (can set `cwd`)
|
|
1100
|
+
- **ApiWorkerProvider:** Limited support (API may not honor `working_directory`)
|
|
1101
|
+
- **NullWorkerProvider:** N/A (stub)
|
|
1102
|
+
|
|
1103
|
+
**Decision:** Document provider compatibility in the README, and warn users when the selected provider doesn't support interactive mode.
|
|
1104
|
+
|
|
1105
|
+
### 8.4 Concurrent Checkpoints ✅ RESOLVED
|
|
1106
|
+
|
|
1107
|
+
**Question:** Can multiple features run in interactive mode simultaneously?
|
|
1108
|
+
|
|
1109
|
+
**Decision:** Yes, multiple features can run in interactive mode simultaneously.
|
|
1110
|
+
|
|
1111
|
+
- Each feature has isolated worktree and watchdog instance
|
|
1112
|
+
- Watchdog service maintains map of `featureId -> WatcherInstance`
|
|
1113
|
+
- Checkpoint service handles concurrent validation requests
|
|
1114
|
+
- File system watchdog overhead scales linearly with active features
|
|
1115
|
+
- Monitor performance when `max_active_features > 5`
|
|
1116
|
+
|
|
1117
|
+
---
|
|
1118
|
+
|
|
1119
|
+
## 9. Risks and Mitigations
|
|
1120
|
+
|
|
1121
|
+
### 9.1 Risk: Race Conditions During Checkpoint Validation
|
|
1122
|
+
|
|
1123
|
+
**Problem:** Agent continues writing while checkpoint validation runs, causing:
|
|
1124
|
+
|
|
1125
|
+
- Validation of incomplete/inconsistent state
|
|
1126
|
+
- File corruption if revert happens mid-write
|
|
1127
|
+
- Duplicate checkpoints if change threshold triggers during time-based checkpoint
|
|
1128
|
+
|
|
1129
|
+
**Mitigation:**
|
|
1130
|
+
|
|
1131
|
+
- **Agent pause protocol:** Send `PAUSE` signal to agent before checkpoint, wait for `ACK`, then validate
|
|
1132
|
+
- **Debounce checkpoint triggers:** Minimum 5s between checkpoints regardless of trigger source
|
|
1133
|
+
- **File write detection:** Use `chokidar` `awaitWriteFinish` option (waits for file size to stabilize)
|
|
1134
|
+
- **Checkpoint queue:** Serialize checkpoint operations per feature (no concurrent checkpoints)
|
|
1135
|
+
- **Timeout:** If agent doesn't ACK pause within 10s, force checkpoint anyway and log warning
|
|
1136
|
+
|
|
1137
|
+
**Implementation:**
|
|
1138
|
+
|
|
1139
|
+
```typescript
|
|
1140
|
+
interface WorkerProvider {
|
|
1141
|
+
pauseAgent(featureId: string, timeout?: number): Promise<PauseAckResult>; // Send PAUSE signal
|
|
1142
|
+
resumeAgent(featureId: string): Promise<void>; // Send RESUME signal
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
class CheckpointService {
|
|
1146
|
+
private checkpointLocks = new Map<string, Promise<void>>();
|
|
1147
|
+
|
|
1148
|
+
async createCheckpoint(featureId: string): Promise<CheckpointResult> {
|
|
1149
|
+
// Serialize checkpoints per feature
|
|
1150
|
+
const existingCheckpoint = this.checkpointLocks.get(featureId);
|
|
1151
|
+
if (existingCheckpoint) {
|
|
1152
|
+
await existingCheckpoint;
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1155
|
+
const checkpointPromise = this._doCheckpoint(featureId);
|
|
1156
|
+
this.checkpointLocks.set(featureId, checkpointPromise);
|
|
1157
|
+
|
|
1158
|
+
try {
|
|
1159
|
+
return await checkpointPromise;
|
|
1160
|
+
} finally {
|
|
1161
|
+
this.checkpointLocks.delete(featureId);
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
private async _doCheckpoint(featureId: string): Promise<CheckpointResult> {
|
|
1166
|
+
// 1. Pause agent
|
|
1167
|
+
await this.provider.pauseAgent(featureId).catch(() => {
|
|
1168
|
+
this.logger.warn(`Agent ${featureId} did not acknowledge pause`);
|
|
1169
|
+
});
|
|
1170
|
+
|
|
1171
|
+
// 2. Wait for file writes to stabilize (100ms)
|
|
1172
|
+
await this.waitForStableFileSystem(featureId);
|
|
1173
|
+
|
|
1174
|
+
// 3. Capture diff
|
|
1175
|
+
const diff = await this.captureDiff(featureId);
|
|
1176
|
+
|
|
1177
|
+
// 4. Validate
|
|
1178
|
+
const validation = await this.validateDiff(featureId, diff);
|
|
1179
|
+
|
|
1180
|
+
// 5. Resume agent
|
|
1181
|
+
await this.provider.resumeAgent(featureId);
|
|
1182
|
+
|
|
1183
|
+
return validation;
|
|
1184
|
+
}
|
|
1185
|
+
}
|
|
1186
|
+
```
|
|
1187
|
+
|
|
1188
|
+
### 9.2 Risk: Validation After Write (Too Late)
|
|
1189
|
+
|
|
1190
|
+
**Problem:** Interactive mode validates AFTER agent writes files. If validation fails:
|
|
1191
|
+
|
|
1192
|
+
- Agent has already made dependent changes based on invalid state
|
|
1193
|
+
- Revert is destructive and loses agent's work
|
|
1194
|
+
- No atomic rollback guarantee
|
|
1195
|
+
|
|
1196
|
+
**Mitigation:**
|
|
1197
|
+
|
|
1198
|
+
- **Shadow workspace:** Agent writes to shadow directory, validation promotes to real worktree
|
|
1199
|
+
- **Incremental validation:** Validate each file as it's written (watchdog triggers per-file validation)
|
|
1200
|
+
- **Validation cache:** Cache validation results per file to avoid re-validating unchanged files
|
|
1201
|
+
- **Partial revert:** Only revert files that violate constraints, keep valid changes
|
|
1202
|
+
- **Agent guidance:** Send pre-emptive warnings when agent attempts to write to suspicious paths
|
|
1203
|
+
|
|
1204
|
+
**Implementation (Shadow Workspace):**
|
|
1205
|
+
|
|
1206
|
+
```typescript
|
|
1207
|
+
class InteractiveExecutionService {
|
|
1208
|
+
async runInteractiveWorker(featureId: string): Promise<void> {
|
|
1209
|
+
const realWorktree = this.kernel.worktreePath(featureId);
|
|
1210
|
+
const shadowWorktree = `${realWorktree}.shadow`;
|
|
1211
|
+
|
|
1212
|
+
// 1. Create shadow workspace (copy of real worktree)
|
|
1213
|
+
await fs.cp(realWorktree, shadowWorktree, { recursive: true });
|
|
1214
|
+
|
|
1215
|
+
// 2. Agent works in shadow
|
|
1216
|
+
await this.provider.runWorker({
|
|
1217
|
+
working_directory: shadowWorktree,
|
|
1218
|
+
// ...
|
|
1219
|
+
});
|
|
1220
|
+
|
|
1221
|
+
// 3. Validate shadow changes
|
|
1222
|
+
const diff = await this.git.diff(realWorktree, shadowWorktree);
|
|
1223
|
+
const validation = await this.validateDiff(featureId, diff);
|
|
1224
|
+
|
|
1225
|
+
if (validation.valid) {
|
|
1226
|
+
// 4. Promote shadow to real (rm + rename: NOT atomic; the real worktree is briefly absent between the two calls)
|
|
1227
|
+
await fs.rm(realWorktree, { recursive: true });
|
|
1228
|
+
await fs.rename(shadowWorktree, realWorktree);
|
|
1229
|
+
} else {
|
|
1230
|
+
// 5. Discard shadow, notify agent
|
|
1231
|
+
await fs.rm(shadowWorktree, { recursive: true });
|
|
1232
|
+
await this.notifyAgent(featureId, validation.violations);
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1236
|
+
```
|
|
1237
|
+
|
|
1238
|
+
**Alternative (Incremental Validation):**
|
|
1239
|
+
|
|
1240
|
+
```typescript
|
|
1241
|
+
class WorktreeWatchdogService {
|
|
1242
|
+
async startWatching(featureId: string): Promise<void> {
|
|
1243
|
+
const watcher = chokidar.watch(worktreePath, {
|
|
1244
|
+
awaitWriteFinish: { stabilityThreshold: 100, pollInterval: 50 },
|
|
1245
|
+
});
|
|
1246
|
+
|
|
1247
|
+
watcher.on('change', async (path) => {
|
|
1248
|
+
// Validate single file immediately
|
|
1249
|
+
const validation = await this.validateSingleFile(featureId, path);
|
|
1250
|
+
|
|
1251
|
+
if (!validation.valid) {
|
|
1252
|
+
// Immediate feedback to agent
|
|
1253
|
+
await this.notifyAgent(featureId, validation.violations, 'critical');
|
|
1254
|
+
|
|
1255
|
+
if (this.config.revert_on_violation) {
|
|
1256
|
+
// Revert single file only
|
|
1257
|
+
await this.git.checkout(featureId, path);
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
});
|
|
1261
|
+
}
|
|
1262
|
+
}
|
|
1263
|
+
```
|
|
1264
|
+
|
|
1265
|
+
### 9.3 Risk: Agent Modifies Files Outside Worktree
|
|
1266
|
+
|
|
1267
|
+
**Problem:** Agent could escape worktree via:
|
|
1268
|
+
|
|
1269
|
+
- Symlinks (`ln -s /etc/passwd ./passwd`)
|
|
1270
|
+
- Parent directory traversal (`../../sensitive-file`)
|
|
1271
|
+
- Absolute paths (`/etc/passwd`)
|
|
1272
|
+
- `.git` directory modification
|
|
1273
|
+
|
|
1274
|
+
**Mitigation:**
|
|
1275
|
+
|
|
1276
|
+
- **Symlink detection:** Watchdog rejects symlink creation immediately
|
|
1277
|
+
- **Path canonicalization:** Resolve all paths to absolute, verify they start with worktree prefix
|
|
1278
|
+
- **`.git` protection:** Watchdog ignores `.git` directory, validation rejects any `.git` changes
|
|
1279
|
+
- **Filesystem sandbox (advanced):** Run agent in container with bind-mount to worktree only
|
|
1280
|
+
|
|
1281
|
+
**Implementation:**
|
|
1282
|
+
|
|
1283
|
+
```typescript
|
|
1284
|
+
class WorktreeWatchdogService {
|
|
1285
|
+
private async validatePath(featureId: string, filePath: string): Promise<boolean> {
|
|
1286
|
+
const worktreePath = this.kernel.worktreePath(featureId);
|
|
1287
|
+
const canonicalPath = path.resolve(filePath);
|
|
1288
|
+
|
|
1289
|
+
// 1. Must be inside worktree
|
|
1290
|
+
if (!canonicalPath.startsWith(worktreePath)) {
|
|
1291
|
+
await this.notifyAgent(
|
|
1292
|
+
featureId,
|
|
1293
|
+
[`Path escape detected: ${filePath} is outside worktree`],
|
|
1294
|
+
'critical',
|
|
1295
|
+
);
|
|
1296
|
+
return false;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
// 2. Must not be .git directory
|
|
1300
|
+
if (canonicalPath.includes('/.git/')) {
|
|
1301
|
+
await this.notifyAgent(
|
|
1302
|
+
featureId,
|
|
1303
|
+
[`Git directory modification blocked: ${filePath}`],
|
|
1304
|
+
'critical',
|
|
1305
|
+
);
|
|
1306
|
+
return false;
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
// 3. Must not be symlink
|
|
1310
|
+
const stats = await fs.lstat(canonicalPath);
|
|
1311
|
+
if (stats.isSymbolicLink()) {
|
|
1312
|
+
await this.notifyAgent(featureId, [`Symlink creation blocked: ${filePath}`], 'critical');
|
|
1313
|
+
// Auto-remove symlink
|
|
1314
|
+
await fs.unlink(canonicalPath);
|
|
1315
|
+
return false;
|
|
1316
|
+
}
|
|
1317
|
+
|
|
1318
|
+
return true;
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
```
|
|
1322
|
+
|
|
1323
|
+
### 9.4 Risk: Validation Overhead in Interactive Mode
|
|
1324
|
+
|
|
1325
|
+
**Problem:** Every checkpoint runs full `git diff` + validation. With large changesets:
|
|
1326
|
+
|
|
1327
|
+
- `git diff` on 1000 files takes seconds
|
|
1328
|
+
- Parsing 1000 diffs is CPU-intensive
|
|
1329
|
+
- Validating 1000 paths against complex plan patterns is slow
|
|
1330
|
+
- Checkpoint blocks agent progress
|
|
1331
|
+
|
|
1332
|
+
**Mitigation:**
|
|
1333
|
+
|
|
1334
|
+
- **Incremental diff:** Only diff files changed since last checkpoint (use watchdog's changed file list)
|
|
1335
|
+
- **Validation cache:** Cache validation results per file path + plan version
|
|
1336
|
+
- **Validation budget:** Time out validation after 5s, log a warning, and allow the agent to continue
|
|
1337
|
+
- **Async validation:** Run validation in background, don't block agent (notify on completion)
|
|
1338
|
+
- **Sampling:** For large changesets (>100 files), validate random sample + high-risk files only
|
|
1339
|
+
|
|
1340
|
+
**Implementation:**
|
|
1341
|
+
|
|
1342
|
+
```typescript
|
|
1343
|
+
class CheckpointService {
|
|
1344
|
+
private validationCache = new Map<string, ValidationResult>();
|
|
1345
|
+
|
|
1346
|
+
async validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult> {
|
|
1347
|
+
const plan = await this.loadPlan(featureId);
|
|
1348
|
+
const parsedDiff = this.parseDiff(diff);
|
|
1349
|
+
const changedFiles = parsedDiff.map((d) => d.path);
|
|
1350
|
+
|
|
1351
|
+
// Check cache, validate uncached files with timeout
|
|
1352
|
+
const validationPromise = this.validateFiles(featureId, changedFiles, plan);
|
|
1353
|
+
const timeoutPromise = new Promise<ValidationResult>((resolve) =>
|
|
1354
|
+
setTimeout(
|
|
1355
|
+
() =>
|
|
1356
|
+
resolve({
|
|
1357
|
+
valid: false,
|
|
1358
|
+
violations: ['Validation timeout after 5s'],
|
|
1359
|
+
severity: 'warning',
|
|
1360
|
+
}),
|
|
1361
|
+
5000,
|
|
1362
|
+
),
|
|
1363
|
+
);
|
|
1364
|
+
|
|
1365
|
+
return await Promise.race([validationPromise, timeoutPromise]);
|
|
1366
|
+
}
|
|
1367
|
+
|
|
1368
|
+
private async validateFiles(featureId: string, files: string[], plan: Plan) {
|
|
1369
|
+
// Sampling for large changesets (>100 files)
|
|
1370
|
+
if (files.length > 100) {
|
|
1371
|
+
const highRiskFiles = files.filter(
|
|
1372
|
+
(f) => f.includes('schema') || f.includes('contract') || f.includes('migration'),
|
|
1373
|
+
);
|
|
1374
|
+
const sampledFiles = this.randomSample(files, 20);
|
|
1375
|
+
files = [...new Set([...highRiskFiles, ...sampledFiles])];
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
return await pMap(files, (file) => this.validateSingleFile(featureId, file, plan), {
|
|
1379
|
+
concurrency: 10,
|
|
1380
|
+
});
|
|
1381
|
+
}
|
|
1382
|
+
}
|
|
1383
|
+
```
|
|
1384
|
+
|
|
1385
|
+
### 9.5 Risk: Incomplete Audit Trail
|
|
1386
|
+
|
|
1387
|
+
**Problem:** Checkpoints only capture state at intervals. Between checkpoints:
|
|
1388
|
+
|
|
1389
|
+
- No record of intermediate states
|
|
1390
|
+
- No record of which files were attempted but reverted
|
|
1391
|
+
- No record of agent's decision process
|
|
1392
|
+
- Cannot reconstruct exact sequence of changes
|
|
1393
|
+
|
|
1394
|
+
**Mitigation:**
|
|
1395
|
+
|
|
1396
|
+
- **Continuous event log:** Watchdog logs every file change event with timestamp
|
|
1397
|
+
- **Agent action log:** Capture agent's tool calls and responses (even in interactive mode)
|
|
1398
|
+
- **Validation history:** Log all validation attempts (pass/fail) with details
|
|
1399
|
+
- **Checkpoint chain:** Each checkpoint references previous checkpoint (linked list)
|
|
1400
|
+
- **Diff accumulation:** Store both incremental diffs (since last checkpoint) and cumulative diffs (since start)
|
|
1401
|
+
|
|
1402
|
+
**Implementation:**
|
|
1403
|
+
|
|
1404
|
+
```typescript
|
|
1405
|
+
interface CheckpointMetadata {
|
|
1406
|
+
checkpoint_id: string;
|
|
1407
|
+
timestamp: string;
|
|
1408
|
+
previous_checkpoint_id: string | null; // Chain checkpoints
|
|
1409
|
+
files_changed_since_last: string[]; // Incremental
|
|
1410
|
+
files_changed_total: string[]; // Cumulative
|
|
1411
|
+
diff_incremental: string; // Path to incremental diff
|
|
1412
|
+
diff_cumulative: string; // Path to cumulative diff
|
|
1413
|
+
validation_status: 'valid' | 'invalid' | 'skipped';
|
|
1414
|
+
violations: string[];
|
|
1415
|
+
severity: ViolationSeverity;
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
interface FileChangeEvent {
|
|
1419
|
+
timestamp: string;
|
|
1420
|
+
event_type: 'add' | 'change' | 'unlink';
|
|
1421
|
+
file_path: string;
|
|
1422
|
+
file_size: number;
|
|
1423
|
+
checkpoint_id: string | null; // Which checkpoint captured this change
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
class WorktreeWatchdogService {
|
|
1427
|
+
private eventLog = new Map<string, FileChangeEvent[]>();
|
|
1428
|
+
|
|
1429
|
+
async startWatching(featureId: string): Promise<void> {
|
|
1430
|
+
const watcher = chokidar.watch(worktreePath);
|
|
1431
|
+
|
|
1432
|
+
watcher.on('all', (event, path) => {
|
|
1433
|
+
const changeEvent: FileChangeEvent = {
|
|
1434
|
+
timestamp: new Date().toISOString(),
|
|
1435
|
+
event_type: event,
|
|
1436
|
+
file_path: path,
|
|
1437
|
+
file_size: fs.statSync(path).size,
|
|
1438
|
+
checkpoint_id: null, // Will be set when checkpoint captures it
|
|
1439
|
+
};
|
|
1440
|
+
|
|
1441
|
+
this.eventLog.get(featureId)?.push(changeEvent);
|
|
1442
|
+
|
|
1443
|
+
// Persist event log continuously
|
|
1444
|
+
this.persistEventLog(featureId);
|
|
1445
|
+
});
|
|
1446
|
+
}
|
|
1447
|
+
|
|
1448
|
+
private async persistEventLog(featureId: string): Promise<void> {
|
|
1449
|
+
const logPath = `.aop/features/${featureId}/logs/file-changes.jsonl`;
|
|
1450
|
+
const events = this.eventLog.get(featureId) || [];
|
|
1451
|
+
|
|
1452
|
+
// Append to JSONL file (one JSON object per line)
|
|
1453
|
+
await fs.appendFile(logPath, events.map((e) => JSON.stringify(e)).join('\n') + '\n');
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
```
|
|
1457
|
+
|
|
1458
|
+
### 9.6 Risk: Provider Incompatibility
|
|
1459
|
+
|
|
1460
|
+
**Problem:** Not all providers support interactive mode:
|
|
1461
|
+
|
|
1462
|
+
- API-based providers (Claude API, Codex API) cannot set `cwd`
|
|
1463
|
+
- Some CLI providers don't respect working directory
|
|
1464
|
+
- Agent may not understand pause/resume protocol
|
|
1465
|
+
|
|
1466
|
+
**Mitigation:**
|
|
1467
|
+
|
|
1468
|
+
- **Provider capability detection:** Query provider for `supportsInteractiveMode` before starting
|
|
1469
|
+
- **Automatic fallback:** If provider doesn't support interactive, fall back to deterministic mode
|
|
1470
|
+
- **Clear error messages:** Warn user if they request interactive mode with incompatible provider
|
|
1471
|
+
- **Provider adapter interface:** Standardize pause/resume/working_directory contract
|
|
1472
|
+
|
|
1473
|
+
**Implementation:**
|
|
1474
|
+
|
|
1475
|
+
```typescript
|
|
1476
|
+
interface WorkerProviderCapabilities {
|
|
1477
|
+
supportsInteractiveMode: boolean;
|
|
1478
|
+
supportsWorkingDirectory: boolean;
|
|
1479
|
+
supportsPauseResume: boolean;
|
|
1480
|
+
supportsMessagePassing: boolean;
|
|
1481
|
+
}
|
|
1482
|
+
|
|
1483
|
+
interface WorkerProvider {
|
|
1484
|
+
getCapabilities(): WorkerProviderCapabilities;
|
|
1485
|
+
// ... existing methods
|
|
1486
|
+
}
|
|
1487
|
+
|
|
1488
|
+
class SupervisorRuntime {
|
|
1489
|
+
private resolveExecutionMode(featureId: string, requestedMode: ExecutionMode): ExecutionMode {
|
|
1490
|
+
const capabilities = this.provider.getCapabilities();
|
|
1491
|
+
|
|
1492
|
+
if (requestedMode === 'interactive' && !capabilities.supportsInteractiveMode) {
|
|
1493
|
+
this.logger.warn(
|
|
1494
|
+
`Provider ${this.provider.name} does not support interactive mode, ` +
|
|
1495
|
+
`falling back to deterministic mode`,
|
|
1496
|
+
);
|
|
1497
|
+
return 'deterministic';
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
return requestedMode;
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
```
|
|
1504
|
+
|
|
1505
|
+
### 9.7 Risk: Security - Agent Escapes Worktree
|
|
1506
|
+
|
|
1507
|
+
**Problem:** Agent with direct file system access could:
|
|
1508
|
+
|
|
1509
|
+
- Write malicious code that executes before checkpoint
|
|
1510
|
+
- Modify `.git` directory to corrupt repository
|
|
1511
|
+
- Create symlinks to escape worktree
|
|
1512
|
+
- Exhaust disk space with large files
|
|
1513
|
+
- Execute arbitrary commands via shell scripts
|
|
1514
|
+
|
|
1515
|
+
**Mitigation:**
|
|
1516
|
+
|
|
1517
|
+
- **Path validation:** Reject any file operations outside worktree (see 9.3)
|
|
1518
|
+
- **`.git` protection:** Watchdog blocks all `.git` modifications immediately
|
|
1519
|
+
- **Disk quota:** Set file size limits per feature (e.g., 1GB max)
|
|
1520
|
+
- **Executable detection:** Flag creation of executable files for review
|
|
1521
|
+
- **Sandboxing (advanced):** Run agent in Docker container with read-only bind mounts except worktree
|
|
1522
|
+
|
|
1523
|
+
**Implementation:**
|
|
1524
|
+
|
|
1525
|
+
```typescript
|
|
1526
|
+
class WorktreeWatchdogService {
|
|
1527
|
+
private diskUsage = new Map<string, number>();
|
|
1528
|
+
private readonly MAX_DISK_USAGE = 1024 * 1024 * 1024; // 1GB
|
|
1529
|
+
|
|
1530
|
+
async startWatching(featureId: string): Promise<void> {
|
|
1531
|
+
const watcher = chokidar.watch(worktreePath);
|
|
1532
|
+
|
|
1533
|
+
watcher.on('add', async (path) => {
|
|
1534
|
+
// 1. Path validation
|
|
1535
|
+
if (!(await this.validatePath(featureId, path))) {
|
|
1536
|
+
await fs.unlink(path);
|
|
1537
|
+
return;
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
// 2. Disk quota
|
|
1541
|
+
const stats = await fs.stat(path);
|
|
1542
|
+
const currentUsage = this.diskUsage.get(featureId) || 0;
|
|
1543
|
+
|
|
1544
|
+
if (currentUsage + stats.size > this.MAX_DISK_USAGE) {
|
|
1545
|
+
await this.notifyAgent(
|
|
1546
|
+
featureId,
|
|
1547
|
+
[
|
|
1548
|
+
`Disk quota exceeded: ${currentUsage + stats.size} bytes > ${this.MAX_DISK_USAGE} bytes`,
|
|
1549
|
+
],
|
|
1550
|
+
'critical',
|
|
1551
|
+
);
|
|
1552
|
+
await fs.unlink(path);
|
|
1553
|
+
return;
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
this.diskUsage.set(featureId, currentUsage + stats.size);
|
|
1557
|
+
|
|
1558
|
+
// 3. Executable detection
|
|
1559
|
+
if (stats.mode & 0o111) {
|
|
1560
|
+
// Has execute bit
|
|
1561
|
+
await this.notifyAgent(
|
|
1562
|
+
featureId,
|
|
1563
|
+
[`Executable file created: ${path} - requires review`],
|
|
1564
|
+
'warning',
|
|
1565
|
+
);
|
|
1566
|
+
}
|
|
1567
|
+
});
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
```
|
|
1571
|
+
|
|
1572
|
+
### 9.8 Risk: No Graceful Degradation
|
|
1573
|
+
|
|
1574
|
+
**Problem:** If watchdog fails, checkpoint service fails, or validation times out:
|
|
1575
|
+
|
|
1576
|
+
- Agent continues running blind (no monitoring)
|
|
1577
|
+
- Changes accumulate without validation
|
|
1578
|
+
- System state becomes inconsistent
|
|
1579
|
+
- No clear recovery path
|
|
1580
|
+
|
|
1581
|
+
**Mitigation:**
|
|
1582
|
+
|
|
1583
|
+
- **Circuit breakers:** After 3 consecutive checkpoint failures, pause agent and escalate to human
|
|
1584
|
+
- **Health checks:** Periodic health checks for watchdog, checkpoint service, validation service
|
|
1585
|
+
- **Fallback mode:** If interactive mode fails, automatically switch to deterministic mode
|
|
1586
|
+
- **Agent timeout:** If agent doesn't complete within max_execution_time, force checkpoint and terminate
|
|
1587
|
+
- **Recovery protocol:** On service failure, capture current state, notify human, wait for manual intervention
|
|
1588
|
+
|
|
1589
|
+
**Implementation:**
|
|
1590
|
+
|
|
1591
|
+
```typescript
|
|
1592
|
+
class InteractiveExecutionService {
|
|
1593
|
+
private checkpointFailures = new Map<string, number>();
|
|
1594
|
+
private readonly MAX_CHECKPOINT_FAILURES = 3;
|
|
1595
|
+
|
|
1596
|
+
async runInteractiveWorker(featureId: string): Promise<void> {
|
|
1597
|
+
try {
|
|
1598
|
+
// Health check before starting
|
|
1599
|
+
await this.healthCheck();
|
|
1600
|
+
|
|
1601
|
+
// Start agent with timeout
|
|
1602
|
+
const agentPromise = this.provider.runWorker({...});
|
|
1603
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
1604
|
+
setTimeout(() => reject(new Error('Agent timeout')),
|
|
1605
|
+
this.config.max_execution_time_ms)
|
|
1606
|
+
);
|
|
1607
|
+
|
|
1608
|
+
await Promise.race([agentPromise, timeoutPromise]);
|
|
1609
|
+
|
|
1610
|
+
} catch (error) {
|
|
1611
|
+
// Circuit breaker check
|
|
1612
|
+
const failures = this.checkpointFailures.get(featureId) || 0;
|
|
1613
|
+
|
|
1614
|
+
if (failures >= this.MAX_CHECKPOINT_FAILURES) {
|
|
1615
|
+
await this.escalateToHuman(featureId, error);
|
|
1616
|
+
throw new Error('Interactive mode failed, human intervention required');
|
|
1617
|
+
}
|
|
1618
|
+
|
|
1619
|
+
// Fallback to deterministic mode
|
|
1620
|
+
this.logger.warn(
|
|
1621
|
+
`Interactive mode failed for ${featureId}, falling back to deterministic mode`
|
|
1622
|
+
);
|
|
1623
|
+
|
|
1624
|
+
await this.runDeterministicWorker(featureId);
|
|
1625
|
+
}
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
private async healthCheck(): Promise<void> {
|
|
1629
|
+
const checks = [
|
|
1630
|
+
this.watchdog.healthCheck(),
|
|
1631
|
+
this.checkpointService.healthCheck(),
|
|
1632
|
+
this.validationService.healthCheck(),
|
|
1633
|
+
];
|
|
1634
|
+
|
|
1635
|
+
const results = await Promise.allSettled(checks);
|
|
1636
|
+
const failures = results.filter(r => r.status === 'rejected');
|
|
1637
|
+
|
|
1638
|
+
if (failures.length > 0) {
|
|
1639
|
+
throw new Error(`Health check failed: ${failures.length} services unhealthy`);
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
|
|
1643
|
+
private async escalateToHuman(featureId: string, error: Error): Promise<void> {
|
|
1644
|
+
// 1. Capture current state
|
|
1645
|
+
const state = await this.captureState(featureId);
|
|
1646
|
+
|
|
1647
|
+
// 2. Pause agent
|
|
1648
|
+
await this.provider.pauseAgent(featureId);
|
|
1649
|
+
|
|
1650
|
+
// 3. Notify human via all channels
|
|
1651
|
+
await this.notificationService.send({
|
|
1652
|
+
severity: 'critical',
|
|
1653
|
+
title: `Interactive mode failure: ${featureId}`,
|
|
1654
|
+
message: `Agent execution failed after ${this.MAX_CHECKPOINT_FAILURES} checkpoint failures`,
|
|
1655
|
+
details: { error: error.message, state },
|
|
1656
|
+
actions: [
|
|
1657
|
+
{ label: 'Review State', url: `/dashboard/features/${featureId}` },
|
|
1658
|
+
{ label: 'Force Checkpoint', action: 'force_checkpoint' },
|
|
1659
|
+
{ label: 'Terminate Agent', action: 'terminate' },
|
|
1660
|
+
],
|
|
1661
|
+
});
|
|
1662
|
+
|
|
1663
|
+
// 4. Update feature state to BLOCKED
|
|
1664
|
+
await this.kernel.updateFeatureState(featureId, {
|
|
1665
|
+
status: 'blocked',
|
|
1666
|
+
blocked_reason: 'interactive_mode_failure',
|
|
1667
|
+
requires_human_intervention: true,
|
|
1668
|
+
});
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
```
|
|
1672
|
+
|
|
1673
|
+
**Additional mitigation:**
|
|
1674
|
+
|
|
1675
|
+
- Fallback to deterministic mode if provider doesn't support `working_directory`
|
|
1676
|
+
|
|
1677
|
+
---
|
|
1678
|
+
|
|
1679
|
+
### 9.9 Risk: Concurrent Feature Resource Contention
|
|
1680
|
+
|
|
1681
|
+
**Problem:** Multiple features in interactive mode simultaneously:
|
|
1682
|
+
|
|
1683
|
+
- Watchdog service is singleton with shared state
|
|
1684
|
+
- Checkpoint service has no concurrency control
|
|
1685
|
+
- Validation service could be bottlenecked
|
|
1686
|
+
- File system events could be misattributed
|
|
1687
|
+
|
|
1688
|
+
**Mitigation:**
|
|
1689
|
+
|
|
1690
|
+
- **Per-feature service instances:** Each feature gets isolated watchdog/checkpoint instances
|
|
1691
|
+
- **Resource pooling:** Limit concurrent interactive features (e.g., max 5)
|
|
1692
|
+
- **Event attribution:** Watchdog uses separate `chokidar` instance per feature
|
|
1693
|
+
- **Validation queue:** Serialize validation requests with priority queue
|
|
1694
|
+
- **Backpressure:** If validation queue is full, pause low-priority features
|
|
1695
|
+
|
|
1696
|
+
**Implementation:**
|
|
1697
|
+
|
|
1698
|
+
```typescript
|
|
1699
|
+
class WorktreeWatchdogService {
|
|
1700
|
+
private watchers = new Map<string, FSWatcher>(); // Per-feature watchers
|
|
1701
|
+
private eventLogs = new Map<string, FileChangeEvent[]>();
|
|
1702
|
+
|
|
1703
|
+
async startWatching(featureId: string): Promise<void> {
|
|
1704
|
+
const worktreePath = this.kernel.worktreePath(featureId);
|
|
1705
|
+
const watcher = chokidar.watch(worktreePath, {
|
|
1706
|
+
ignored: /(^|[\/\\])\../,
|
|
1707
|
+
persistent: true,
|
|
1708
|
+
ignoreInitial: true,
|
|
1709
|
+
});
|
|
1710
|
+
|
|
1711
|
+
this.watchers.set(featureId, watcher);
|
|
1712
|
+
this.eventLogs.set(featureId, []);
|
|
1713
|
+
|
|
1714
|
+
watcher.on('all', (event, path) => {
|
|
1715
|
+
this.handleFileChange(featureId, event, path);
|
|
1716
|
+
});
|
|
1717
|
+
}
|
|
1718
|
+
}
|
|
1719
|
+
|
|
1720
|
+
class CheckpointService {
|
|
1721
|
+
private validationQueue = new PQueue({ concurrency: 3 });
|
|
1722
|
+
|
|
1723
|
+
async validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult> {
|
|
1724
|
+
return this.validationQueue.add(() => this._validateCheckpoint(featureId, diff), {
|
|
1725
|
+
priority: this.getFeaturePriority(featureId),
|
|
1726
|
+
});
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
```
|
|
1730
|
+
|
|
1731
|
+
---
|
|
1732
|
+
|
|
1733
|
+
## 10. Performance Requirements
|
|
1734
|
+
|
|
1735
|
+
### 10.1 Latency Budgets
|
|
1736
|
+
|
|
1737
|
+
| Operation | Target | Maximum | Degradation Strategy |
|
|
1738
|
+
| ------------------------- | ------- | ------- | ---------------------------- |
|
|
1739
|
+
| Checkpoint creation | < 500ms | 2s | Skip if timeout, log warning |
|
|
1740
|
+
| Single file validation | < 50ms | 200ms | Use cached result if timeout |
|
|
1741
|
+
| Full diff validation | < 1s | 5s | Sample validation if timeout |
|
|
1742
|
+
| Agent pause/resume | < 100ms | 500ms | Force checkpoint if no ACK |
|
|
1743
|
+
| Watchdog event processing | < 10ms | 50ms | Buffer events if slow |
|
|
1744
|
+
|
|
1745
|
+
### 10.2 Throughput Requirements
|
|
1746
|
+
|
|
1747
|
+
| Metric | Target | Maximum |
|
|
1748
|
+
| -------------------------------- | --------------- | --------------- |
|
|
1749
|
+
| File changes per second | 100 | 500 |
|
|
1750
|
+
| Concurrent interactive features | 5 | 10 |
|
|
1751
|
+
| Checkpoints per feature per hour | 120 (every 30s) | 360 (every 10s) |
|
|
1752
|
+
| Validation cache hit rate | > 80% | N/A |
|
|
1753
|
+
|
|
1754
|
+
### 10.3 Resource Limits
|
|
1755
|
+
|
|
1756
|
+
| Resource | Limit per Feature | Limit Global |
|
|
1757
|
+
| --------------------- | ----------------- | ------------ |
|
|
1758
|
+
| Disk usage | 1 GB | 10 GB |
|
|
1759
|
+
| Memory (watchdog) | 50 MB | 500 MB |
|
|
1760
|
+
| CPU (validation) | 10% | 50% |
|
|
1761
|
+
| Open file descriptors | 1000 | 10000 |
|
|
1762
|
+
|
|
1763
|
+
### 10.4 Monitoring and Alerts
|
|
1764
|
+
|
|
1765
|
+
**Metrics to track:**
|
|
1766
|
+
|
|
1767
|
+
- Checkpoint latency (p50, p95, p99)
|
|
1768
|
+
- Validation latency (p50, p95, p99)
|
|
1769
|
+
- Watchdog event processing latency
|
|
1770
|
+
- Validation cache hit rate
|
|
1771
|
+
- Checkpoint failure rate
|
|
1772
|
+
- Agent pause/resume success rate
|
|
1773
|
+
- Disk usage per feature
|
|
1774
|
+
- Memory usage per watchdog instance
|
|
1775
|
+
|
|
1776
|
+
**Alerts:**
|
|
1777
|
+
|
|
1778
|
+
- Checkpoint latency > 2s for 3 consecutive checkpoints
|
|
1779
|
+
- Validation failure rate > 10%
|
|
1780
|
+
- Disk usage > 80% of limit
|
|
1781
|
+
- Memory usage > 80% of limit
|
|
1782
|
+
- Watchdog event queue depth > 1000
|
|
1783
|
+
|
|
1784
|
+
---
|
|
1785
|
+
|
|
1786
|
+
## 11. Success Metrics
|
|
1787
|
+
|
|
1788
|
+
### 11.1 Functional Metrics
|
|
1789
|
+
|
|
1790
|
+
- [x] Both execution modes pass all tests
|
|
1791
|
+
- [x] No regressions in deterministic mode
|
|
1792
|
+
- [x] Interactive mode enforces all plan/policy/lock constraints
|
|
1793
|
+
- [x] Audit trail complete for both modes
|
|
1794
|
+
|
|
1795
|
+
### 11.2 Quality Metrics
|
|
1796
|
+
|
|
1797
|
+
- [ ] Agent output quality improves in interactive mode (subjective, user feedback)
|
|
1798
|
+
- [ ] Iteration speed increases in interactive mode (measured by time-to-completion)
|
|
1799
|
+
- [ ] Context window usage decreases in interactive mode (fewer `repo.read_file` calls)
|
|
1800
|
+
|
|
1801
|
+
### 11.3 Performance Metrics
|
|
1802
|
+
|
|
1803
|
+
- [ ] Checkpoint validation completes in < 500ms
|
|
1804
|
+
- [ ] Watchdog overhead < 5% CPU per active feature
|
|
1805
|
+
- [ ] No memory leaks in long-running interactive sessions
|
|
1806
|
+
|
|
1807
|
+
---
|
|
1808
|
+
|
|
1809
|
+
## 11. Future Enhancements
|
|
1810
|
+
|
|
1811
|
+
### 11.1 Hybrid Mode
|
|
1812
|
+
|
|
1813
|
+
**Concept:** Agent can choose execution mode per task.
|
|
1814
|
+
|
|
1815
|
+
**Example:**
|
|
1816
|
+
|
|
1817
|
+
- Use deterministic mode for critical contract changes (requires explicit approval)
|
|
1818
|
+
- Use interactive mode for implementation details (faster iteration)
|
|
1819
|
+
|
|
1820
|
+
### 11.2 Checkpoint Branching
|
|
1821
|
+
|
|
1822
|
+
**Concept:** Create git branches at checkpoints for easy rollback.
|
|
1823
|
+
|
|
1824
|
+
**Benefit:** Agent can experiment freely, supervisor can revert to any checkpoint.
|
|
1825
|
+
|
|
1826
|
+
### 11.3 Multi-Agent Interactive Mode
|
|
1827
|
+
|
|
1828
|
+
**Concept:** Multiple agents work in same worktree with conflict resolution.
|
|
1829
|
+
|
|
1830
|
+
**Challenge:** Requires sophisticated merge logic and coordination.
|
|
1831
|
+
|
|
1832
|
+
### 11.4 Real-Time Collaboration
|
|
1833
|
+
|
|
1834
|
+
**Concept:** Human operator can edit files in worktree while agent is running.
|
|
1835
|
+
|
|
1836
|
+
**Benefit:** Pair programming with AI agent.
|
|
1837
|
+
|
|
1838
|
+
**Challenge:** Requires conflict detection and resolution.
|
|
1839
|
+
|
|
1840
|
+
---
|
|
1841
|
+
|
|
1842
|
+
## 12. Appendix
|
|
1843
|
+
|
|
1844
|
+
### 12.1 Related Specifications
|
|
1845
|
+
|
|
1846
|
+
- **Shadow Workspace Implementation:** [Shadow Workspace Implementation Specification](../outstanding/shadow_workspace_implementation_spec.md) - Detailed implementation of shadow workspace strategy for validation-before-write guarantees
|
|
1847
|
+
- **Runtime Inspection:** [Runtime Inspection Specification](./agentic_orchestrator_runtime_inspection_spec.md) - Dashboard integration for execution mode monitoring
|
|
1848
|
+
|
|
1849
|
+
### 12.2 File System Watchdog Libraries
|
|
1850
|
+
|
|
1851
|
+
**Options:**
|
|
1852
|
+
|
|
1853
|
+
- `chokidar` - Cross-platform, battle-tested, 20M+ downloads/week
|
|
1854
|
+
- `fs.watch` - Native Node.js, no dependencies
|
|
1855
|
+
- `watchman` - Facebook's file watching service (requires external daemon)
|
|
1856
|
+
|
|
1857
|
+
**Recommendation:** `chokidar` for reliability and cross-platform support.
|
|
1858
|
+
|
|
1859
|
+
### 12.3 Checkpoint Storage Format
|
|
1860
|
+
|
|
1861
|
+
**Diff snapshot format:** Unified diff (same as `git diff` output)
|
|
1862
|
+
|
|
1863
|
+
**Metadata format:** JSON in `state.md` frontmatter
|
|
1864
|
+
|
|
1865
|
+
**Example:**
|
|
1866
|
+
|
|
1867
|
+
```yaml
|
|
1868
|
+
checkpoints:
|
|
1869
|
+
- checkpoint_id: 'ckpt-001-a3f2b9c4'
|
|
1870
|
+
timestamp: '2026-03-05T16:28:19.503Z'
|
|
1871
|
+
files_changed: ['src/app.ts', 'src/utils.ts']
|
|
1872
|
+
validation_status: 'valid'
|
|
1873
|
+
violations: []
|
|
1874
|
+
severity: 'info'
|
|
1875
|
+
diff_snapshot: '.aop/features/my_feature/checkpoints/ckpt-001-a3f2b9c4.diff'
|
|
1876
|
+
- checkpoint_id: 'ckpt-002-b7e4c1d9'
|
|
1877
|
+
timestamp: '2026-03-05T16:29:05.127Z'
|
|
1878
|
+
files_changed: ['src/config.ts']
|
|
1879
|
+
validation_status: 'invalid'
|
|
1880
|
+
violations: ["Path 'src/config.ts' not in allowed_areas"]
|
|
1881
|
+
severity: 'warning'
|
|
1882
|
+
diff_snapshot: '.aop/features/my_feature/checkpoints/ckpt-002-b7e4c1d9.diff'
|
|
1883
|
+
```
|
|
1884
|
+
|
|
1885
|
+
### 12.4 Validation Reuse
|
|
1886
|
+
|
|
1887
|
+
**Shared validation logic:**
|
|
1888
|
+
|
|
1889
|
+
- `PatchService.validateDiff(featureId, parsedDiff)`
|
|
1890
|
+
- Used by:
|
|
1891
|
+
- `PatchService.repoApplyPatch` (deterministic mode)
|
|
1892
|
+
- `CheckpointService.validateCheckpoint` (interactive mode)
|
|
1893
|
+
|
|
1894
|
+
**Validation steps:**
|
|
1895
|
+
|
|
1896
|
+
1. Parse diff into file operations (create/modify/delete)
|
|
1897
|
+
2. Load accepted plan
|
|
1898
|
+
3. Validate paths against `allowed_areas` and `forbidden_areas`
|
|
1899
|
+
4. Validate locks held for contract modifications
|
|
1900
|
+
5. Validate against `protected_areas` and `path_rules`
|
|
1901
|
+
6. Return validation result with violations array
|
|
1902
|
+
|
|
1903
|
+
---
|
|
1904
|
+
|
|
1905
|
+
**End of Specification**
|