agentic-orchestrator 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/AGENTS.md +2 -2
  2. package/CLAUDE.md +2 -2
  3. package/README.md +47 -14
  4. package/agentic/orchestrator/agents.yaml +13 -0
  5. package/agentic/orchestrator/policy.yaml +3 -0
  6. package/agentic/orchestrator/schemas/agents.schema.json +76 -0
  7. package/agentic/orchestrator/schemas/policy.schema.json +16 -0
  8. package/agentic/orchestrator/schemas/policy.user.schema.json +16 -0
  9. package/agentic/orchestrator/schemas/state.schema.json +53 -0
  10. package/apps/control-plane/src/application/configuration-service.ts +181 -0
  11. package/apps/control-plane/src/application/kernel-tool-wiring.ts +292 -0
  12. package/apps/control-plane/src/application/services/checkpoint-service.ts +523 -0
  13. package/apps/control-plane/src/application/services/feature-send-message-service.ts +132 -0
  14. package/apps/control-plane/src/application/services/patch-service.ts +29 -5
  15. package/apps/control-plane/src/application/services/repo-operations-service.ts +276 -0
  16. package/apps/control-plane/src/application/services/worktree-watchdog-service.ts +156 -0
  17. package/apps/control-plane/src/cli/cli-argument-parser.ts +12 -0
  18. package/apps/control-plane/src/cli/help-command-handler.ts +17 -0
  19. package/apps/control-plane/src/cli/init-command-handler.ts +31 -0
  20. package/apps/control-plane/src/cli/resume-command-handler.ts +31 -4
  21. package/apps/control-plane/src/cli/rollback-command-handler.ts +217 -0
  22. package/apps/control-plane/src/cli/run-command-handler.ts +8 -0
  23. package/apps/control-plane/src/cli/types.ts +3 -0
  24. package/apps/control-plane/src/core/kernel-types.ts +55 -0
  25. package/apps/control-plane/src/core/kernel.ts +61 -878
  26. package/apps/control-plane/src/core/tool-caller.ts +10 -0
  27. package/apps/control-plane/src/core/utils/field-readers.ts +38 -0
  28. package/apps/control-plane/src/core/utils/index-normalizer.ts +119 -0
  29. package/apps/control-plane/src/core/utils/path-normalizers.ts +22 -0
  30. package/apps/control-plane/src/interfaces/cli/bootstrap.ts +15 -0
  31. package/apps/control-plane/src/providers/api-worker-provider.ts +14 -12
  32. package/apps/control-plane/src/providers/cli-worker-provider.ts +82 -12
  33. package/apps/control-plane/src/providers/providers.ts +45 -24
  34. package/apps/control-plane/src/providers/worker-provider-factory.ts +36 -1
  35. package/apps/control-plane/src/supervisor/run-coordinator.ts +91 -36
  36. package/apps/control-plane/src/supervisor/runtime.ts +107 -1
  37. package/apps/control-plane/src/supervisor/types.ts +9 -0
  38. package/apps/control-plane/src/supervisor/worker-decision-loop.ts +253 -14
  39. package/apps/control-plane/test/checkpoint-service.spec.ts +537 -0
  40. package/apps/control-plane/test/cli-helpers.spec.ts +28 -0
  41. package/apps/control-plane/test/cli.unit.spec.ts +52 -0
  42. package/apps/control-plane/test/configuration-service.spec.ts +466 -0
  43. package/apps/control-plane/test/dashboard-api.integration.spec.ts +537 -0
  44. package/apps/control-plane/test/dashboard-client.spec.ts +233 -0
  45. package/apps/control-plane/test/feature-send-message-service.spec.ts +314 -0
  46. package/apps/control-plane/test/init-wizard.spec.ts +35 -0
  47. package/apps/control-plane/test/path-normalizers.spec.ts +41 -0
  48. package/apps/control-plane/test/repo-operations-service.spec.ts +339 -0
  49. package/apps/control-plane/test/resume-command.spec.ts +33 -0
  50. package/apps/control-plane/test/review-workspace-logic.spec.ts +130 -0
  51. package/apps/control-plane/test/rollback-command.spec.ts +208 -0
  52. package/apps/control-plane/test/run-coordinator.spec.ts +119 -0
  53. package/apps/control-plane/test/worker-decision-loop.spec.ts +209 -0
  54. package/apps/control-plane/test/worker-provider-adapters.spec.ts +102 -0
  55. package/apps/control-plane/test/worker-provider-factory.spec.ts +14 -0
  56. package/apps/control-plane/test/worktree-watchdog-service.spec.ts +147 -0
  57. package/config/agentic/orchestrator/agents.yaml +13 -0
  58. package/dist/apps/control-plane/application/configuration-service.d.ts +19 -0
  59. package/dist/apps/control-plane/application/configuration-service.js +123 -0
  60. package/dist/apps/control-plane/application/configuration-service.js.map +1 -0
  61. package/dist/apps/control-plane/application/kernel-tool-wiring.d.ts +39 -0
  62. package/dist/apps/control-plane/application/kernel-tool-wiring.js +38 -0
  63. package/dist/apps/control-plane/application/kernel-tool-wiring.js.map +1 -0
  64. package/dist/apps/control-plane/application/services/checkpoint-service.d.ts +84 -0
  65. package/dist/apps/control-plane/application/services/checkpoint-service.js +367 -0
  66. package/dist/apps/control-plane/application/services/checkpoint-service.js.map +1 -0
  67. package/dist/apps/control-plane/application/services/feature-send-message-service.d.ts +25 -0
  68. package/dist/apps/control-plane/application/services/feature-send-message-service.js +105 -0
  69. package/dist/apps/control-plane/application/services/feature-send-message-service.js.map +1 -0
  70. package/dist/apps/control-plane/application/services/patch-service.d.ts +6 -0
  71. package/dist/apps/control-plane/application/services/patch-service.js +11 -2
  72. package/dist/apps/control-plane/application/services/patch-service.js.map +1 -1
  73. package/dist/apps/control-plane/application/services/repo-operations-service.d.ts +70 -0
  74. package/dist/apps/control-plane/application/services/repo-operations-service.js +213 -0
  75. package/dist/apps/control-plane/application/services/repo-operations-service.js.map +1 -0
  76. package/dist/apps/control-plane/application/services/worktree-watchdog-service.d.ts +23 -0
  77. package/dist/apps/control-plane/application/services/worktree-watchdog-service.js +119 -0
  78. package/dist/apps/control-plane/application/services/worktree-watchdog-service.js.map +1 -0
  79. package/dist/apps/control-plane/cli/cli-argument-parser.js +12 -0
  80. package/dist/apps/control-plane/cli/cli-argument-parser.js.map +1 -1
  81. package/dist/apps/control-plane/cli/help-command-handler.js +17 -0
  82. package/dist/apps/control-plane/cli/help-command-handler.js.map +1 -1
  83. package/dist/apps/control-plane/cli/init-command-handler.js +23 -0
  84. package/dist/apps/control-plane/cli/init-command-handler.js.map +1 -1
  85. package/dist/apps/control-plane/cli/resume-command-handler.js +25 -5
  86. package/dist/apps/control-plane/cli/resume-command-handler.js.map +1 -1
  87. package/dist/apps/control-plane/cli/rollback-command-handler.d.ts +6 -0
  88. package/dist/apps/control-plane/cli/rollback-command-handler.js +177 -0
  89. package/dist/apps/control-plane/cli/rollback-command-handler.js.map +1 -0
  90. package/dist/apps/control-plane/cli/run-command-handler.js +7 -1
  91. package/dist/apps/control-plane/cli/run-command-handler.js.map +1 -1
  92. package/dist/apps/control-plane/cli/types.d.ts +3 -0
  93. package/dist/apps/control-plane/cli/types.js +1 -0
  94. package/dist/apps/control-plane/cli/types.js.map +1 -1
  95. package/dist/apps/control-plane/core/configuration-service.d.ts +25 -0
  96. package/dist/apps/control-plane/core/configuration-service.js +130 -0
  97. package/dist/apps/control-plane/core/configuration-service.js.map +1 -0
  98. package/dist/apps/control-plane/core/kernel-tool-wiring.d.ts +50 -0
  99. package/dist/apps/control-plane/core/kernel-tool-wiring.js +44 -0
  100. package/dist/apps/control-plane/core/kernel-tool-wiring.js.map +1 -0
  101. package/dist/apps/control-plane/core/kernel-types.d.ts +48 -0
  102. package/dist/apps/control-plane/core/kernel-types.js +2 -0
  103. package/dist/apps/control-plane/core/kernel-types.js.map +1 -0
  104. package/dist/apps/control-plane/core/kernel.d.ts +17 -48
  105. package/dist/apps/control-plane/core/kernel.js +44 -539
  106. package/dist/apps/control-plane/core/kernel.js.map +1 -1
  107. package/dist/apps/control-plane/core/tool-caller.d.ts +10 -0
  108. package/dist/apps/control-plane/core/utils/error-normalizer.d.ts +2 -0
  109. package/dist/apps/control-plane/core/utils/error-normalizer.js +51 -0
  110. package/dist/apps/control-plane/core/utils/error-normalizer.js.map +1 -0
  111. package/dist/apps/control-plane/core/utils/field-readers.d.ts +9 -0
  112. package/dist/apps/control-plane/core/utils/field-readers.js +30 -0
  113. package/dist/apps/control-plane/core/utils/field-readers.js.map +1 -0
  114. package/dist/apps/control-plane/core/utils/index-normalizer.d.ts +7 -0
  115. package/dist/apps/control-plane/core/utils/index-normalizer.js +92 -0
  116. package/dist/apps/control-plane/core/utils/index-normalizer.js.map +1 -0
  117. package/dist/apps/control-plane/core/utils/path-normalizers.d.ts +2 -0
  118. package/dist/apps/control-plane/core/utils/path-normalizers.js +17 -0
  119. package/dist/apps/control-plane/core/utils/path-normalizers.js.map +1 -0
  120. package/dist/apps/control-plane/interfaces/cli/bootstrap.js +13 -1
  121. package/dist/apps/control-plane/interfaces/cli/bootstrap.js.map +1 -1
  122. package/dist/apps/control-plane/providers/api-worker-provider.d.ts +4 -13
  123. package/dist/apps/control-plane/providers/api-worker-provider.js +10 -0
  124. package/dist/apps/control-plane/providers/api-worker-provider.js.map +1 -1
  125. package/dist/apps/control-plane/providers/cli-worker-provider.d.ts +11 -13
  126. package/dist/apps/control-plane/providers/cli-worker-provider.js +64 -0
  127. package/dist/apps/control-plane/providers/cli-worker-provider.js.map +1 -1
  128. package/dist/apps/control-plane/providers/providers.d.ts +31 -24
  129. package/dist/apps/control-plane/providers/providers.js +10 -0
  130. package/dist/apps/control-plane/providers/providers.js.map +1 -1
  131. package/dist/apps/control-plane/providers/worker-provider-factory.d.ts +11 -0
  132. package/dist/apps/control-plane/providers/worker-provider-factory.js +20 -1
  133. package/dist/apps/control-plane/providers/worker-provider-factory.js.map +1 -1
  134. package/dist/apps/control-plane/supervisor/run-coordinator.d.ts +3 -0
  135. package/dist/apps/control-plane/supervisor/run-coordinator.js +81 -33
  136. package/dist/apps/control-plane/supervisor/run-coordinator.js.map +1 -1
  137. package/dist/apps/control-plane/supervisor/runtime.d.ts +8 -1
  138. package/dist/apps/control-plane/supervisor/runtime.js +90 -0
  139. package/dist/apps/control-plane/supervisor/runtime.js.map +1 -1
  140. package/dist/apps/control-plane/supervisor/types.d.ts +11 -0
  141. package/dist/apps/control-plane/supervisor/types.js.map +1 -1
  142. package/dist/apps/control-plane/supervisor/worker-decision-loop.d.ts +21 -1
  143. package/dist/apps/control-plane/supervisor/worker-decision-loop.js +207 -13
  144. package/dist/apps/control-plane/supervisor/worker-decision-loop.js.map +1 -1
  145. package/package.json +1 -1
  146. package/packages/web-dashboard/package.json +2 -0
  147. package/packages/web-dashboard/src/app/analytics/page.tsx +83 -2
  148. package/packages/web-dashboard/src/app/api/actions/route.ts +92 -1
  149. package/packages/web-dashboard/src/app/api/analytics/route.ts +5 -2
  150. package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/[checkpointId]/diff/route.ts +43 -0
  151. package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/compare/route.ts +45 -0
  152. package/packages/web-dashboard/src/app/api/features/[id]/checkpoints/stream/route.ts +170 -0
  153. package/packages/web-dashboard/src/app/api/features/[id]/file-diff/route.ts +144 -0
  154. package/packages/web-dashboard/src/app/api/features/[id]/log-stream/route.ts +167 -0
  155. package/packages/web-dashboard/src/app/api/features/[id]/raw-logs/[filename]/route.ts +65 -0
  156. package/packages/web-dashboard/src/app/api/features/[id]/raw-logs/route.ts +63 -0
  157. package/packages/web-dashboard/src/app/api/features/[id]/timeline/route.ts +60 -0
  158. package/packages/web-dashboard/src/app/feature/[id]/page.tsx +32 -11
  159. package/packages/web-dashboard/src/app/globals.css +2 -0
  160. package/packages/web-dashboard/src/components/detail-panel.tsx +483 -0
  161. package/packages/web-dashboard/src/components/review-workspace.tsx +1162 -0
  162. package/packages/web-dashboard/src/lib/aop-client.ts +725 -0
  163. package/packages/web-dashboard/src/lib/review-contracts.ts +182 -0
  164. package/packages/web-dashboard/src/lib/review-workspace-logic.ts +64 -0
  165. package/packages/web-dashboard/src/lib/types.ts +131 -0
  166. package/packages/web-dashboard/src/styles/dashboard.module.css +333 -0
  167. package/spec-files/completed/agentic_orchestrator_execution_mode_spec.md +1905 -0
  168. package/spec-files/outstanding/agentic_orchestrator_runtime_inspection_spec.md +940 -0
  169. package/spec-files/outstanding/execution_mode_critical_review.md +355 -0
  170. package/spec-files/outstanding/shadow_workspace_implementation_spec.md +1271 -0
  171. package/spec-files/outstanding/shadow_workspace_spec_summary.md +222 -0
  172. package/spec-files/progress.md +269 -1
@@ -0,0 +1,1905 @@
1
+ # Agentic Orchestrator Execution Mode Specification
2
+
3
+ **Status:** Completed
4
+ **Created:** 2026-03-05
5
+ **Author:** System
6
+ **Scope:** Architectural refactor to support configurable execution modes
7
+
8
+ ---
9
+
10
+ ## 0. Executive Summary
11
+
12
+ ### 0.1 Problem Statement
13
+
14
+ The current architecture enforces a **deterministic, API-first execution model** where agents:
15
+
16
+ - Receive context bundles via tool calls (`feature.get_context`, `repo.read_file`, `repo.search`)
17
+ - Return structured patches via `repo.apply_patch` tool calls
18
+ - Cannot directly modify files in the worktree
19
+ - Must compose every change as a patch inside their context window
20
+
21
+ This approach guarantees determinism and auditability but **handicaps modern coding agents** (Claude, Codex, Copilot, Kiro-CLI) that work best when they can:
22
+
23
+ - Directly edit files in their working directory
24
+ - Use native file system operations
25
+ - Iterate rapidly without context window constraints
26
+ - Leverage their built-in file editing capabilities
27
+
28
+ ### 0.2 Proposed Solution
29
+
30
+ Refactor the orchestrator to support **two configurable execution modes**:
31
+
32
+ 1. **Deterministic Mode (API-first)** - Current behavior
33
+ - Agent receives context via tool calls
34
+ - Agent returns patches via `repo.apply_patch`
35
+ - Supervisor validates patches against plan/policy before applying
36
+ - Full audit trail of every change
37
+
38
+ 2. **Interactive Mode (Direct worktree access)** - New behavior
39
+ - Agent works directly in feature worktree (or shadow workspace)
40
+ - Agent modifies files using native file operations
41
+ - Supervisor monitors worktree with file system watchdog
42
+ - Periodic validation checkpoints instead of per-patch validation
43
+ - Supervisor captures diffs at checkpoints for audit trail
44
+ - **Two strategies:** Direct worktree (simple) or Shadow workspace (safe)
45
+
46
+ ### 0.3 Key Design Principles
47
+
48
+ - **Configurable, not exclusive:** Users choose mode per feature or globally
49
+ - **Preserve determinism guarantees:** Both modes enforce plan/policy/lock constraints
50
+ - **Backward compatible:** Existing deterministic workflows unchanged
51
+ - **Provider-agnostic:** Mode selection independent of provider (Claude, Codex, etc.)
52
+ - **Audit trail maintained:** Both modes produce complete change history
53
+ - **Safety first:** Shadow workspace strategy available for high-risk features
54
+ - **Graceful degradation:** Circuit breakers, fallbacks, and health checks throughout
55
+ - **Performance budgets:** Explicit latency/throughput targets with degradation strategies
56
+
57
+ ### 0.4 User Benefits
58
+
59
+ **For API-first providers (Codex API, Claude API):**
60
+
61
+ - Continue using deterministic mode
62
+ - No changes required
63
+
64
+ **For interactive CLI providers (Kiro-CLI, local Claude, Copilot):**
65
+
66
+ - Enable interactive mode
67
+ - Agent works naturally in worktree
68
+ - Higher quality output (agent uses native capabilities)
69
+ - Faster iteration (no context window bottleneck)
70
+ - Choose safety level (direct worktree vs shadow workspace)
71
+
72
+ ### 0.5 Critical Design Improvements (Post-Review)
73
+
74
+ This spec has been enhanced with critical mitigations for production readiness:
75
+
76
+ 1. **Race condition handling:** Agent pause/resume protocol with acknowledgment
77
+ 2. **Validation timing:** Shadow workspace strategy for validation-before-write
78
+ 3. **Rollback strategies:** Full, partial, and smart rollback capabilities
79
+ 4. **Security hardening:** Path validation, symlink blocking, disk quotas, `.git` protection
80
+ 5. **Performance budgets:** Explicit latency targets with timeout and degradation strategies
81
+ 6. **Graceful degradation:** Circuit breakers, health checks, automatic fallback to deterministic mode
82
+ 7. **Audit trail completeness:** Continuous event log, checkpoint chains, incremental + cumulative diffs
83
+ 8. **Concurrency control:** Per-feature service instances, validation queues, backpressure
84
+ 9. **Agent communication:** Structured message protocol with acknowledgment and retry
85
+ 10. **Monitoring and alerts:** Comprehensive metrics and alert thresholds
86
+
87
+ ---
88
+
89
+ ## 1. Current Architecture Analysis
90
+
91
+ ### 1.1 Current Execution Flow
92
+
93
+ ```
94
+ SupervisorRuntime
95
+ └─> WorkerDecisionLoop
96
+ └─> WorkerProvider.runWorker(context_bundle, instructions)
97
+ └─> Agent receives:
98
+ - feature.get_context (spec, state, plan, evidence)
99
+ - repo.read_file (specific files)
100
+ - repo.search (grep results)
101
+ └─> Agent returns:
102
+ - tool_calls: [{ name: "repo.apply_patch", args: { unified_diff: "..." } }]
103
+ └─> Supervisor validates patch:
104
+ - Plan enforcement (allowed_areas, forbidden_areas)
105
+ - Policy enforcement (patch_policy.enforce_allowed_areas)
106
+ - Lock enforcement (contracts require held locks)
107
+ - Path rules (protected_areas)
108
+ └─> Supervisor applies patch:
109
+ - git apply in feature worktree
110
+ - Update QA index
111
+ - Update feature state
112
+ ```
113
+
114
+ ### 1.2 Key Components
115
+
116
+ **WorkerProvider Interface** (`apps/control-plane/src/providers/providers.ts`)
117
+
118
+ - `runWorker(input)` - Executes agent with context bundle
119
+ - Returns structured output with `tool_calls` array
120
+ - Implementations: `CliWorkerProvider`, `ApiWorkerProvider`, `NullWorkerProvider`
121
+
122
+ **PatchService** (`apps/control-plane/src/application/services/patch-service.ts`)
123
+
124
+ - `repoApplyPatch(featureId, unifiedDiff)` - Validates and applies patches
125
+ - Enforces plan, policy, locks, path rules
126
+ - Updates QA index after successful apply
127
+
128
+ **WorkerDecisionLoop** (`apps/control-plane/src/supervisor/worker-decision-loop.ts`)
129
+
130
+ - Orchestrates agent execution cycles
131
+ - Handles tool call dispatch
132
+ - Detects stalls, loops, no-progress conditions
133
+
134
+ **ToolRuntime** (`apps/control-plane/src/mcp/tool-runtime.ts`)
135
+
136
+ - Validates tool inputs/outputs against schemas
137
+ - Enforces RBAC (role-based access control)
138
+ - Tracks operation_id for idempotency
139
+
140
+ ### 1.3 Constraints Enforced
141
+
142
+ **Plan Constraints:**
143
+
144
+ - `allowed_areas` - Paths agent may modify
145
+ - `forbidden_areas` - Paths agent must not touch
146
+ - `contracts` - Locks required (openapi, events, db)
147
+
148
+ **Policy Constraints:**
149
+
150
+ - `patch_policy.enforce_plan` - Require accepted plan before patches
151
+ - `patch_policy.enforce_allowed_areas` - Validate patch paths against plan
152
+ - `protected_areas` - Global read-only paths
153
+ - `path_rules` - Per-path RBAC rules
154
+
155
+ **Lock Constraints:**
156
+
157
+ - Features must hold locks for contract resources before modifying them
158
+ - Lock leases expire after TTL (default 300s)
159
+ - Heartbeat service renews leases during execution
160
+
161
+ ---
162
+
163
+ ## 2. Proposed Architecture
164
+
165
+ ### 2.1 Execution Mode Enum
166
+
167
+ **New type:** `ExecutionMode`
168
+
169
+ ```typescript
170
+ type ExecutionMode = 'deterministic' | 'interactive';
171
+ ```
172
+
173
+ **Configuration location:** `agents.yaml`
174
+
175
+ ```yaml
176
+ runtime:
177
+ execution_mode: deterministic # or 'interactive'
178
+ interactive:
179
+ strategy: direct_worktree # or 'shadow_workspace'
180
+ watchdog_poll_interval_ms: 2000
181
+ checkpoint_interval_ms: 30000 # Hybrid trigger: time-based
182
+ max_uncommitted_changes: 50 # Hybrid trigger: change-based
183
+ validation_on_checkpoint: true
184
+ revert_on_violation: false
185
+ violation_severity: warning # info | warning | error | critical
186
+ shadow_workspace:
187
+ enabled: false # Enable shadow workspace strategy
188
+ promotion_strategy: atomic # atomic | incremental
189
+ cleanup_on_failure: true # Delete shadow after validation failure
190
+ max_shadow_size_mb: 2048 # Disk quota for shadow workspace
191
+ ```
192
+
193
+ **CLI override:** `--execution-mode <deterministic|interactive>`
194
+
195
+ ### 2.2 Mode Selection Precedence
196
+
197
+ 1. CLI flag `--execution-mode`
198
+ 2. Feature-level override in `state.md` frontmatter (`execution_mode`)
199
+ 3. `agents.yaml runtime.execution_mode`
200
+ 4. Default: `deterministic`
201
+
202
+ ### 2.3 Deterministic Mode (Current Behavior)
203
+
204
+ **No changes required.** This is the existing implementation.
205
+
206
+ **Flow:**
207
+
208
+ 1. Agent receives context via tool calls
209
+ 2. Agent returns `repo.apply_patch` tool calls
210
+ 3. Supervisor validates patch before applying
211
+ 4. Supervisor applies patch with `git apply`
212
+ 5. Supervisor updates QA index and state
213
+
214
+ **Guarantees:**
215
+
216
+ - Every change validated before application
217
+ - Complete audit trail in operation ledger
218
+ - Atomic patch application
219
+ - Immediate policy/plan enforcement
220
+
221
+ ### 2.4 Interactive Mode (New Behavior)
222
+
223
+ **Two implementation strategies:**
224
+
225
+ #### Strategy A: Direct Worktree with Checkpoints (Proposed)
226
+
227
+ **Flow:**
228
+
229
+ 1. Supervisor spawns agent with `cwd` set to feature worktree
230
+ 2. Agent works directly in worktree (native file operations)
231
+ 3. Supervisor monitors worktree with file system watchdog
232
+ 4. Watchdog detects file changes in real-time
233
+ 5. At checkpoints (time-based or change-count-based):
234
+ - Supervisor pauses agent
235
+ - Supervisor captures `git diff` snapshot
236
+ - Supervisor validates diff against plan/policy/locks
237
+ - If validation fails: notify agent, optionally revert changes
238
+ - If validation passes: record checkpoint in audit log
239
+ - Supervisor resumes agent
240
+ 6. On agent completion:
241
+ - Final checkpoint validation
242
+ - Commit changes if valid
243
+ - Update QA index and state
244
+
245
+ **Pros:**
246
+
247
+ - Agent works in real worktree (no sync overhead)
248
+ - Simple implementation
249
+ - Fast iteration for agent
250
+
251
+ **Cons:**
252
+
253
+ - Validation happens after writes (risky)
254
+ - Revert is destructive
255
+ - Race conditions during checkpoint
256
+ - Agent could corrupt worktree before validation
257
+
258
+ #### Strategy B: Shadow Workspace with Promotion (Alternative - recommended for high-risk features)
259
+
260
+ **See detailed specification:** [Shadow Workspace Implementation Specification](../outstanding/shadow_workspace_implementation_spec.md)
261
+
262
+ **Flow:**
263
+
264
+ 1. Supervisor creates shadow workspace (copy of feature worktree)
265
+ 2. Supervisor spawns agent with `cwd` set to shadow workspace
266
+ 3. Agent works in shadow (isolated from real worktree)
267
+ 4. Supervisor monitors shadow with file system watchdog
268
+ 5. At checkpoints:
269
+ - Supervisor pauses agent
270
+ - Supervisor captures diff between real worktree and shadow
271
+ - Supervisor validates diff
272
+ - If validation passes: promote shadow changes to real worktree (atomic)
273
+ - If validation fails: discard shadow changes, notify agent
274
+ - Supervisor resumes agent in fresh shadow
275
+ 6. On agent completion:
276
+ - Final validation
277
+ - Promote shadow to real worktree if valid
278
+ - Commit changes
279
+ - Update QA index and state
280
+
281
+ **Pros:**
282
+
283
+ - Validation before promotion (safe)
284
+ - Real worktree never corrupted
285
+ - Easy rollback (just discard shadow)
286
+ - No race conditions (shadow is isolated)
287
+
288
+ **Cons:**
289
+
290
+ - Disk space overhead (2x worktree size)
291
+ - Promotion overhead (copy operation)
292
+ - More complex implementation
293
+ - Agent must restart in fresh shadow after revert
294
+
295
+ **Recommendation:** Start with Strategy A (simpler), migrate to Strategy B if validation-after-write proves problematic in practice.
296
+
297
+ **Hybrid approach:** Use Strategy A by default, Strategy B for high-risk features (e.g., features modifying contracts or schemas).
298
+
299
+ **Key Differences from Deterministic Mode:**
300
+
301
+ - Agent has direct file system access
302
+ - Validation happens at checkpoints, not per-patch
303
+ - Supervisor is reactive (watchdog) instead of proactive (gate-keeping)
304
+ - Agent can iterate freely between checkpoints
305
+
306
+ **Guarantees:**
307
+
308
+ - Plan/policy/lock constraints enforced at checkpoints
309
+ - Audit trail captured at checkpoints (not per-change)
310
+ - Rollback capability if validation fails
311
+ - Final state is deterministic (validated before commit)
312
+
313
+ ---
314
+
315
+ ## 3. Implementation Plan
316
+
317
+ ### 3.1 Schema Changes
318
+
319
+ **File:** `agentic/orchestrator/schemas/agents.schema.json`
320
+
321
+ Add to `runtime` properties:
322
+
323
+ ```json
324
+ {
325
+ "execution_mode": {
326
+ "type": "string",
327
+ "enum": ["deterministic", "interactive"],
328
+ "default": "deterministic",
329
+ "description": "Execution mode for agent workers"
330
+ },
331
+ "interactive": {
332
+ "type": "object",
333
+ "description": "Configuration for interactive execution mode",
334
+ "properties": {
335
+ "watchdog_poll_interval_ms": {
336
+ "type": "number",
337
+ "default": 2000,
338
+ "description": "File system watchdog polling interval"
339
+ },
340
+ "checkpoint_interval_ms": {
341
+ "type": "number",
342
+ "default": 30000,
343
+ "description": "Time between validation checkpoints (hybrid trigger)"
344
+ },
345
+ "max_uncommitted_changes": {
346
+ "type": "number",
347
+ "default": 50,
348
+ "description": "Trigger checkpoint after N file changes (hybrid trigger)"
349
+ },
350
+ "validation_on_checkpoint": {
351
+ "type": "boolean",
352
+ "default": true,
353
+ "description": "Validate diffs at checkpoints"
354
+ },
355
+ "revert_on_violation": {
356
+ "type": "boolean",
357
+ "default": false,
358
+ "description": "Auto-revert changes that violate plan/policy"
359
+ },
360
+ "violation_severity": {
361
+ "type": "string",
362
+ "enum": ["info", "warning", "error", "critical"],
363
+ "default": "warning",
364
+ "description": "Severity level for violation notifications sent to agent"
365
+ }
366
+ }
367
+ }
368
+ }
369
+ ```
370
+
371
+ **File:** `agentic/orchestrator/schemas/state.schema.json`
372
+
373
+ Add to properties:
374
+
375
+ ```json
376
+ {
377
+ "execution_mode": {
378
+ "type": "string",
379
+ "enum": ["deterministic", "interactive"],
380
+ "description": "Execution mode override for this feature (optional)"
381
+ },
382
+ "checkpoints": {
383
+ "type": "array",
384
+ "description": "Validation checkpoints recorded during interactive mode",
385
+ "items": {
386
+ "type": "object",
387
+ "required": ["checkpoint_id", "timestamp", "files_changed", "validation_status"],
388
+ "properties": {
389
+ "checkpoint_id": { "type": "string" },
390
+ "timestamp": { "type": "string", "format": "date-time" },
391
+ "files_changed": { "type": "array", "items": { "type": "string" } },
392
+ "validation_status": { "type": "string", "enum": ["valid", "invalid", "skipped"] },
393
+ "violations": { "type": "array", "items": { "type": "string" } },
394
+ "severity": { "type": "string", "enum": ["info", "warning", "error", "critical"] },
395
+ "diff_snapshot": { "type": "string", "description": "Path to diff file" }
396
+ }
397
+ }
398
+ }
399
+ }
400
+ ```
401
+
402
+ ### 3.2 New Services
403
+
404
+ **File:** `apps/control-plane/src/application/services/worktree-watchdog-service.ts`
405
+
406
+ ```typescript
407
+ interface WorktreeWatchdogService {
408
+ startWatching(featureId: string): Promise<void>;
409
+ stopWatching(featureId: string): Promise<void>;
410
+ getChangedFiles(featureId: string): Promise<string[]>;
411
+ getChangeCount(featureId: string): number;
412
+ resetChangeCount(featureId: string): void;
413
+ on(event: 'changeThreshold', handler: (featureId: string) => void): void;
414
+ }
415
+ ```
416
+
417
+ **Responsibilities:**
418
+
419
+ - Monitor feature worktree for file changes
420
+ - Track changed file paths
421
+ - Emit `changeThreshold` event when `max_uncommitted_changes` reached
422
+ - Use `chokidar` for cross-platform file system monitoring
423
+ - Support concurrent monitoring of multiple feature worktrees
424
+
425
+ **File:** `apps/control-plane/src/application/services/checkpoint-service.ts`
426
+
427
+ ```typescript
428
+ interface CheckpointService {
429
+ createCheckpoint(featureId: string): Promise<CheckpointResult>;
430
+ validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult>;
431
+ recordCheckpoint(featureId: string, checkpoint: Checkpoint): Promise<void>;
432
+ getCheckpoints(featureId: string): Promise<Checkpoint[]>;
433
+ notifyAgent(featureId: string, violations: string[], severity: ViolationSeverity): Promise<void>;
434
+ rollbackToCheckpoint(featureId: string, checkpointId: string): Promise<void>;
435
+ rollbackFiles(featureId: string, filePaths: string[]): Promise<void>;
436
+ }
437
+
438
+ type ViolationSeverity = 'info' | 'warning' | 'error' | 'critical';
439
+
440
+ interface CheckpointResult {
441
+ checkpoint_id: string;
442
+ timestamp: string;
443
+ files_changed: string[];
444
+ diff_snapshot: string;
445
+ validation_status: 'valid' | 'invalid' | 'skipped';
446
+ violations: string[];
447
+ severity?: ViolationSeverity;
448
+ }
449
+ ```
450
+
451
+ **Responsibilities:**
452
+
453
+ - Capture `git diff` snapshots at checkpoints (triggered by time OR change count)
454
+ - Validate diffs against plan/policy/locks (reuse PatchService validation logic)
455
+ - Record checkpoint metadata in feature state
456
+ - Store diff snapshots in `.aop/features/<id>/checkpoints/`
457
+ - Notify agent of violations with configurable severity level
458
+ - Optionally revert changes if `revert_on_violation: true`
459
+ - **Rollback to specific checkpoint (full or partial)**
460
+ - **Rollback specific files only (surgical revert)**
461
+
462
+ **Rollback strategies:**
463
+
464
+ 1. **Full checkpoint rollback:** Restore worktree to exact state at checkpoint
465
+
466
+ ```typescript
467
+ async rollbackToCheckpoint(featureId: string, checkpointId: string): Promise<void> {
468
+ const checkpoint = await this.getCheckpoint(featureId, checkpointId);
469
+ const worktreePath = this.kernel.worktreePath(featureId);
470
+
471
+ // Apply inverse diff to restore state
472
+ const inverseDiff = this.invertDiff(checkpoint.diff_snapshot);
473
+ await this.git.apply(worktreePath, inverseDiff);
474
+
475
+ // Update state to reflect rollback
476
+ await this.kernel.updateFeatureState(featureId, {
477
+ last_checkpoint_id: checkpointId,
478
+ rollback_count: (state.rollback_count || 0) + 1,
479
+ });
480
+ }
481
+ ```
482
+
483
+ 2. **Partial file rollback:** Revert only specific files that violated constraints
484
+
485
+ ```typescript
486
+ async rollbackFiles(featureId: string, filePaths: string[]): Promise<void> {
487
+ const worktreePath = this.kernel.worktreePath(featureId);
488
+
489
+ // Get last valid checkpoint
490
+ const lastValidCheckpoint = await this.getLastValidCheckpoint(featureId);
491
+
492
+ for (const filePath of filePaths) {
493
+ // Extract file content from checkpoint diff
494
+ const fileContent = this.extractFileFromDiff(
495
+ lastValidCheckpoint.diff_snapshot,
496
+ filePath
497
+ );
498
+
499
+ // Restore file
500
+ await fs.writeFile(path.join(worktreePath, filePath), fileContent);
501
+ }
502
+
503
+ // Notify agent which files were reverted
504
+ await this.notifyAgent(featureId, [
505
+ `Reverted ${filePaths.length} files: ${filePaths.join(', ')}`
506
+ ], 'warning');
507
+ }
508
+ ```
509
+
510
+ 3. **Smart rollback:** Keep valid changes, revert only violations
511
+
512
+ ```typescript
513
+ async smartRollback(featureId: string, violations: ValidationViolation[]): Promise<void> {
514
+ // Group violations by file
515
+ const violatedFiles = new Set(violations.map(v => v.file_path));
516
+
517
+ // Get all changed files
518
+ const allChangedFiles = await this.watchdog.getChangedFiles(featureId);
519
+
520
+ // Rollback only violated files, keep valid changes
521
+ await this.rollbackFiles(featureId, Array.from(violatedFiles));
522
+
523
+ // Notify agent
524
+ await this.notifyAgent(featureId, [
525
+ `Reverted ${violatedFiles.size} files with violations`,
526
+ `Kept ${allChangedFiles.length - violatedFiles.size} valid changes`
527
+ ], 'warning');
528
+ }
529
+ ```
530
+
531
+ **Notification message format:**
532
+
533
+ ```json
534
+ {
535
+ "type": "checkpoint_violation",
536
+ "severity": "warning",
537
+ "checkpoint_id": "ckpt-002-b7e4c1d9",
538
+ "timestamp": "2026-03-05T16:29:05.127Z",
539
+ "violations": [
540
+ {
541
+ "file": "src/config.ts",
542
+ "constraint": "allowed_areas",
543
+ "message": "Path 'src/config.ts' not in allowed_areas",
544
+ "suggestion": "Add 'src/config.ts' to plan.allowed_areas or move changes to allowed path"
545
+ }
546
+ ],
547
+ "files_changed": ["src/config.ts"],
548
+ "action_taken": "none" // or "reverted" if revert_on_violation: true
549
+ }
550
+ ```
551
+
552
+ ### 3.3 Modified Services
553
+
554
+ **File:** `apps/control-plane/src/supervisor/worker-decision-loop.ts`
555
+
556
+ **Changes:**
557
+
558
+ - Add `executionMode` parameter to constructor
559
+ - Branch execution logic based on mode:
560
+ - **Deterministic:** Current behavior (tool call loop)
561
+ - **Interactive:** New behavior (spawn agent, start watchdog, checkpoint loop)
562
+
563
+ **New method:** `runInteractiveWorker(input: WorkerDecisionInput)`
564
+
565
+ ```typescript
566
+ async runInteractiveWorker(input: WorkerDecisionInput): Promise<WorkerDecisionResult> {
567
+ const { featureId, role, instructions } = input;
568
+
569
+ // 1. Start watchdog
570
+ await this.watchdog.startWatching(featureId);
571
+
572
+ // 2. Spawn agent with cwd = worktree
573
+ const worktreePath = this.kernel.worktreePath(featureId);
574
+ const session = await this.provider.createSession(role, featureId, systemPrompt);
575
+
576
+ // 3. Start checkpoint loop (hybrid: time-based OR change-based)
577
+ const checkpointTimer = setInterval(async () => {
578
+ await this.maybeCreateCheckpoint(featureId);
579
+ }, this.checkpointIntervalMs);
580
+
581
+ // Also trigger on change threshold
582
+ this.watchdog.on('changeThreshold', async (fid) => {
583
+ if (fid === featureId) {
584
+ await this.maybeCreateCheckpoint(featureId);
585
+ }
586
+ });
587
+
588
+ // 4. Run agent (awaited until the agent finishes; it edits the worktree directly while the timer/watchdog trigger checkpoints in the background)
589
+ const result = await this.provider.runWorker({
590
+ role,
591
+ feature_id: featureId,
592
+ working_directory: worktreePath,
593
+ instructions,
594
+ execution_mode: 'interactive',
595
+ });
596
+
597
+ // 5. Stop checkpoint loop
598
+ clearInterval(checkpointTimer);
599
+
600
+ // 6. Final checkpoint
601
+ await this.checkpointService.createCheckpoint(featureId);
602
+
603
+ // 7. Stop watchdog
604
+ await this.watchdog.stopWatching(featureId);
605
+
606
+ return result;
607
+ }
608
+ ```
609
+
610
+ **File:** `apps/control-plane/src/application/services/patch-service.ts`
611
+
612
+ **Changes:**
613
+
614
+ - Extract validation logic into reusable method: `validateDiff(featureId, parsedDiff)`
615
+ - Used by both `repoApplyPatch` (deterministic) and `CheckpointService.validateCheckpoint` (interactive)
616
+
617
+ **New method:** `validateDiff(featureId: string, parsedDiff: ParsedDiff): Promise<ValidationResult>`
618
+
619
+ ```typescript
620
+ async validateDiff(featureId: string, parsedDiff: ParsedDiff): Promise<ValidationResult> {
621
+ const plan = await this.loadAcceptedPlan(featureId);
622
+
623
+ // Validate paths against plan
624
+ await this.validatePatchPaths(featureId, parsedDiff, plan);
625
+
626
+ // Validate locks held
627
+ await this.assertPlanLocksHeld(featureId, plan);
628
+
629
+ return { valid: true, violations: [] };
630
+ }
631
+ ```
632
+
633
+ ### 3.4 Provider Interface Changes
634
+
635
+ **File:** `apps/control-plane/src/providers/providers.ts`
636
+
637
+ **Extend `WorkerProvider` interface:**
638
+
639
+ ```typescript
640
+ interface WorkerProvider {
641
+ // Existing methods
642
+ runWorker(input: WorkerRunInput): Promise<WorkerRunOutput>;
643
+ createSession(role: string, featureId: string, systemPrompt: string): Promise<SessionInfo>;
644
+
645
+ // NEW: Interactive mode support
646
+ pauseAgent(featureId: string, timeout?: number): Promise<PauseAckResult>;
647
+ resumeAgent(featureId: string): Promise<void>;
648
+ sendMessage(featureId: string, message: AgentMessage): Promise<MessageAckResult>;
649
+ getCapabilities(): WorkerProviderCapabilities;
650
+ }
651
+
652
+ interface PauseAckResult {
653
+ acknowledged: boolean;
654
+ timeout: boolean;
655
+ latency_ms: number;
656
+ }
657
+
658
+ interface MessageAckResult {
659
+ delivered: boolean;
660
+ acknowledged: boolean;
661
+ agent_response?: string;
662
+ }
663
+
664
+ interface AgentMessage {
665
+ type: 'checkpoint_violation' | 'info' | 'warning' | 'error';
666
+ severity: ViolationSeverity;
667
+ content: string;
668
+ structured_data?: Record<string, unknown>;
669
+ requires_acknowledgment: boolean;
670
+ }
671
+
672
+ interface WorkerProviderCapabilities {
673
+ supportsInteractiveMode: boolean;
674
+ supportsWorkingDirectory: boolean;
675
+ supportsPauseResume: boolean;
676
+ supportsMessagePassing: boolean;
677
+ supportsAcknowledgment: boolean;
678
+ }
679
+ ```
680
+
681
+ **Extend `WorkerRunInput`:**
682
+
683
+ ```typescript
684
+ interface WorkerRunInput {
685
+ role: string;
686
+ feature_id: string;
687
+ context_bundle?: Record<string, unknown>;
688
+ instructions?: string;
689
+ last_tool_results?: Array<Record<string, unknown>>;
690
+ runtime_selection?: {
691
+ provider: string;
692
+ model: string;
693
+ provider_config_ref: string | null;
694
+ };
695
+ // NEW: Interactive mode fields
696
+ execution_mode?: 'deterministic' | 'interactive';
697
+ working_directory?: string; // Set to worktree path in interactive mode
698
+ pause_resume_protocol?: 'signal' | 'message' | 'none'; // How to pause agent
699
+ }
700
+ ```
701
+
702
+ **Provider implementations:**
703
+
704
+ - **CliWorkerProvider:**
705
+ - Set `cwd` to `working_directory` when spawning agent process
706
+ - Implement `pauseAgent` via SIGSTOP/SIGCONT or stdin message
707
+ - Implement `sendMessage` via stdin JSON messages
708
+ - Implement `getCapabilities` returning full support
709
+ - **ApiWorkerProvider:**
710
+ - Include `working_directory` in context (API may not support it)
711
+ - `pauseAgent` not supported (return `{ acknowledged: false, timeout: true }`)
712
+ - `sendMessage` via API message endpoint (if available)
713
+ - `getCapabilities` returning limited support
714
+ - **NullWorkerProvider:**
715
+ - No behavioral changes (remains a stub)
716
+ - `getCapabilities` returning no support
717
+
718
+ **Agent-side protocol (for CLI agents):**
719
+
720
+ Agents must implement stdin message handling:
721
+
722
+ ```json
723
+ // Supervisor -> Agent: Pause request
724
+ {
725
+ "type": "pause_request",
726
+ "checkpoint_id": "ckpt-003-xyz",
727
+ "reason": "checkpoint_validation",
728
+ "timeout_ms": 10000
729
+ }
730
+
731
+ // Agent -> Supervisor: Pause acknowledgment
732
+ {
733
+ "type": "pause_ack",
734
+ "checkpoint_id": "ckpt-003-xyz",
735
+ "status": "paused",
736
+ "pending_operations": []
737
+ }
738
+
739
+ // Supervisor -> Agent: Resume
740
+ {
741
+ "type": "resume",
742
+ "checkpoint_id": "ckpt-003-xyz"
743
+ }
744
+
745
+ // Supervisor -> Agent: Violation notification
746
+ {
747
+ "type": "checkpoint_violation",
748
+ "severity": "warning",
749
+ "checkpoint_id": "ckpt-003-xyz",
750
+ "violations": [
751
+ {
752
+ "file": "src/config.ts",
753
+ "constraint": "allowed_areas",
754
+ "message": "Path not in allowed_areas",
755
+ "suggestion": "Add to plan.allowed_areas or move to allowed path"
756
+ }
757
+ ],
758
+ "action_taken": "none",
759
+ "requires_acknowledgment": true
760
+ }
761
+
762
+ // Agent -> Supervisor: Violation acknowledgment
763
+ {
764
+ "type": "violation_ack",
765
+ "checkpoint_id": "ckpt-003-xyz",
766
+ "understood": true,
767
+ "corrective_action": "will_move_file_to_allowed_path"
768
+ }
769
+ ```
770
+
771
+ ### 3.5 Supervisor Integration
772
+
773
+ **File:** `apps/control-plane/src/supervisor/runtime.ts`
774
+
775
+ **Changes:**
776
+
777
+ - Resolve `execution_mode` from config/CLI/state
778
+ - Pass `execution_mode` to `WorkerDecisionLoop`
779
+ - Pass `interactive` config to `CheckpointService`
780
+
781
+ **New method:** `resolveExecutionMode(featureId: string): Promise<ExecutionMode>`
782
+
783
+ ```typescript
784
+ private async resolveExecutionMode(featureId: string): Promise<ExecutionMode> {
785
+ // 1. Check CLI override
786
+ if (this.cliExecutionMode) {
787
+ return this.cliExecutionMode;
788
+ }
789
+
790
+ // 2. Check feature state override
791
+ const state = await this.kernel.readState(featureId);
792
+ if (state.frontMatter.execution_mode) {
793
+ return state.frontMatter.execution_mode;
794
+ }
795
+
796
+ // 3. Check agents.yaml
797
+ const agentsConfig = this.kernel.getAgentsConfig();
798
+ if (agentsConfig.runtime?.execution_mode) {
799
+ return agentsConfig.runtime.execution_mode;
800
+ }
801
+
802
+ // 4. Default
803
+ return 'deterministic';
804
+ }
805
+ ```
806
+
807
+ ### 3.6 CLI Changes
808
+
809
+ **File:** `apps/control-plane/src/cli/types.ts`
810
+
811
+ Add to `CliArgs`:
812
+
813
+ ```typescript
814
+ interface CliArgs {
815
+ // ... existing fields
816
+ execution_mode?: 'deterministic' | 'interactive';
817
+ }
818
+ ```
819
+
820
+ **File:** `apps/control-plane/src/cli/cli-argument-parser.ts`
821
+
822
+ Parse `--execution-mode` flag.
823
+
824
+ **File:** `apps/control-plane/src/cli/help-command-handler.ts`
825
+
826
+ Add to `run` command help:
827
+
828
+ ```
829
+ --execution-mode <deterministic|interactive>
830
+ Execution mode for agent workers (default: deterministic)
831
+ ```
832
+
833
+ ---
834
+
835
+ ## 4. Validation and Audit Trail
836
+
837
+ ### 4.1 Deterministic Mode Audit Trail
838
+
839
+ **Current behavior (unchanged):**
840
+
841
+ - Every `repo.apply_patch` call logged in operation ledger
842
+ - Full unified diff captured per operation
843
+ - Validation errors logged with rejection reason
844
+ - Complete history in `.aop/runtime/operation-ledger/run:<run_id>.json`
845
+
846
+ ### 4.2 Interactive Mode Audit Trail
847
+
848
+ **New behavior:**
849
+
850
+ - Checkpoints logged in feature state (`checkpoints` array)
851
+ - Diff snapshots stored in `.aop/features/<id>/checkpoints/<checkpoint_id>.diff`
852
+ - Validation results recorded per checkpoint
853
+ - Final diff captured on agent completion
854
+ - Checkpoint metadata includes:
855
+ - `checkpoint_id` (UUID)
856
+ - `timestamp` (ISO 8601)
857
+ - `files_changed` (array of paths)
858
+ - `validation_status` (valid/invalid/skipped)
859
+ - `violations` (array of error messages)
860
+ - `severity` (info/warning/error/critical)
861
+ - `diff_snapshot` (path to diff file)
862
+
863
+ **Audit trail location:**
864
+
865
+ ```
866
+ .aop/features/<feature_id>/
867
+ ├── state.md (includes checkpoints array)
868
+ └── checkpoints/
869
+ ├── checkpoint-001.diff
870
+ ├── checkpoint-002.diff
871
+ └── checkpoint-003.diff
872
+ ```
873
+
874
+ ### 4.3 Validation Enforcement
875
+
876
+ **Both modes enforce:**
877
+
878
+ - Plan constraints (allowed_areas, forbidden_areas)
879
+ - Policy constraints (patch_policy, protected_areas)
880
+ - Lock constraints (contracts require held locks)
881
+ - Path rules (RBAC per path)
882
+
883
+ **Difference:**
884
+
885
+ - **Deterministic:** Validation before application (gate-keeping)
886
+ - **Interactive:** Validation at checkpoints (monitoring)
887
+
888
+ **Interactive mode violation handling:**
889
+
890
+ 1. **Notify agent:** Send message via `WorkerProvider.sendMessage` with:
891
+ - Violation details (which files, which constraints violated)
892
+ - Severity level (info/warning/error/critical) from `violation_severity` config
893
+ - Suggested remediation actions
894
+ 2. **Optional revert:** If `revert_on_violation: true`, run `git checkout -- <files>` to undo changes
895
+ 3. **Block merge:** Feature cannot reach `ready_to_merge` if any checkpoint has `validation_status: invalid` with severity >= `error`
896
+
897
+ **Severity level behavior:**
898
+
899
+ - `info`: Log only, no blocking
900
+ - `warning`: Notify agent, no blocking (default)
901
+ - `error`: Notify agent, block merge
902
+ - `critical`: Notify agent, block merge, optionally revert
903
+
904
+ ---
905
+
906
+ ## 5. Migration Path
907
+
908
+ ### 5.1 Phase 1: Schema and Config (Week 1)
909
+
910
+ - Add `execution_mode` to `agents.schema.json`
911
+ - Add `interactive` config to `agents.schema.json`
912
+ - Add `execution_mode` and `checkpoints` to `state.schema.json`
913
+ - Update `agents.yaml` with default `execution_mode: deterministic`
914
+ - Add CLI flag `--execution-mode`
915
+
916
+ ### 5.2 Phase 2: Watchdog and Checkpoint Services (Week 2)
917
+
918
+ - Implement `WorktreeWatchdogService`
919
+ - Implement `CheckpointService`
920
+ - Extract `PatchService.validateDiff` for reuse
921
+ - Add unit tests (>= 90% coverage)
922
+
923
+ ### 5.3 Phase 3: Interactive Mode Execution (Week 3)
924
+
925
+ - Add `runInteractiveWorker` to `WorkerDecisionLoop`
926
+ - Update `SupervisorRuntime.resolveExecutionMode`
927
+ - Update provider implementations to support `working_directory`
928
+ - Add integration tests
929
+
930
+ ### 5.4 Phase 4: Audit Trail and Dashboard (Week 4)
931
+
932
+ - Store checkpoint diffs in `.aop/features/<id>/checkpoints/`
933
+ - Update dashboard to display checkpoints (new tab in RuntimeInspector)
934
+ - Add checkpoint timeline view
935
+ - Add validation status indicators
936
+
937
+ ### 5.5 Phase 5: Documentation and Rollout (Week 5)
938
+
939
+ - Update README with execution mode documentation
940
+ - Update AGENTS.md and CLAUDE.md
941
+ - Add example configurations for interactive mode
942
+ - Update `aop init` wizard to ask about execution mode preference
943
+
944
+ ---
945
+
946
+ ## 6. Testing Strategy
947
+
948
+ ### 6.1 Unit Tests
949
+
950
+ **WorktreeWatchdogService:**
951
+
952
+ - Detects file creation
953
+ - Detects file modification
954
+ - Detects file deletion
955
+ - Tracks change count correctly
956
+ - Resets change count on demand
957
+
958
+ **CheckpointService:**
959
+
960
+ - Creates checkpoint with valid diff
961
+ - Validates diff against plan
962
+ - Records checkpoint in state
963
+ - Stores diff snapshot in correct location
964
+ - Handles validation failures
965
+
966
+ **PatchService.validateDiff:**
967
+
968
+ - Rejects paths outside allowed_areas
969
+ - Rejects paths in forbidden_areas
970
+ - Requires locks for contract modifications
971
+ - Enforces protected_areas
972
+
973
+ ### 6.2 Integration Tests
974
+
975
+ **Deterministic Mode (Regression):**
976
+
977
+ - Existing tests continue to pass
978
+ - No behavior changes
979
+
980
+ **Interactive Mode:**
981
+
982
+ - Agent spawns with correct working directory
983
+ - Watchdog detects agent file changes
984
+ - Checkpoints created at intervals
985
+ - Validation runs at checkpoints
986
+ - Violations notify agent
987
+ - Final checkpoint validates before commit
988
+ - Audit trail captured correctly
989
+
990
+ ### 6.3 End-to-End Tests
991
+
992
+ **Scenario 1: Interactive mode with valid changes**
993
+
994
+ 1. Start feature with `--execution-mode interactive`
995
+ 2. Agent modifies files in worktree
996
+ 3. Checkpoints validate successfully
997
+ 4. Feature completes with all changes committed
998
+
999
+ **Scenario 2: Interactive mode with policy violation**
1000
+
1001
+ 1. Start feature with `--execution-mode interactive`
1002
+ 2. Agent modifies file outside allowed_areas
1003
+ 3. Checkpoint validation fails
1004
+ 4. Agent receives violation notification
1005
+ 5. Agent corrects violation
1006
+ 6. Next checkpoint validates successfully
1007
+
1008
+ **Scenario 3: Mode switching**
1009
+
1010
+ 1. Start feature in deterministic mode
1011
+ 2. Update state with `execution_mode: interactive`
1012
+ 3. Resume feature
1013
+ 4. Execution switches to interactive mode
1014
+
1015
+ ---
1016
+
1017
+ ## 7. Acceptance Criteria
1018
+
1019
+ ### 7.1 Must Have
1020
+
1021
+ - [x] Schema changes for `execution_mode` and `checkpoints`
1022
+ - [x] CLI flag `--execution-mode` implemented
1023
+ - [x] `WorktreeWatchdogService` implemented
1024
+ - [x] `CheckpointService` implemented
1025
+ - [x] `PatchService.validateDiff` extracted and reused
1026
+ - [x] `WorkerDecisionLoop.runInteractiveWorker` implemented
1027
+ - [x] `SupervisorRuntime.resolveExecutionMode` implemented
1028
+ - [x] Provider interface supports `working_directory`
1029
+ - [x] Checkpoint diffs stored in `.aop/features/<id>/checkpoints/`
1030
+ - [x] Validation enforced at checkpoints
1031
+ - [x] Audit trail complete for both modes
1032
+ - [x] All existing tests pass (deterministic mode regression)
1033
+ - [x] New tests for interactive mode (>= 90% coverage)
1034
+ - [x] TypeScript strict mode passes
1035
+ - [x] ESLint zero warnings
1036
+
1037
+ ### 7.2 Should Have
1038
+
1039
+ - [x] Dashboard displays checkpoints in RuntimeInspector
1040
+ - [x] Checkpoint timeline view
1041
+ - [x] Validation status indicators
1042
+ - [x] Agent notification on violation
1043
+ - [x] Optional auto-revert on violation
1044
+ - [x] Documentation in README
1045
+ - [x] Example configurations
1046
+
1047
+ ### 7.3 Nice to Have
1048
+
1049
+ - [x] Real-time checkpoint streaming to dashboard
1050
+ - [x] Diff viewer for checkpoint snapshots
1051
+ - [x] Checkpoint comparison (diff between checkpoints)
1052
+ - [x] Checkpoint rollback command (`aop rollback --checkpoint <id>`)
1053
+ - [x] Interactive mode performance metrics
1054
+
1055
+ ---
1056
+
1057
+ ## 8. Open Questions
1058
+
1059
+ ### 8.1 Checkpoint Frequency ✅ RESOLVED
1060
+
1061
+ **Question:** What is the optimal checkpoint interval?
1062
+
1063
+ **Options:**
1064
+
1065
+ - Time-based: Every 30 seconds (default)
1066
+ - Change-based: Every 50 file changes (default)
1067
+ - Hybrid: Whichever comes first
1068
+
1069
+ **Decision:** Hybrid approach with configurable thresholds.
1070
+
1071
+ - `checkpoint_interval_ms: 30000` (time trigger)
1072
+ - `max_uncommitted_changes: 50` (change trigger)
1073
+ - Checkpoint created when either threshold is reached
1074
+
1075
+ ### 8.2 Violation Handling ✅ RESOLVED
1076
+
1077
+ **Question:** Should violations auto-revert or just notify?
1078
+
1079
+ **Options:**
1080
+
1081
+ - **Notify only:** Agent receives message, decides how to fix
1082
+ - **Auto-revert:** Supervisor reverts changes, agent starts fresh
1083
+ - **Configurable:** User chooses via `revert_on_violation` flag
1084
+
1085
+ **Decision:** Configurable with default `false` (notify only).
1086
+
1087
+ **Severity levels added:**
1088
+
1089
+ - `violation_severity: info | warning | error | critical`
1090
+ - Default: `warning`
1091
+ - Behavior varies by severity (see Section 4.3)
1092
+
1093
+ ### 8.3 Provider Compatibility ✅ RESOLVED
1094
+
1095
+ **Question:** Which providers support interactive mode?
1096
+
1097
+ **Analysis:**
1098
+
1099
+ - **CliWorkerProvider:** Full support (can set `cwd`)
1100
+ - **ApiWorkerProvider:** Limited support (API may not honor `working_directory`)
1101
+ - **NullWorkerProvider:** N/A (stub)
1102
+
1103
+ **Decision:** Document provider compatibility in the README, and warn users at startup if the selected provider doesn't support interactive mode.
1104
+
1105
+ ### 8.4 Concurrent Checkpoints ✅ RESOLVED
1106
+
1107
+ **Question:** Can multiple features run in interactive mode simultaneously?
1108
+
1109
+ **Decision:** Yes, multiple features can run in interactive mode simultaneously.
1110
+
1111
+ - Each feature has isolated worktree and watchdog instance
1112
+ - Watchdog service maintains map of `featureId -> WatcherInstance`
1113
+ - Checkpoint service handles concurrent validation requests
1114
+ - File system watchdog overhead scales linearly with active features
1115
+ - Monitor performance when `max_active_features > 5`
1116
+
1117
+ ---
1118
+
1119
+ ## 9. Risks and Mitigations
1120
+
1121
+ ### 9.1 Risk: Race Conditions During Checkpoint Validation
1122
+
1123
+ **Problem:** Agent continues writing while checkpoint validation runs, causing:
1124
+
1125
+ - Validation of incomplete/inconsistent state
1126
+ - File corruption if revert happens mid-write
1127
+ - Duplicate checkpoints if change threshold triggers during time-based checkpoint
1128
+
1129
+ **Mitigation:**
1130
+
1131
+ - **Agent pause protocol:** Send `PAUSE` signal to agent before checkpoint, wait for `ACK`, then validate
1132
+ - **Debounce checkpoint triggers:** Minimum 5s between checkpoints regardless of trigger source
1133
+ - **File write detection:** Use `chokidar` `awaitWriteFinish` option (waits for file size to stabilize)
1134
+ - **Checkpoint queue:** Serialize checkpoint operations per feature (no concurrent checkpoints)
1135
+ - **Timeout:** If agent doesn't ACK pause within 10s, force checkpoint anyway and log warning
1136
+
1137
+ **Implementation:**
1138
+
1139
+ ```typescript
1140
+ interface WorkerProvider {
1141
+ pauseAgent(featureId: string): Promise<void>; // Send PAUSE signal
1142
+ resumeAgent(featureId: string): Promise<void>; // Send RESUME signal
1143
+ }
1144
+
1145
+ class CheckpointService {
1146
+ private checkpointLocks = new Map<string, Promise<void>>();
1147
+
1148
+ async createCheckpoint(featureId: string): Promise<CheckpointResult> {
1149
+ // Serialize checkpoints per feature
1150
+ const existingCheckpoint = this.checkpointLocks.get(featureId);
1151
+ if (existingCheckpoint) {
1152
+ await existingCheckpoint;
1153
+ }
1154
+
1155
+ const checkpointPromise = this._doCheckpoint(featureId);
1156
+ this.checkpointLocks.set(featureId, checkpointPromise);
1157
+
1158
+ try {
1159
+ return await checkpointPromise;
1160
+ } finally {
1161
+ this.checkpointLocks.delete(featureId);
1162
+ }
1163
+ }
1164
+
1165
+ private async _doCheckpoint(featureId: string): Promise<CheckpointResult> {
1166
+ // 1. Pause agent
1167
+ await this.provider.pauseAgent(featureId).catch(() => {
1168
+ this.logger.warn(`Agent ${featureId} did not acknowledge pause`);
1169
+ });
1170
+
1171
+ // 2. Wait for file writes to stabilize (100ms)
1172
+ await this.waitForStableFileSystem(featureId);
1173
+
1174
+ // 3. Capture diff
1175
+ const diff = await this.captureDiff(featureId);
1176
+
1177
+ // 4. Validate
1178
+ const validation = await this.validateDiff(featureId, diff);
1179
+
1180
+ // 5. Resume agent
1181
+ await this.provider.resumeAgent(featureId);
1182
+
1183
+ return validation;
1184
+ }
1185
+ }
1186
+ ```
1187
+
1188
+ ### 9.2 Risk: Validation After Write (Too Late)
1189
+
1190
+ **Problem:** Interactive mode validates AFTER agent writes files. If validation fails:
1191
+
1192
+ - Agent has already made dependent changes based on invalid state
1193
+ - Revert is destructive and loses agent's work
1194
+ - No atomic rollback guarantee
1195
+
1196
+ **Mitigation:**
1197
+
1198
+ - **Shadow workspace:** Agent writes to shadow directory, validation promotes to real worktree
1199
+ - **Incremental validation:** Validate each file as it's written (watchdog triggers per-file validation)
1200
+ - **Validation cache:** Cache validation results per file to avoid re-validating unchanged files
1201
+ - **Partial revert:** Only revert files that violate constraints, keep valid changes
1202
+ - **Agent guidance:** Send pre-emptive warnings when agent attempts to write to suspicious paths
1203
+
1204
+ **Implementation (Shadow Workspace):**
1205
+
1206
+ ```typescript
1207
+ class InteractiveExecutionService {
1208
+ async runInteractiveWorker(featureId: string): Promise<void> {
1209
+ const realWorktree = this.kernel.worktreePath(featureId);
1210
+ const shadowWorktree = `${realWorktree}.shadow`;
1211
+
1212
+ // 1. Create shadow workspace (copy of real worktree)
1213
+ await fs.cp(realWorktree, shadowWorktree, { recursive: true });
1214
+
1215
+ // 2. Agent works in shadow
1216
+ await this.provider.runWorker({
1217
+ working_directory: shadowWorktree,
1218
+ // ...
1219
+ });
1220
+
1221
+ // 3. Validate shadow changes
1222
+ const diff = await this.git.diff(realWorktree, shadowWorktree);
1223
+ const validation = await this.validateDiff(featureId, diff);
1224
+
1225
+ if (validation.valid) {
1226
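+ // 4. Promote shadow to real (remove, then rename; NOTE: the two steps are not atomic as a pair, so a crash in between leaves no real worktree)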
1227
+ await fs.rm(realWorktree, { recursive: true });
1228
+ await fs.rename(shadowWorktree, realWorktree);
1229
+ } else {
1230
+ // 5. Discard shadow, notify agent
1231
+ await fs.rm(shadowWorktree, { recursive: true });
1232
+ await this.notifyAgent(featureId, validation.violations);
1233
+ }
1234
+ }
1235
+ }
1236
+ ```
1237
+
1238
+ **Alternative (Incremental Validation):**
1239
+
1240
+ ```typescript
1241
+ class WorktreeWatchdogService {
1242
+ async startWatching(featureId: string): Promise<void> {
1243
+ const watcher = chokidar.watch(worktreePath, {
1244
+ awaitWriteFinish: { stabilityThreshold: 100, pollInterval: 50 },
1245
+ });
1246
+
1247
+ watcher.on('change', async (path) => {
1248
+ // Validate single file immediately
1249
+ const validation = await this.validateSingleFile(featureId, path);
1250
+
1251
+ if (!validation.valid) {
1252
+ // Immediate feedback to agent
1253
+ await this.notifyAgent(featureId, validation.violations, 'critical');
1254
+
1255
+ if (this.config.revert_on_violation) {
1256
+ // Revert single file only
1257
+ await this.git.checkout(featureId, path);
1258
+ }
1259
+ }
1260
+ });
1261
+ }
1262
+ }
1263
+ ```
1264
+
1265
+ ### 9.3 Risk: Agent Modifies Files Outside Worktree
1266
+
1267
+ **Problem:** Agent could escape worktree via:
1268
+
1269
+ - Symlinks (`ln -s /etc/passwd ./passwd`)
1270
+ - Parent directory traversal (`../../sensitive-file`)
1271
+ - Absolute paths (`/etc/passwd`)
1272
+ - `.git` directory modification
1273
+
1274
+ **Mitigation:**
1275
+
1276
+ - **Symlink detection:** Watchdog rejects symlink creation immediately
1277
+ - **Path canonicalization:** Resolve all paths to absolute, verify they start with worktree prefix
1278
+ - **`.git` protection:** Watchdog ignores `.git` directory, validation rejects any `.git` changes
1279
+ - **Filesystem sandbox (advanced):** Run agent in container with bind-mount to worktree only
1280
+
1281
+ **Implementation:**
1282
+
1283
+ ```typescript
1284
+ class WorktreeWatchdogService {
1285
+ private async validatePath(featureId: string, filePath: string): Promise<boolean> {
1286
+ const worktreePath = this.kernel.worktreePath(featureId);
1287
+ const canonicalPath = path.resolve(filePath);
1288
+
1289
+ // 1. Must be inside worktree
1290
+ if (!canonicalPath.startsWith(worktreePath)) {
1291
+ await this.notifyAgent(
1292
+ featureId,
1293
+ [`Path escape detected: ${filePath} is outside worktree`],
1294
+ 'critical',
1295
+ );
1296
+ return false;
1297
+ }
1298
+
1299
+ // 2. Must not be .git directory
1300
+ if (canonicalPath.includes('/.git/')) {
1301
+ await this.notifyAgent(
1302
+ featureId,
1303
+ [`Git directory modification blocked: ${filePath}`],
1304
+ 'critical',
1305
+ );
1306
+ return false;
1307
+ }
1308
+
1309
+ // 3. Must not be symlink
1310
+ const stats = await fs.lstat(canonicalPath);
1311
+ if (stats.isSymbolicLink()) {
1312
+ await this.notifyAgent(featureId, [`Symlink creation blocked: ${filePath}`], 'critical');
1313
+ // Auto-remove symlink
1314
+ await fs.unlink(canonicalPath);
1315
+ return false;
1316
+ }
1317
+
1318
+ return true;
1319
+ }
1320
+ }
1321
+ ```
1322
+
1323
+ ### 9.4 Risk: Validation Overhead in Interactive Mode
1324
+
1325
+ **Problem:** Every checkpoint runs full `git diff` + validation. With large changesets:
1326
+
1327
+ - `git diff` on 1000 files takes seconds
1328
+ - Parsing 1000 diffs is CPU-intensive
1329
+ - Validating 1000 paths against complex plan patterns is slow
1330
+ - Checkpoint blocks agent progress
1331
+
1332
+ **Mitigation:**
1333
+
1334
+ - **Incremental diff:** Only diff files changed since last checkpoint (use watchdog's changed file list)
1335
+ - **Validation cache:** Cache validation results per file path + plan version
1336
+ - **Validation budget:** Timeout validation after 5s, log warning, allow agent to continue
1337
+ - **Async validation:** Run validation in background, don't block agent (notify on completion)
1338
+ - **Sampling:** For large changesets (>100 files), validate random sample + high-risk files only
1339
+
1340
+ **Implementation:**
1341
+
1342
+ ```typescript
1343
+ class CheckpointService {
1344
+ private validationCache = new Map<string, ValidationResult>();
1345
+
1346
+ async validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult> {
1347
+ const plan = await this.loadPlan(featureId);
1348
+ const parsedDiff = this.parseDiff(diff);
1349
+ const changedFiles = parsedDiff.map((d) => d.path);
1350
+
1351
+ // Check cache, validate uncached files with timeout
1352
+ const validationPromise = this.validateFiles(featureId, changedFiles, plan);
1353
+ const timeoutPromise = new Promise<ValidationResult>((resolve) =>
1354
+ setTimeout(
1355
+ () =>
1356
+ resolve({
1357
+ valid: false,
1358
+ violations: ['Validation timeout after 5s'],
1359
+ severity: 'warning',
1360
+ }),
1361
+ 5000,
1362
+ ),
1363
+ );
1364
+
1365
+ return await Promise.race([validationPromise, timeoutPromise]);
1366
+ }
1367
+
1368
+ private async validateFiles(featureId: string, files: string[], plan: Plan) {
1369
+ // Sampling for large changesets (>100 files)
1370
+ if (files.length > 100) {
1371
+ const highRiskFiles = files.filter(
1372
+ (f) => f.includes('schema') || f.includes('contract') || f.includes('migration'),
1373
+ );
1374
+ const sampledFiles = this.randomSample(files, 20);
1375
+ files = [...new Set([...highRiskFiles, ...sampledFiles])];
1376
+ }
1377
+
1378
+ return await pMap(files, (file) => this.validateSingleFile(featureId, file, plan), {
1379
+ concurrency: 10,
1380
+ });
1381
+ }
1382
+ }
1383
+ ```
1384
+
1385
+ ### 9.5 Risk: Incomplete Audit Trail
1386
+
1387
+ **Problem:** Checkpoints only capture state at intervals. Between checkpoints:
1388
+
1389
+ - No record of intermediate states
1390
+ - No record of which files were attempted but reverted
1391
+ - No record of agent's decision process
1392
+ - Cannot reconstruct exact sequence of changes
1393
+
1394
+ **Mitigation:**
1395
+
1396
+ - **Continuous event log:** Watchdog logs every file change event with timestamp
1397
+ - **Agent action log:** Capture agent's tool calls and responses (even in interactive mode)
1398
+ - **Validation history:** Log all validation attempts (pass/fail) with details
1399
+ - **Checkpoint chain:** Each checkpoint references previous checkpoint (linked list)
1400
+ - **Diff accumulation:** Store both incremental diffs (since last checkpoint) and cumulative diffs (since start)
1401
+
1402
+ **Implementation:**
1403
+
1404
+ ```typescript
1405
+ interface CheckpointMetadata {
1406
+ checkpoint_id: string;
1407
+ timestamp: string;
1408
+ previous_checkpoint_id: string | null; // Chain checkpoints
1409
+ files_changed_since_last: string[]; // Incremental
1410
+ files_changed_total: string[]; // Cumulative
1411
+ diff_incremental: string; // Path to incremental diff
1412
+ diff_cumulative: string; // Path to cumulative diff
1413
+ validation_status: 'valid' | 'invalid' | 'skipped';
1414
+ violations: string[];
1415
+ severity: ViolationSeverity;
1416
+ }
1417
+
1418
+ interface FileChangeEvent {
1419
+ timestamp: string;
1420
+ event_type: 'add' | 'change' | 'unlink';
1421
+ file_path: string;
1422
+ file_size: number;
1423
+ checkpoint_id: string | null; // Which checkpoint captured this change
1424
+ }
1425
+
1426
+ class WorktreeWatchdogService {
1427
+ private eventLog = new Map<string, FileChangeEvent[]>();
1428
+
1429
+ async startWatching(featureId: string): Promise<void> {
1430
+ const watcher = chokidar.watch(worktreePath);
1431
+
1432
+ watcher.on('all', (event, path) => {
1433
+ const changeEvent: FileChangeEvent = {
1434
+ timestamp: new Date().toISOString(),
1435
+ event_type: event,
1436
+ file_path: path,
1437
+ file_size: fs.statSync(path).size,
1438
+ checkpoint_id: null, // Will be set when checkpoint captures it
1439
+ };
1440
+
1441
+ this.eventLog.get(featureId)?.push(changeEvent);
1442
+
1443
+ // Persist event log continuously
1444
+ this.persistEventLog(featureId);
1445
+ });
1446
+ }
1447
+
1448
+ private async persistEventLog(featureId: string): Promise<void> {
1449
+ const logPath = `.aop/features/${featureId}/logs/file-changes.jsonl`;
1450
+ const events = this.eventLog.get(featureId) || [];
1451
+
1452
+ // Append to JSONL file (one JSON object per line)
1453
+ await fs.appendFile(logPath, events.map((e) => JSON.stringify(e)).join('\n') + '\n');
1454
+ }
1455
+ }
1456
+ ```
1457
+
1458
+ ### 9.6 Risk: Provider Incompatibility
1459
+
1460
+ **Problem:** Not all providers support interactive mode:
1461
+
1462
+ - API-based providers (Claude API, Codex API) cannot set `cwd`
1463
+ - Some CLI providers don't respect working directory
1464
+ - Agent may not understand pause/resume protocol
1465
+
1466
+ **Mitigation:**
1467
+
1468
+ - **Provider capability detection:** Query provider for `supportsInteractiveMode` before starting
1469
+ - **Automatic fallback:** If provider doesn't support interactive, fall back to deterministic mode
1470
+ - **Clear error messages:** Warn user if they request interactive mode with incompatible provider
1471
+ - **Provider adapter interface:** Standardize pause/resume/working_directory contract
1472
+
1473
+ **Implementation:**
1474
+
1475
+ ```typescript
1476
+ interface WorkerProviderCapabilities {
1477
+ supportsInteractiveMode: boolean;
1478
+ supportsWorkingDirectory: boolean;
1479
+ supportsPauseResume: boolean;
1480
+ supportsMessagePassing: boolean;
1481
+ }
1482
+
1483
+ interface WorkerProvider {
1484
+ getCapabilities(): WorkerProviderCapabilities;
1485
+ // ... existing methods
1486
+ }
1487
+
1488
+ class SupervisorRuntime {
1489
+ private resolveExecutionMode(featureId: string, requestedMode: ExecutionMode): ExecutionMode {
1490
+ const capabilities = this.provider.getCapabilities();
1491
+
1492
+ if (requestedMode === 'interactive' && !capabilities.supportsInteractiveMode) {
1493
+ this.logger.warn(
1494
+ `Provider ${this.provider.name} does not support interactive mode, ` +
1495
+ `falling back to deterministic mode`,
1496
+ );
1497
+ return 'deterministic';
1498
+ }
1499
+
1500
+ return requestedMode;
1501
+ }
1502
+ }
1503
+ ```
1504
+
1505
+ ### 9.7 Risk: Security - Agent Escapes Worktree
1506
+
1507
+ **Problem:** Agent with direct file system access could:
1508
+
1509
+ - Write malicious code that executes before checkpoint
1510
+ - Modify `.git` directory to corrupt repository
1511
+ - Create symlinks to escape worktree
1512
+ - Exhaust disk space with large files
1513
+ - Execute arbitrary commands via shell scripts
1514
+
1515
+ **Mitigation:**
1516
+
1517
+ - **Path validation:** Reject any file operations outside worktree (see 9.3)
1518
+ - **`.git` protection:** Watchdog blocks all `.git` modifications immediately
1519
+ - **Disk quota:** Cap total disk usage per feature (e.g., 1 GB max)
1520
+ - **Executable detection:** Flag creation of executable files for review
1521
+ - **Sandboxing (advanced):** Run agent in Docker container with read-only bind mounts except worktree
1522
+
1523
+ **Implementation:**
1524
+
1525
+ ```typescript
1526
+ class WorktreeWatchdogService {
1527
+ private diskUsage = new Map<string, number>();
1528
+ private readonly MAX_DISK_USAGE = 1024 * 1024 * 1024; // 1GB
1529
+
1530
+ async startWatching(featureId: string): Promise<void> {
1531
+ const watcher = chokidar.watch(worktreePath);
1532
+
1533
+ watcher.on('add', async (path) => {
1534
+ // 1. Path validation
1535
+ if (!(await this.validatePath(featureId, path))) {
1536
+ await fs.unlink(path);
1537
+ return;
1538
+ }
1539
+
1540
+ // 2. Disk quota
1541
+ const stats = await fs.stat(path);
1542
+ const currentUsage = this.diskUsage.get(featureId) || 0;
1543
+
1544
+ if (currentUsage + stats.size > this.MAX_DISK_USAGE) {
1545
+ await this.notifyAgent(
1546
+ featureId,
1547
+ [
1548
+ `Disk quota exceeded: ${currentUsage + stats.size} bytes > ${this.MAX_DISK_USAGE} bytes`,
1549
+ ],
1550
+ 'critical',
1551
+ );
1552
+ await fs.unlink(path);
1553
+ return;
1554
+ }
1555
+
1556
+ this.diskUsage.set(featureId, currentUsage + stats.size);
1557
+
1558
+ // 3. Executable detection
1559
+ if (stats.mode & 0o111) {
1560
+ // Has execute bit
1561
+ await this.notifyAgent(
1562
+ featureId,
1563
+ [`Executable file created: ${path} - requires review`],
1564
+ 'warning',
1565
+ );
1566
+ }
1567
+ });
1568
+ }
1569
+ }
1570
+ ```
1571
+
1572
+ ### 9.8 Risk: No Graceful Degradation
1573
+
1574
+ **Problem:** If watchdog fails, checkpoint service fails, or validation times out:
1575
+
1576
+ - Agent continues running blind (no monitoring)
1577
+ - Changes accumulate without validation
1578
+ - System state becomes inconsistent
1579
+ - No clear recovery path
1580
+
1581
+ **Mitigation:**
1582
+
1583
+ - **Circuit breakers:** After 3 consecutive checkpoint failures, pause agent and escalate to human
1584
+ - **Health checks:** Periodic health checks for watchdog, checkpoint service, validation service
1585
+ - **Fallback mode:** If interactive mode fails, automatically switch to deterministic mode
1586
+ - **Agent timeout:** If the agent doesn't complete within `max_execution_time_ms`, force a checkpoint and terminate
1587
+ - **Recovery protocol:** On service failure, capture current state, notify human, wait for manual intervention
1588
+
1589
+ **Implementation:**
1590
+
1591
+ ```typescript
1592
+ class InteractiveExecutionService {
1593
+ private checkpointFailures = new Map<string, number>();
1594
+ private readonly MAX_CHECKPOINT_FAILURES = 3;
1595
+
1596
+ async runInteractiveWorker(featureId: string): Promise<void> {
1597
+ try {
1598
+ // Health check before starting
1599
+ await this.healthCheck();
1600
+
1601
+ // Start agent with timeout
1602
+ const agentPromise = this.provider.runWorker({...});
1603
+ const timeoutPromise = new Promise((_, reject) =>
1604
+ setTimeout(() => reject(new Error('Agent timeout')),
1605
+ this.config.max_execution_time_ms)
1606
+ );
1607
+
1608
+ await Promise.race([agentPromise, timeoutPromise]);
1609
+
1610
+ } catch (error) {
1611
+ // Circuit breaker check
1612
+ const failures = this.checkpointFailures.get(featureId) || 0;
1613
+
1614
+ if (failures >= this.MAX_CHECKPOINT_FAILURES) {
1615
+ await this.escalateToHuman(featureId, error);
1616
+ throw new Error('Interactive mode failed, human intervention required');
1617
+ }
1618
+
1619
+ // Fallback to deterministic mode
1620
+ this.logger.warn(
1621
+ `Interactive mode failed for ${featureId}, falling back to deterministic mode`
1622
+ );
1623
+
1624
+ await this.runDeterministicWorker(featureId);
1625
+ }
1626
+ }
1627
+
1628
+ private async healthCheck(): Promise<void> {
1629
+ const checks = [
1630
+ this.watchdog.healthCheck(),
1631
+ this.checkpointService.healthCheck(),
1632
+ this.validationService.healthCheck(),
1633
+ ];
1634
+
1635
+ const results = await Promise.allSettled(checks);
1636
+ const failures = results.filter(r => r.status === 'rejected');
1637
+
1638
+ if (failures.length > 0) {
1639
+ throw new Error(`Health check failed: ${failures.length} services unhealthy`);
1640
+ }
1641
+ }
1642
+
1643
+ private async escalateToHuman(featureId: string, error: Error): Promise<void> {
1644
+ // 1. Capture current state
1645
+ const state = await this.captureState(featureId);
1646
+
1647
+ // 2. Pause agent
1648
+ await this.provider.pauseAgent(featureId);
1649
+
1650
+ // 3. Notify human via all channels
1651
+ await this.notificationService.send({
1652
+ severity: 'critical',
1653
+ title: `Interactive mode failure: ${featureId}`,
1654
+ message: `Agent execution failed after ${this.MAX_CHECKPOINT_FAILURES} checkpoint failures`,
1655
+ details: { error: error.message, state },
1656
+ actions: [
1657
+ { label: 'Review State', url: `/dashboard/features/${featureId}` },
1658
+ { label: 'Force Checkpoint', action: 'force_checkpoint' },
1659
+ { label: 'Terminate Agent', action: 'terminate' },
1660
+ ],
1661
+ });
1662
+
1663
+ // 4. Update feature state to BLOCKED
1664
+ await this.kernel.updateFeatureState(featureId, {
1665
+ status: 'blocked',
1666
+ blocked_reason: 'interactive_mode_failure',
1667
+ requires_human_intervention: true,
1668
+ });
1669
+ }
1670
+ }
1671
+ ```
1672
+
1673
+ **Additional mitigation (provider fallback):**
1674
+
1675
+ - Fallback to deterministic mode if provider doesn't support `working_directory`
1676
+
1677
+ ---
1678
+
1679
+ ### 9.9 Risk: Concurrent Feature Resource Contention
1680
+
1681
+ **Problem:** Multiple features in interactive mode simultaneously:
1682
+
1683
+ - Watchdog service is singleton with shared state
1684
+ - Checkpoint service has no concurrency control
1685
+ - Validation service could be bottlenecked
1686
+ - File system events could be misattributed
1687
+
1688
+ **Mitigation:**
1689
+
1690
+ - **Per-feature service instances:** Each feature gets isolated watchdog/checkpoint instances
1691
+ - **Resource pooling:** Limit concurrent interactive features (e.g., max 5)
1692
+ - **Event attribution:** Watchdog uses separate `chokidar` instance per feature
1693
+ - **Validation queue:** Route validation requests through a priority queue with bounded concurrency
1694
+ - **Backpressure:** If validation queue is full, pause low-priority features
1695
+
1696
+ **Implementation:**
1697
+
1698
+ ```typescript
1699
+ class WorktreeWatchdogService {
1700
+ private watchers = new Map<string, FSWatcher>(); // Per-feature watchers
1701
+ private eventLogs = new Map<string, FileChangeEvent[]>();
1702
+
1703
+ async startWatching(featureId: string): Promise<void> {
1704
+ const worktreePath = this.kernel.worktreePath(featureId);
1705
+ const watcher = chokidar.watch(worktreePath, {
1706
+ ignored: /(^|[\/\\])\../,
1707
+ persistent: true,
1708
+ ignoreInitial: true,
1709
+ });
1710
+
1711
+ this.watchers.set(featureId, watcher);
1712
+ this.eventLogs.set(featureId, []);
1713
+
1714
+ watcher.on('all', (event, path) => {
1715
+ this.handleFileChange(featureId, event, path);
1716
+ });
1717
+ }
1718
+ }
1719
+
1720
+ class CheckpointService {
1721
+ private validationQueue = new PQueue({ concurrency: 3 });
1722
+
1723
+ async validateCheckpoint(featureId: string, diff: string): Promise<ValidationResult> {
1724
+ return this.validationQueue.add(() => this._validateCheckpoint(featureId, diff), {
1725
+ priority: this.getFeaturePriority(featureId),
1726
+ });
1727
+ }
1728
+ }
1729
+ ```
1730
+
1731
+ ---
1732
+
1733
+ ## 10. Performance Requirements
1734
+
1735
+ ### 10.1 Latency Budgets
1736
+
1737
+ | Operation | Target | Maximum | Degradation Strategy |
1738
+ | ------------------------- | ------- | ------- | ---------------------------- |
1739
+ | Checkpoint creation | < 500ms | 2s | Skip if timeout, log warning |
1740
+ | Single file validation | < 50ms | 200ms | Use cached result if timeout |
1741
+ | Full diff validation | < 1s | 5s | Sample validation if timeout |
1742
+ | Agent pause/resume | < 100ms | 500ms | Force checkpoint if no ACK |
1743
+ | Watchdog event processing | < 10ms | 50ms | Buffer events if slow |
1744
+
1745
+ ### 10.2 Throughput Requirements
1746
+
1747
+ | Metric | Target | Maximum |
1748
+ | -------------------------------- | --------------- | --------------- |
1749
+ | File changes per second | 100 | 500 |
1750
+ | Concurrent interactive features | 5 | 10 |
1751
+ | Checkpoints per feature per hour | 120 (every 30s) | 360 (every 10s) |
1752
+ | Validation cache hit rate | > 80% | N/A |
1753
+
1754
+ ### 10.3 Resource Limits
1755
+
1756
+ | Resource | Limit per Feature | Limit Global |
1757
+ | --------------------- | ----------------- | ------------ |
1758
+ | Disk usage | 1 GB | 10 GB |
1759
+ | Memory (watchdog) | 50 MB | 500 MB |
1760
+ | CPU (validation) | 10% | 50% |
1761
+ | Open file descriptors | 1000 | 10000 |
1762
+
1763
+ ### 10.4 Monitoring and Alerts
1764
+
1765
+ **Metrics to track:**
1766
+
1767
+ - Checkpoint latency (p50, p95, p99)
1768
+ - Validation latency (p50, p95, p99)
1769
+ - Watchdog event processing latency
1770
+ - Validation cache hit rate
1771
+ - Checkpoint failure rate
1772
+ - Agent pause/resume success rate
1773
+ - Disk usage per feature
1774
+ - Memory usage per watchdog instance
1775
+
1776
+ **Alerts:**
1777
+
1778
+ - Checkpoint latency > 2s for 3 consecutive checkpoints
1779
+ - Validation failure rate > 10%
1780
+ - Disk usage > 80% of limit
1781
+ - Memory usage > 80% of limit
1782
+ - Watchdog event queue depth > 1000
1783
+
1784
+ ---
1785
+
1786
+ ## 11. Success Metrics
1787
+
1788
+ ### 11.1 Functional Metrics
1789
+
1790
+ - [x] Both execution modes pass all tests
1791
+ - [x] No regressions in deterministic mode
1792
+ - [x] Interactive mode enforces all plan/policy/lock constraints
1793
+ - [x] Audit trail complete for both modes
1794
+
1795
+ ### 11.2 Quality Metrics
1796
+
1797
+ - [ ] Agent output quality improves in interactive mode (subjective, user feedback)
1798
+ - [ ] Iteration speed increases in interactive mode (measured by time-to-completion)
1799
+ - [ ] Context window usage decreases in interactive mode (fewer `repo.read_file` calls)
1800
+
1801
+ ### 11.3 Performance Metrics
1802
+
1803
+ - [ ] Checkpoint validation completes in < 500ms
1804
+ - [ ] Watchdog overhead < 5% CPU per active feature
1805
+ - [ ] No memory leaks in long-running interactive sessions
1806
+
1807
+ ---
1808
+
1809
+ ## 11. Future Enhancements
1810
+
1811
+ ### 11.1 Hybrid Mode
1812
+
1813
+ **Concept:** Agent can choose execution mode per task.
1814
+
1815
+ **Example:**
1816
+
1817
+ - Use deterministic mode for critical contract changes (requires explicit approval)
1818
+ - Use interactive mode for implementation details (faster iteration)
1819
+
1820
+ ### 11.2 Checkpoint Branching
1821
+
1822
+ **Concept:** Create git branches at checkpoints for easy rollback.
1823
+
1824
+ **Benefit:** Agent can experiment freely, supervisor can revert to any checkpoint.
1825
+
1826
+ ### 11.3 Multi-Agent Interactive Mode
1827
+
1828
+ **Concept:** Multiple agents work in same worktree with conflict resolution.
1829
+
1830
+ **Challenge:** Requires sophisticated merge logic and coordination.
1831
+
1832
+ ### 11.4 Real-Time Collaboration
1833
+
1834
+ **Concept:** Human operator can edit files in worktree while agent is running.
1835
+
1836
+ **Benefit:** Pair programming with AI agent.
1837
+
1838
+ **Challenge:** Requires conflict detection and resolution.
1839
+
1840
+ ---
1841
+
1842
+ ## 12. Appendix
1843
+
1844
+ ### 12.1 Related Specifications
1845
+
1846
+ - **Shadow Workspace Implementation:** [Shadow Workspace Implementation Specification](../outstanding/shadow_workspace_implementation_spec.md) - Detailed implementation of shadow workspace strategy for validation-before-write guarantees
1847
+ - **Runtime Inspection:** [Runtime Inspection Specification](./agentic_orchestrator_runtime_inspection_spec.md) - Dashboard integration for execution mode monitoring
1848
+
1849
+ ### 12.2 File System Watchdog Libraries
1850
+
1851
+ **Options:**
1852
+
1853
+ - `chokidar` - Cross-platform, battle-tested, 20M+ downloads/week
1854
+ - `fs.watch` - Native Node.js, no dependencies
1855
+ - `watchman` - Facebook's file watching service (requires external daemon)
1856
+
1857
+ **Recommendation:** `chokidar` for reliability and cross-platform support.
1858
+
1859
+ ### 12.2 Checkpoint Storage Format
1860
+
1861
+ **Diff snapshot format:** Unified diff (same as `git diff` output)
1862
+
1863
+ **Metadata format:** YAML in `state.md` frontmatter
1864
+
1865
+ **Example:**
1866
+
1867
+ ```yaml
1868
+ checkpoints:
1869
+ - checkpoint_id: 'ckpt-001-a3f2b9c4'
1870
+ timestamp: '2026-03-05T16:28:19.503Z'
1871
+ files_changed: ['src/app.ts', 'src/utils.ts']
1872
+ validation_status: 'valid'
1873
+ violations: []
1874
+ severity: 'info'
1875
+ diff_snapshot: '.aop/features/my_feature/checkpoints/ckpt-001-a3f2b9c4.diff'
1876
+ - checkpoint_id: 'ckpt-002-b7e4c1d9'
1877
+ timestamp: '2026-03-05T16:29:05.127Z'
1878
+ files_changed: ['src/config.ts']
1879
+ validation_status: 'invalid'
1880
+ violations: ["Path 'src/config.ts' not in allowed_areas"]
1881
+ severity: 'warning'
1882
+ diff_snapshot: '.aop/features/my_feature/checkpoints/ckpt-002-b7e4c1d9.diff'
1883
+ ```
1884
+
1885
+ ### 12.3 Validation Reuse
1886
+
1887
+ **Shared validation logic:**
1888
+
1889
+ - `PatchService.validateDiff(featureId, parsedDiff)`
1890
+ - Used by:
1891
+ - `PatchService.repoApplyPatch` (deterministic mode)
1892
+ - `CheckpointService.validateCheckpoint` (interactive mode)
1893
+
1894
+ **Validation steps:**
1895
+
1896
+ 1. Parse diff into file operations (create/modify/delete)
1897
+ 2. Load accepted plan
1898
+ 3. Validate paths against `allowed_areas` and `forbidden_areas`
1899
+ 4. Validate locks held for contract modifications
1900
+ 5. Validate against `protected_areas` and `path_rules`
1901
+ 6. Return validation result with violations array
1902
+
1903
+ ---
1904
+
1905
+ **End of Specification**