@dv.nghiem/flowdeck 0.4.11 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/README.md +0 -2
  2. package/dist/agents/orchestrator.d.ts.map +1 -1
  3. package/dist/config/index.d.ts +1 -1
  4. package/dist/config/index.d.ts.map +1 -1
  5. package/dist/config/schema.d.ts +27 -1
  6. package/dist/config/schema.d.ts.map +1 -1
  7. package/dist/dashboard/lib/state-reader.d.ts +2 -1
  8. package/dist/dashboard/lib/state-reader.d.ts.map +1 -1
  9. package/dist/dashboard/server.mjs +128 -13
  10. package/dist/dashboard/types.d.ts +12 -0
  11. package/dist/dashboard/types.d.ts.map +1 -1
  12. package/dist/hooks/approval-hook.d.ts +16 -2
  13. package/dist/hooks/approval-hook.d.ts.map +1 -1
  14. package/dist/hooks/compaction-hook.d.ts +1 -1
  15. package/dist/hooks/compaction-hook.d.ts.map +1 -1
  16. package/dist/hooks/context-window-monitor.d.ts +7 -1
  17. package/dist/hooks/context-window-monitor.d.ts.map +1 -1
  18. package/dist/hooks/decision-trace-hook.d.ts +3 -0
  19. package/dist/hooks/decision-trace-hook.d.ts.map +1 -1
  20. package/dist/hooks/event-log-hook.d.ts +19 -3
  21. package/dist/hooks/event-log-hook.d.ts.map +1 -1
  22. package/dist/hooks/guard-rails.d.ts +16 -5
  23. package/dist/hooks/guard-rails.d.ts.map +1 -1
  24. package/dist/hooks/orchestrator-guard-hook.d.ts +8 -5
  25. package/dist/hooks/orchestrator-guard-hook.d.ts.map +1 -1
  26. package/dist/hooks/shell-env-hook.d.ts.map +1 -1
  27. package/dist/hooks/tool-guard.d.ts +19 -3
  28. package/dist/hooks/tool-guard.d.ts.map +1 -1
  29. package/dist/index.d.ts.map +1 -1
  30. package/dist/index.js +8401 -4863
  31. package/dist/services/agent-contract-registry.d.ts.map +1 -1
  32. package/dist/services/agent-trace-graph.d.ts +4 -0
  33. package/dist/services/agent-trace-graph.d.ts.map +1 -1
  34. package/dist/services/agent-validator.d.ts +2 -1
  35. package/dist/services/agent-validator.d.ts.map +1 -1
  36. package/dist/services/approval-manager.d.ts +14 -1
  37. package/dist/services/approval-manager.d.ts.map +1 -1
  38. package/dist/services/audit-log.d.ts +23 -0
  39. package/dist/services/audit-log.d.ts.map +1 -0
  40. package/dist/services/context-ingress.d.ts +75 -0
  41. package/dist/services/context-ingress.d.ts.map +1 -0
  42. package/dist/services/deadlock-detector.d.ts.map +1 -1
  43. package/dist/services/delegation-budget.d.ts +55 -0
  44. package/dist/services/delegation-budget.d.ts.map +1 -0
  45. package/dist/services/event-logger.d.ts +3 -1
  46. package/dist/services/event-logger.d.ts.map +1 -1
  47. package/dist/services/execution-substrate.d.ts +35 -0
  48. package/dist/services/execution-substrate.d.ts.map +1 -0
  49. package/dist/services/harness-controller.d.ts +58 -0
  50. package/dist/services/harness-controller.d.ts.map +1 -0
  51. package/dist/services/harness-policy.d.ts +24 -0
  52. package/dist/services/harness-policy.d.ts.map +1 -0
  53. package/dist/services/harness-types.d.ts +178 -0
  54. package/dist/services/harness-types.d.ts.map +1 -0
  55. package/dist/services/lazy-rule-loader.d.ts +2 -0
  56. package/dist/services/lazy-rule-loader.d.ts.map +1 -1
  57. package/dist/services/loop-detector.d.ts.map +1 -1
  58. package/dist/services/prompt-cache.d.ts +25 -0
  59. package/dist/services/prompt-cache.d.ts.map +1 -0
  60. package/dist/services/recovery-layer.d.ts +26 -0
  61. package/dist/services/recovery-layer.d.ts.map +1 -0
  62. package/dist/services/run-trace.d.ts +17 -0
  63. package/dist/services/run-trace.d.ts.map +1 -1
  64. package/dist/services/state-persistence.d.ts +22 -0
  65. package/dist/services/state-persistence.d.ts.map +1 -0
  66. package/dist/services/supervisor-binding.d.ts +9 -0
  67. package/dist/services/supervisor-binding.d.ts.map +1 -1
  68. package/dist/services/token-metrics.d.ts +39 -0
  69. package/dist/services/token-metrics.d.ts.map +1 -0
  70. package/dist/services/verification-layer.d.ts +24 -0
  71. package/dist/services/verification-layer.d.ts.map +1 -0
  72. package/dist/services/workflow-scorecard.d.ts +5 -0
  73. package/dist/services/workflow-scorecard.d.ts.map +1 -1
  74. package/dist/tools/decision-trace.d.ts +4 -0
  75. package/dist/tools/decision-trace.d.ts.map +1 -1
  76. package/dist/tools/delegate.d.ts +16 -0
  77. package/dist/tools/delegate.d.ts.map +1 -0
  78. package/dist/tools/failure-replay.d.ts +8 -0
  79. package/dist/tools/failure-replay.d.ts.map +1 -1
  80. package/dist/tools/policy-engine.d.ts +1 -0
  81. package/dist/tools/policy-engine.d.ts.map +1 -1
  82. package/docs/concepts/HARNESS_ARCHITECTURE.md +241 -0
  83. package/docs/concepts/HARNESS_LAYERS.md +378 -0
  84. package/docs/concepts/HARNESS_WIRING.md +404 -0
  85. package/docs/getting-started/installation.md +0 -18
  86. package/docs/index.md +0 -1
  87. package/docs/reference/hooks.md +1 -16
  88. package/package.json +6 -6
  89. package/src/commands/fd-guarded-edit.md +69 -0
  90. package/src/rules/common/agent-defense.md +66 -0
  91. package/src/rules/common/agent-orchestration.md +35 -1
  92. package/src/skills/context-budget/SKILL.md +266 -0
  93. package/src/skills/context-guard/SKILL.md +172 -0
  94. package/src/skills/context-steward/SKILL.md +297 -0
  95. package/src/skills/decision-trace/SKILL.md +137 -0
  96. package/src/skills/research-first/SKILL.md +344 -0
  97. package/src/skills/session-persistence/SKILL.md +320 -0
  98. package/src/skills/telemetry-steward/SKILL.md +191 -0
  99. package/dist/services/rtk-manager.d.ts +0 -80
  100. package/dist/services/rtk-manager.d.ts.map +0 -1
  101. package/dist/services/rtk-policy.d.ts +0 -26
  102. package/dist/services/rtk-policy.d.ts.map +0 -1
  103. package/dist/tools/rtk-setup.d.ts +0 -22
  104. package/dist/tools/rtk-setup.d.ts.map +0 -1
  105. package/docs/reference/rtk.md +0 -162
@@ -0,0 +1,241 @@
1
+ # FlowDeck Target Harness Architecture
2
+
3
+ **Status**: Proposed
4
+ **Scope**: Transform FlowDeck from a prompt-heavy plugin into a real agent-harness engineering runtime while staying OpenCode-native.
5
+
6
+ ## 1. Core idea
7
+
8
+ Today FlowDeck registers agents, rules, skills, commands, hooks and tools, but most critical behavior lives in prompts (orchestrator prompt, command markdown). Several runtime services exist as code and tests but are not wired into the plugin lifecycle. The target architecture moves three things into runtime enforcement:
9
+
10
+ 1. **Delegation is explicit.** The orchestrator no longer "asks" the model to route; it calls a `delegate` tool that the harness executes, tracks, and budgets.
11
+ 2. **Policy is executable.** Agent contracts, supervisor review, approval gates, and loop/deadlock detection run in hooks/services, not only in prompt text.
12
+ 3. **State is first-class.** Workflow state, run traces, agent spans, approvals, and observations are persisted, queryable, and used for recovery, review, and audit.
13
+
14
+ The model is still the reasoner, but the harness owns execution, state, and governance.
15
+
16
+ ## 2. Design principles
17
+
18
+ - **Correctness first**: use existing working services before inventing new ones.
19
+ - **OpenCode-native**: keep OpenCode's tools, permissions (allow/deny/ask), agents, skills, hooks, and config.
20
+ - **Prompts describe, runtime enforces**: contracts, budgets, gates, and routing are checked in code.
21
+ - **Minimum surface area**: only expose what callers (agents/commands) need.
22
+ - **Testable layers**: each layer has narrow interfaces and can be exercised without the full OpenCode runtime.
23
+
24
+ ## 3. High-level component diagram
25
+
26
+ ```
27
+ ┌─────────────────────────────────────────────────────────────────────────┐
28
+ │ OpenCode host │
29
+ │ (model, session, native tools, permissions, agents, commands, skills) │
30
+ └─────────────────────────────────────────────────────────────────────────┘
31
+
32
+
33
+ ┌─────────────────────────────────────────────────────────────────────────┐
34
+ │ FlowDeck plugin (src/index.ts) │
35
+ │ registers agents, tools, hooks, MCPs, commands, skills, rules │
36
+ └─────────────────────────────────────────────────────────────────────────┘
37
+
38
+ ┌───────────────────────────┼───────────────────────────┐
39
+ ▼ ▼ ▼
40
+ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
41
+ │ ContextIngress│ │ ActionMediator│ │ExecutionSubstrate
42
+ │ Service │ │ Service │ │ Service │
43
+ └───────┬───────┘ └───────┬───────┘ └───────┬───────┘
44
+ │ │ │
45
+ ▼ ▼ ▼
46
+ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
47
+ │StatePersistence│ │Verification& │ │Recovery& │
48
+ │ Service │ │ Review │ │ Debugging │
49
+ └───────┬───────┘ └───────┬───────┘ └───────┬───────┘
50
+ │ │ │
51
+ └──────────────────────────┼──────────────────────────┘
52
+
53
+ ┌─────────────────────────────┐
54
+ │ Delegation & Coordination │
55
+ │ (orchestrator + router) │
56
+ └─────────────────────────────┘
57
+
58
+
59
+ ┌─────────────────────────────┐
60
+ │ Governance & Audit │
61
+ │ (contracts, approvals, logs)│
62
+ └─────────────────────────────┘
63
+ ```
64
+
65
+ ## 4. End-to-end data flow
66
+
67
+ A typical user request (`/fd-quick "add auth middleware"`) flows through the harness:
68
+
69
+ 1. **Command entry** — OpenCode fires `command.execute.before`/`after`. The harness starts a `RunTrace` (run_id).
70
+ 2. **Context ingress** — `ContextIngressService` assembles `STATE.md`, `PLAN.md`, `.codebase/` docs, recent events, relevant skills/rules, and a token-budget snapshot. It short-circuits to the trivial-chat path if the request is a simple question.
71
+ 3. **Routing** — `quick-router` + `workflow-router` classify the task and produce a `WorkflowRoute` (workflow class + stage sequence). `model-router` provides complexity/eligible-agent hints.
72
+ 4. **Delegation** — The orchestrator calls the `delegate` tool. `ActionMediator` validates the target agent against `agent-contract-registry`, runs `agent-validator`, checks `supervisor-binding`, and enforces the delegation budget.
73
+ 5. **Execution** — `ExecutionSubstrate` opens an `AgentSpan` (agent-trace-graph), tracks the child session, applies tool lifecycle hooks, and records cost/time.
74
+ 6. **Tool mediation** — On every `tool.execute.before`, `ActionMediator` normalizes args, classifies risk, runs approval gates (`approval-manager`), arch constraints, phase gates, loop detection, and orchestrator guard.
75
+ 7. **State persistence** — Each meaningful change writes to `.planning/STATE.md`, `.codebase/RUNS.jsonl`, `.codebase/AGENT_SPANS.jsonl`, `.opencode/flowdeck-events.jsonl`, or `.codebase/APPROVALS.json`.
76
+ 8. **Verification** — At stage boundaries `VerificationService` checks tests, coverage, review verdict, and design approval before allowing the next stage.
77
+ 9. **Recovery** — If a span fails or a deadlock/loop signal fires, `RecoveryService` classifies the failure, bounds retries, and either re-routes, escalates, or stops.
78
+ 10. **Audit** — On run end, `WorkflowScorecard` is generated and `AGENT_PERF.json` is updated.
79
+
80
+ ## 5. Key interface contracts
81
+
82
+ These interfaces are the contracts between layers. Implementations may be added incrementally.
83
+
84
+ ### 5.1 Context ingress
85
+
86
+ ```typescript
87
+ // src/services/context-ingress.ts
88
+ export interface AssembledContext {
89
+ runId: string;
90
+ sessionId: string;
91
+ projectRoot: string;
92
+ state: PlanningState;
93
+ route: WorkflowRoute | null;
94
+ relevantRules: string[];
95
+ relevantSkills: string[];
96
+ recentEvents: ToolEvent[];
97
+ observations: Observation[];
98
+ tokenBudget: TokenBudgetSnapshot;
99
+ isTrivialChat: boolean;
100
+ }
101
+
102
+ export interface ContextIngressService {
103
+ assemble(input: { command: string; args: string; sessionId: string }): Promise<AssembledContext>;
104
+ refreshRunId(ctx: AssembledContext): AssembledContext;
105
+ snapshotBudget(ctx: AssembledContext): TokenBudgetSnapshot;
106
+ }
107
+ ```
108
+
109
+ ### 5.2 Action mediator
110
+
111
+ ```typescript
112
+ // src/services/action-mediator.ts
113
+ export interface ActionRequest {
114
+ toolName: string;
115
+ args: Record<string, unknown>;
116
+ agentName?: string;
117
+ runId: string;
118
+ sessionId: string;
119
+ }
120
+
121
+ export interface ActionDecision {
122
+ action: "allow" | "block" | "ask" | "escalate";
123
+ reason: string;
124
+ riskScore: number;
125
+ requiredApprovalId?: string;
126
+ }
127
+
128
+ export interface ActionMediatorService {
129
+ check(request: ActionRequest): ActionDecision;
130
+ recordOutcome(request: ActionRequest, decision: ActionDecision, output: unknown): void;
131
+ }
132
+ ```
133
+
134
+ ### 5.3 Delegation
135
+
136
+ ```typescript
137
+ // src/tools/delegate.ts
138
+ export interface DelegateInput {
139
+ target: "agent" | "command";
140
+ name: string; // e.g. "backend-coder" or "fd-plan"
141
+ taskDescription: string;
142
+ contextSummary?: string;
143
+ mode?: "quick" | "standard" | "explore" | "verify-heavy";
144
+ parentSpanId?: string;
145
+ }
146
+
147
+ export interface DelegateResult {
148
+ spanId: string;
149
+ childSessionId?: string;
150
+ status: "running" | "blocked" | "escalated";
151
+ reason?: string;
152
+ }
153
+ ```
154
+
155
+ ### 5.4 Run pipeline
156
+
157
+ ```typescript
158
+ // src/tools/run-pipeline.ts
159
+ export interface RunPipelineInput {
160
+ workflowClass: WorkflowClass;
161
+ stages?: string[]; // optional override
162
+ taskDescription: string;
163
+ confirm?: boolean;
164
+ }
165
+
166
+ export interface RunPipelineResult {
167
+ runId: string;
168
+ completedStages: string[];
169
+ currentStage: string | null;
170
+ blocked: boolean;
171
+ blockedReason?: string;
172
+ }
173
+ ```
174
+
175
+ ### 5.5 Delegation budget
176
+
177
+ ```typescript
178
+ // src/services/delegation-budget.ts
179
+ export interface DelegationBudget {
180
+ runId: string;
181
+ maxToolCalls: number;
182
+ maxDepth: number;
183
+ maxSameStepRetries: number;
184
+ spentToolCalls: number;
185
+ currentDepth: number;
186
+ }
187
+
188
+ export interface DelegationBudgetService {
189
+ init(runId: string, config?: Partial<DelegationBudget>): DelegationBudget;
190
+ checkSpend(runId: string, toolName: string): { ok: boolean; remaining: number };
191
+ recordDelegation(parentRunId: string, childRunId: string): boolean;
192
+ }
193
+ ```
194
+
195
+ ## 6. State files
196
+
197
+ | File | Owner | Purpose | Lifecycle |
198
+ |------|-------|---------|-----------|
199
+ | `.planning/STATE.md` | `planning-state` tool | Current phase, plan confirmation, design gates | Long-lived, updated per phase |
200
+ | `.planning/PLAN.md` | `planning-state` tool | Numbered plan steps | Long-lived, created per feature |
201
+ | `.codebase/RUNS.jsonl` | `run-trace` service | Command-level run history | Append-only |
202
+ | `.codebase/AGENT_SPANS.jsonl` | `agent-trace-graph` service | Causal agent delegation spans | Append-only |
203
+ | `.codebase/SCORECARDS.jsonl` | `workflow-scorecard` service | 10-dimension run quality scores | Append-only |
204
+ | `.codebase/DEADLOCK_SIGNALS.jsonl` | `deadlock-detector` service | Detected loop/deadlock signals | Append-only |
205
+ | `.codebase/APPROVALS.json` | `approval-manager` service | Pending/approved sensitive operations | Mutable |
206
+ | `.codebase/AGENT_PERF.json` | `agent-performance` service | Per-agent/model/task success stats | Mutable |
207
+ | `.codebase/WORKFLOW_ROUTING.jsonl` | `workflow-router` service | Routing decisions and escalations | Append-only |
208
+ | `.opencode/flowdeck-events.jsonl` | `event-logger` service | Raw tool/session events | Append-only, rotated |
209
+
210
+ ## 7. Failure modes and recovery
211
+
212
+ | Failure | Detection | Recovery |
213
+ |---------|-----------|----------|
214
+ | Same tool repeated with same result | `LoopDetector` in `tool.execute.before` | Block + escalation message |
215
+ | Agent bounce / circular delegation | `deadlock-detector` over `AGENT_SPANS.jsonl` | Log signal, auto-stop if configured |
216
+ | Budget exhausted | `DelegationBudgetService` | Warn / stop / escalate based on `governance.costBudget.onExhaustion` |
217
+ | Approval missing | `approval-hook` + `ActionMediator` | Block with `APPROVAL_REQUIRED` and approval id |
218
+ | Contract violation | `agent-validator` | Advisory warning or strict block |
219
+ | Child session error | `event` hook `session.error` | Close span as `failed`, surface to parent |
220
+ | Tool persistence failure | `event-logger` health flag | Loop detection falls back to in-memory, logs warning |
221
+ | Unregistered target | `supervisor-binding` + `command-validator` | Block before execution |
222
+
223
+ ## 8. Security considerations
224
+
225
+ - Secrets never enter state files; event args are sanitized by `sanitizeArgs`.
226
+ - Sensitive paths require explicit approval stored in `.codebase/APPROVALS.json` with TTL.
227
+ - The orchestrator cannot use write/edit/bash tools; `OrchestratorGuard` throws if it attempts to.
228
+ - Arch constraints in `.codebase/CONSTRAINTS.md` block edits to forbidden paths.
229
+ - Phase gates block implementation during discuss/plan phases.
230
+ - Tool guard blocks dangerous bash/read/write patterns when enabled.
231
+
232
+ ## 9. Migration path
233
+
234
+ The harness is built incrementally. Each layer can be merged independently:
235
+
236
+ 1. Wire existing unwired services into `src/index.ts` (agent validator, trace graph, run trace, scorecard, deadlock detector, delegation budget). Existing behavior remains opt-in or advisory.
237
+ 2. Add `delegate` and `run-pipeline` tools behind feature flags; keep markdown commands as fallback.
238
+ 3. Replace prompt-based routing directives with tool calls once delegation is stable.
239
+ 4. Promote governance from advisory to strict via `flowdeck.json`.
240
+
241
+ No existing public API is broken in step 1.
@@ -0,0 +1,378 @@
1
+ # FlowDeck Harness Layers
2
+
3
+ This document maps each of the eight target harness layers to concrete responsibilities, interfaces, and existing FlowDeck code.
4
+
5
+ ---
6
+
7
+ ## Layer 1: Context ingress
8
+
9
+ **Responsibilities**
10
+
11
+ - Assemble prompt/context from `STATE.md`, `PLAN.md`, `.codebase/` docs, recent events, skills, and rules.
12
+ - Provide a lightweight trivial-chat path for questions that need no workflow.
13
+ - Lazy-load rules/skills based on stage and detected language.
14
+ - Deduplicate context, prune stale entries, and summarize oversized content.
15
+ - Emit token-budget diagnostics so the orchestrator knows how much context remains.
16
+
17
+ **Key types/interfaces**
18
+
19
+ ```typescript
20
+ interface ContextIngressService {
21
+ assemble(input: {
22
+ command: string;
23
+ args: string;
24
+ sessionId: string;
25
+ projectRoot: string;
26
+ }): Promise<AssembledContext>;
27
+ }
28
+
29
+ interface AssembledContext {
30
+ runId: string;
31
+ sessionId: string;
32
+ state: PlanningState;
33
+ route: WorkflowRoute | null;
34
+ relevantRules: string[];
35
+ relevantSkills: string[];
36
+ recentEvents: ToolEvent[];
37
+ observations: Observation[];
38
+ tokenBudget: TokenBudgetSnapshot;
39
+ isTrivialChat: boolean;
40
+ }
41
+ ```
42
+
43
+ **Reused**
44
+
45
+ - `src/services/lazy-rule-loader.ts` — language/stage-based rule discovery and selection.
46
+ - `src/tools/planning-state.ts` + `planning-state-lib.ts` — read/write `STATE.md` and `PLAN.md`.
47
+ - `src/tools/codebase-state.ts` + `repo-memory.ts` — read `.codebase/` docs and architecture graph.
48
+ - `src/services/preflight-explorer.ts` — repo evidence and task-relative context.
49
+ - `src/services/model-router.ts` — complexity classification and stage-aware agent filtering.
50
+ - `src/hooks/context-window-monitor.ts` — token-usage reminder.
51
+
52
+ **Replaced / new**
53
+
54
+ - A new `ContextIngressService` consolidates the current ad-hoc reads scattered across orchestrator prompts and command markdown.
55
+ - Trivial-chat short-circuit is currently implicit in prompts; it becomes an explicit `isTrivialChat` flag.
56
+ - Token-budget diagnostics move from a passive monitor to an active input into routing decisions.
57
+
58
+ ---
59
+
60
+ ## Layer 2: Action mediation
61
+
62
+ **Responsibilities**
63
+
64
+ - Expose the allowed tool surface per agent/role.
65
+ - Normalize and validate tool arguments.
66
+ - Classify risky actions and compute a risk score.
67
+ - Enforce approval gates, arch constraints, phase gates, and orchestrator guard.
68
+ - Prevent unsafe and duplicate execution through a single policy path.
69
+
70
+ **Key types/interfaces**
71
+
72
+ ```typescript
73
+ interface ActionRequest {
74
+ toolName: string;
75
+ args: Record<string, unknown>;
76
+ agentName?: string;
77
+ runId: string;
78
+ sessionId: string;
79
+ }
80
+
81
+ interface ActionDecision {
82
+ action: "allow" | "block" | "ask" | "escalate";
83
+ reason: string;
84
+ riskScore: number;
85
+ requiredApprovalId?: string;
86
+ }
87
+
88
+ interface ActionMediatorService {
89
+ check(request: ActionRequest): ActionDecision;
90
+ recordOutcome(request: ActionRequest, decision: ActionDecision, output: unknown): void;
91
+ }
92
+ ```
93
+
94
+ **Reused**
95
+
96
+ - `src/services/agent-contract-registry.ts` — allowed/forbidden tools and escalation conditions.
97
+ - `src/services/agent-validator.ts` — contract violation detection.
98
+ - `src/services/supervisor-binding.ts` — preflight policy review for commands/agents.
99
+ - `src/services/approval-manager.ts` — approval request/check storage.
100
+ - `src/hooks/orchestrator-guard-hook.ts` — blocks orchestrator from write/edit/bash tools.
101
+ - `src/hooks/tool-guard.ts` — blocks dangerous read/write/bash patterns and arch constraints.
102
+ - `src/hooks/guard-rails.ts` — phase enforcement and design gates.
103
+ - `src/hooks/approval-hook.ts` — sensitive-file approval gate.
104
+ - `src/services/loop-detector.ts` — duplicate/no-progress execution prevention.
105
+
106
+ **Replaced / new**
107
+
108
+ - A new `ActionMediatorService` becomes the single policy path. Today each hook runs independently in `src/index.ts`; the mediator composes them in a defined order and returns one decision.
109
+ - Risk scoring is currently fragmented; the mediator centralizes it.
110
+
111
+ ---
112
+
113
+ ## Layer 3: Execution substrate
114
+
115
+ **Responsibilities**
116
+
117
+ - Provide the real execution environment for commands, tools, and agent delegations.
118
+ - Track command and tool lifecycle.
119
+ - Apply timeouts and budgets.
120
+ - Isolate long-running or risky operations.
121
+ - Emit observability events.
122
+
123
+ **Key types/interfaces**
124
+
125
+ ```typescript
126
+ interface ExecutionSubstrate {
127
+ startRun(command: string, args: Record<string, unknown>, sessionId: string): RunTrace;
128
+ openSpan(input: OpenSpanInput): AgentSpan;
129
+ closeSpan(spanId: string, status: SpanStatus, opts?: CloseSpanOptions): void;
130
+ recordToolCall(spanId: string, toolName: string): void;
131
+ attachTimeout(runId: string, ms: number): void;
132
+ }
133
+ ```
134
+
135
+ **Reused**
136
+
137
+ - `src/services/run-trace.ts` — command-level run lifecycle.
138
+ - `src/services/agent-trace-graph.ts` — causal agent spans.
139
+ - `src/services/event-logger.ts` + `src/hooks/event-log-hook.ts` — tool/session events.
140
+ - `src/services/cost-estimator.ts` — USD cost estimation.
141
+ - `src/services/delegation-budget.ts` (new) — budget envelope.
142
+ - OpenCode native tool execution (the harness wraps it, does not replace it).
143
+
144
+ **Replaced / new**
145
+
146
+ - A new `ExecutionSubstrate` service owns the lifecycle coordination between run trace, agent spans, events, and budget. Currently these are updated separately from hooks.
147
+ - Timeout/budget isolation is mostly absent today; the substrate adds explicit timeouts and long-running-op markers.
148
+
149
+ ---
150
+
151
+ ## Layer 4: State persistence
152
+
153
+ **Responsibilities**
154
+
155
+ - Persist workflow/run state, action history, and observations.
156
+ - Support resumption and recovery across sessions.
157
+ - Prevent loops via remembered attempts.
158
+ - Separate ephemeral state (session cache) from long-lived state (`.planning/`, `.codebase/`).
159
+
160
+ **Key types/interfaces**
161
+
162
+ ```typescript
163
+ interface StatePersistenceService {
164
+ loadRunState(runId: string): RunState | null;
165
+ saveRunState(runId: string, state: RunState): void;
166
+ appendObservation(runId: string, observation: Observation): void;
167
+ getRecentObservations(runId: string, limit?: number): Observation[];
168
+ }
169
+
170
+ interface RunState {
171
+ runId: string;
172
+ workflowClass: WorkflowClass;
173
+ completedStages: string[];
174
+ currentStage: string | null;
175
+ blocked: boolean;
176
+ blockedReason?: string;
177
+ observations: Observation[];
178
+ }
179
+ ```
180
+
181
+ **Reused**
182
+
183
+ - `src/tools/planning-state.ts` — `STATE.md`/`PLAN.md` persistence.
184
+ - `src/services/run-trace.ts` — `RUNS.jsonl`.
185
+ - `src/services/agent-trace-graph.ts` — `AGENT_SPANS.jsonl`.
186
+ - `src/services/event-logger.ts` — `.opencode/flowdeck-events.jsonl`.
187
+ - `src/services/loop-detector.ts` — in-memory remembered attempts.
188
+ - `src/skills/session-persistence` skill and `src/hooks/session-idle-hook.ts` — session summaries.
189
+
190
+ **Replaced / new**
191
+
192
+ - A new `StatePersistenceService` unifies run, stage, and observation access.
193
+ - Ephemeral vs long-lived state separation is currently implicit; it becomes explicit in the interface.
194
+
195
+ ---
196
+
197
+ ## Layer 5: Verification and review
198
+
199
+ **Responsibilities**
200
+
201
+ - Verify that actions actually happened.
202
+ - Run checks/tests and collect evidence.
203
+ - Distinguish claimed success from verified success.
204
+ - Gate risky completion before the next stage.
205
+
206
+ **Key types/interfaces**
207
+
208
+ ```typescript
209
+ interface VerificationService {
210
+ verifyStage(stage: string, runId: string): VerificationResult;
211
+ checkTests(runId: string): TestResult;
212
+ checkReview(runId: string): ReviewResult;
213
+ }
214
+
215
+ interface VerificationResult {
216
+ passed: boolean;
217
+ evidence: string[];
218
+ blockers: string[];
219
+ }
220
+ ```
221
+
222
+ **Reused**
223
+
224
+ - `src/services/workflow-scorecard.ts` — quality dimensions (TDD, approvals, reviews).
225
+ - `src/services/agent-validator.ts` — contract compliance.
226
+ - `src/services/supervisor-binding.ts` — post-stage review when `postExecutionReview=true`.
227
+ - Existing test-running via bash hooks.
228
+
229
+ **Replaced / new**
230
+
231
+ - A new `VerificationService` moves verification from prompt instructions (`fd-verify`) to runtime checks.
232
+ - Claimed vs verified success is tracked by comparing tool output claims with later verification evidence.
233
+
234
+ ---
235
+
236
+ ## Layer 6: Recovery and debugging
237
+
238
+ **Responsibilities**
239
+
240
+ - Detect no-progress loops and stuck runs.
241
+ - Classify failures and bound retries.
242
+ - Explain blockages to the orchestrator/user.
243
+ - Recover from failures or escalate cleanly.
244
+ - Surface diagnostics.
245
+
246
+ **Key types/interfaces**
247
+
248
+ ```typescript
249
+ interface RecoveryService {
250
+ assessFailure(runId: string, error: unknown): FailureAssessment;
251
+ decideRecovery(assessment: FailureAssessment): RecoveryPlan;
252
+ executeRecovery(plan: RecoveryPlan): RecoveryResult;
253
+ }
254
+
255
+ interface FailureAssessment {
256
+ type: "loop" | "deadlock" | "transient" | "contract" | "budget" | "unknown";
257
+ evidence: string[];
258
+ retryable: boolean;
259
+ }
260
+ ```
261
+
262
+ **Reused**
263
+
264
+ - `src/services/loop-detector.ts` — same-result / no-progress detection.
265
+ - `src/services/deadlock-detector.ts` — agent bounce, circular delegation, stage stall.
266
+ - `src/tools/failure-replay.ts` tool — historical failure lookup.
267
+ - `src/services/agent-performance.ts` — success-rate guidance for re-routing.
268
+
269
+ **Replaced / new**
270
+
271
+ - A new `RecoveryService` coordinates loop detector, deadlock detector, and failure replay into one decision path.
272
+ - Retry bounding is currently per-tool; the service bounds retries per-run and per-stage.
273
+
274
+ ---
275
+
276
+ ## Layer 7: Delegation and coordination
277
+
278
+ **Responsibilities**
279
+
280
+ - Orchestrator routes and supervises work.
281
+ - Select the minimal workflow class.
282
+ - Coordinate specialists and the default executor.
283
+ - Maintain parent-child visibility across sessions.
284
+ - Escalate when the initial workflow class is insufficient.
285
+
286
+ **Key types/interfaces**
287
+
288
+ ```typescript
289
+ interface DelegateTool {
290
+ execute(input: DelegateInput): Promise<DelegateResult>;
291
+ }
292
+
293
+ interface RunPipelineTool {
294
+ execute(input: RunPipelineInput): Promise<RunPipelineResult>;
295
+ }
296
+
297
+ interface CoordinationService {
298
+ route(task: string, ctx: AssembledContext): WorkflowRoute;
299
+ escalate(runId: string, from: WorkflowClass, to: WorkflowClass, reason: string): void;
300
+ }
301
+ ```
302
+
303
+ **Reused**
304
+
305
+ - `src/agents/orchestrator.ts` — agent definition and routing prompt.
306
+ - `src/agents/default-executor.ts` — direct execution worker.
307
+ - `src/services/quick-router.ts` — task classification and stage sequence.
308
+ - `src/services/workflow-router.ts` — adaptive workflow class selection.
309
+ - `src/services/model-router.ts` — complexity/agent-tier hints.
310
+ - `src/services/agent-trace-graph.ts` — parent/child span linkage.
311
+
312
+ **Replaced / new**
313
+
314
+ - New `delegate` and `run-pipeline` tools turn prompt-based routing into imperative calls.
315
+ - A new `CoordinationService` owns workflow class selection and escalation logic that currently lives in the orchestrator prompt.
316
+
317
+ ---
318
+
319
+ ## Layer 8: Governance and audit
320
+
321
+ **Responsibilities**
322
+
323
+ - Enforce permissions and approvals.
324
+ - Track sensitive actions.
325
+ - Log workflow decisions.
326
+ - Provide auditability.
327
+ - Keep destructive actions human-governed.
328
+
329
+ **Key types/interfaces**
330
+
331
+ ```typescript
332
+ interface GovernanceService {
333
+ reviewTarget(target: string, ctx: SupervisorContext): SupervisorDecision;
334
+ recordDecision(decision: SupervisorDecision): void;
335
+ isActionAllowed(action: string, runId: string): boolean;
336
+ }
337
+
338
+ interface AuditLogEntry {
339
+ runId: string;
340
+ timestamp: string;
341
+ actor: string;
342
+ action: string;
343
+ decision: string;
344
+ reason: string;
345
+ }
346
+ ```
347
+
348
+ **Reused**
349
+
350
+ - `src/services/agent-contract-registry.ts` — capability contracts.
351
+ - `src/services/agent-validator.ts` — contract enforcement.
352
+ - `src/services/supervisor-binding.ts` — structured approve/revise/block/escalate decisions.
353
+ - `src/services/approval-manager.ts` — approval workflow.
354
+ - `src/services/command-validator.ts` — registered command validation.
355
+ - `src/services/workflow-scorecard.ts` — run-level audit score.
356
+ - `src/services/run-trace.ts` + `agent-trace-graph.ts` + `event-logger.ts` — decision and action logs.
357
+ - `src/tools/decision-trace.ts` — explicit decision recording.
358
+ - `src/tools/policy-engine.ts` — policy storage and query.
359
+
360
+ **Replaced / new**
361
+
362
+ - A new `GovernanceService` composes contracts, supervisor, approvals, and command validation into a single governance surface.
363
+ - Sensitive-action tracking moves from opt-in hooks to always-on audit logging.
364
+
365
+ ---
366
+
367
+ ## Layer-to-file quick reference
368
+
369
+ | Layer | Primary new file | Main existing files reused |
370
+ |-------|------------------|----------------------------|
371
+ | Context ingress | `src/services/context-ingress.ts` | `lazy-rule-loader`, `planning-state`, `codebase-state`, `repo-memory`, `preflight-explorer`, `model-router`, `context-window-monitor` |
372
+ | Action mediation | `src/services/action-mediator.ts` | `agent-contract-registry`, `agent-validator`, `supervisor-binding`, `approval-manager`, `orchestrator-guard-hook`, `tool-guard`, `guard-rails`, `approval-hook`, `loop-detector` |
373
+ | Execution substrate | `src/services/execution-substrate.ts` | `run-trace`, `agent-trace-graph`, `event-logger`, `event-log-hook`, `cost-estimator`, `delegation-budget` |
374
+ | State persistence | `src/services/state-persistence.ts` | `planning-state`, `run-trace`, `agent-trace-graph`, `event-logger`, `loop-detector`, `session-idle-hook` |
375
+ | Verification & review | `src/services/verification-layer.ts` | `workflow-scorecard`, `agent-validator`, `supervisor-binding` |
376
+ | Recovery & debugging | `src/services/recovery-layer.ts` | `loop-detector`, `deadlock-detector`, `failure-replay`, `agent-performance` |
377
+ | Delegation & coordination | `src/tools/delegate.ts`, `src/tools/run-pipeline.ts`, `src/services/coordination.ts` | `agents/orchestrator`, `agents/default-executor`, `quick-router`, `workflow-router`, `model-router`, `agent-trace-graph` |
378
+ | Governance & audit | `src/services/governance.ts` | `agent-contract-registry`, `agent-validator`, `supervisor-binding`, `approval-manager`, `command-validator`, `workflow-scorecard`, `run-trace`, `agent-trace-graph`, `event-logger`, `decision-trace`, `policy-engine` |