@dv.nghiem/flowdeck 0.4.12 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/dist/agents/orchestrator.d.ts.map +1 -1
  2. package/dist/config/index.d.ts +1 -1
  3. package/dist/config/index.d.ts.map +1 -1
  4. package/dist/config/schema.d.ts +27 -1
  5. package/dist/config/schema.d.ts.map +1 -1
  6. package/dist/hooks/approval-hook.d.ts +16 -2
  7. package/dist/hooks/approval-hook.d.ts.map +1 -1
  8. package/dist/hooks/compaction-hook.d.ts +1 -1
  9. package/dist/hooks/compaction-hook.d.ts.map +1 -1
  10. package/dist/hooks/context-window-monitor.d.ts +7 -1
  11. package/dist/hooks/context-window-monitor.d.ts.map +1 -1
  12. package/dist/hooks/decision-trace-hook.d.ts +3 -0
  13. package/dist/hooks/decision-trace-hook.d.ts.map +1 -1
  14. package/dist/hooks/event-log-hook.d.ts +19 -3
  15. package/dist/hooks/event-log-hook.d.ts.map +1 -1
  16. package/dist/hooks/guard-rails.d.ts +16 -5
  17. package/dist/hooks/guard-rails.d.ts.map +1 -1
  18. package/dist/hooks/orchestrator-guard-hook.d.ts +8 -5
  19. package/dist/hooks/orchestrator-guard-hook.d.ts.map +1 -1
  20. package/dist/hooks/tool-guard.d.ts +19 -3
  21. package/dist/hooks/tool-guard.d.ts.map +1 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +8367 -4620
  24. package/dist/services/agent-contract-registry.d.ts.map +1 -1
  25. package/dist/services/agent-trace-graph.d.ts +4 -0
  26. package/dist/services/agent-trace-graph.d.ts.map +1 -1
  27. package/dist/services/agent-validator.d.ts +2 -1
  28. package/dist/services/agent-validator.d.ts.map +1 -1
  29. package/dist/services/approval-manager.d.ts +14 -1
  30. package/dist/services/approval-manager.d.ts.map +1 -1
  31. package/dist/services/audit-log.d.ts +23 -0
  32. package/dist/services/audit-log.d.ts.map +1 -0
  33. package/dist/services/context-ingress.d.ts +75 -0
  34. package/dist/services/context-ingress.d.ts.map +1 -0
  35. package/dist/services/deadlock-detector.d.ts.map +1 -1
  36. package/dist/services/delegation-budget.d.ts +55 -0
  37. package/dist/services/delegation-budget.d.ts.map +1 -0
  38. package/dist/services/event-logger.d.ts +3 -1
  39. package/dist/services/event-logger.d.ts.map +1 -1
  40. package/dist/services/execution-substrate.d.ts +35 -0
  41. package/dist/services/execution-substrate.d.ts.map +1 -0
  42. package/dist/services/harness-controller.d.ts +58 -0
  43. package/dist/services/harness-controller.d.ts.map +1 -0
  44. package/dist/services/harness-policy.d.ts +24 -0
  45. package/dist/services/harness-policy.d.ts.map +1 -0
  46. package/dist/services/harness-types.d.ts +178 -0
  47. package/dist/services/harness-types.d.ts.map +1 -0
  48. package/dist/services/lazy-rule-loader.d.ts +2 -0
  49. package/dist/services/lazy-rule-loader.d.ts.map +1 -1
  50. package/dist/services/prompt-cache.d.ts +25 -0
  51. package/dist/services/prompt-cache.d.ts.map +1 -0
  52. package/dist/services/recovery-layer.d.ts +26 -0
  53. package/dist/services/recovery-layer.d.ts.map +1 -0
  54. package/dist/services/run-trace.d.ts +17 -0
  55. package/dist/services/run-trace.d.ts.map +1 -1
  56. package/dist/services/state-persistence.d.ts +22 -0
  57. package/dist/services/state-persistence.d.ts.map +1 -0
  58. package/dist/services/supervisor-binding.d.ts +9 -0
  59. package/dist/services/supervisor-binding.d.ts.map +1 -1
  60. package/dist/services/token-metrics.d.ts +39 -0
  61. package/dist/services/token-metrics.d.ts.map +1 -0
  62. package/dist/services/verification-layer.d.ts +24 -0
  63. package/dist/services/verification-layer.d.ts.map +1 -0
  64. package/dist/services/workflow-scorecard.d.ts +5 -0
  65. package/dist/services/workflow-scorecard.d.ts.map +1 -1
  66. package/dist/tools/decision-trace.d.ts +4 -0
  67. package/dist/tools/decision-trace.d.ts.map +1 -1
  68. package/dist/tools/delegate.d.ts +16 -0
  69. package/dist/tools/delegate.d.ts.map +1 -0
  70. package/dist/tools/failure-replay.d.ts +8 -0
  71. package/dist/tools/failure-replay.d.ts.map +1 -1
  72. package/dist/tools/policy-engine.d.ts +1 -0
  73. package/dist/tools/policy-engine.d.ts.map +1 -1
  74. package/docs/concepts/HARNESS_ARCHITECTURE.md +241 -0
  75. package/docs/concepts/HARNESS_LAYERS.md +378 -0
  76. package/docs/concepts/HARNESS_WIRING.md +404 -0
  77. package/package.json +1 -1
  78. package/src/commands/fd-guarded-edit.md +69 -0
@@ -0,0 +1,378 @@
1
+ # FlowDeck Harness Layers
2
+
3
+ This document maps each of the eight target harness layers to concrete responsibilities, interfaces, and existing FlowDeck code.
4
+
5
+ ---
6
+
7
+ ## Layer 1: Context ingress
8
+
9
+ **Responsibilities**
10
+
11
+ - Assemble prompt/context from `STATE.md`, `PLAN.md`, `.codebase/` docs, recent events, skills, and rules.
12
+ - Provide a lightweight trivial-chat path for questions that need no workflow.
13
+ - Lazy-load rules/skills based on stage and detected language.
14
+ - Deduplicate context, prune stale entries, and summarize oversized content.
15
+ - Emit token-budget diagnostics so the orchestrator knows how much context remains.
16
+
17
+ **Key types/interfaces**
18
+
19
+ ```typescript
20
+ interface ContextIngressService {
21
+ assemble(input: {
22
+ command: string;
23
+ args: string;
24
+ sessionId: string;
25
+ projectRoot: string;
26
+ }): Promise<AssembledContext>;
27
+ }
28
+
29
+ interface AssembledContext {
30
+ runId: string;
31
+ sessionId: string;
32
+ state: PlanningState;
33
+ route: WorkflowRoute | null;
34
+ relevantRules: string[];
35
+ relevantSkills: string[];
36
+ recentEvents: ToolEvent[];
37
+ observations: Observation[];
38
+ tokenBudget: TokenBudgetSnapshot;
39
+ isTrivialChat: boolean;
40
+ }
41
+ ```
42
+
43
+ **Reused**
44
+
45
+ - `src/services/lazy-rule-loader.ts` — language/stage-based rule discovery and selection.
46
+ - `src/tools/planning-state.ts` + `planning-state-lib.ts` — read/write `STATE.md` and `PLAN.md`.
47
+ - `src/tools/codebase-state.ts` + `repo-memory.ts` — read `.codebase/` docs and architecture graph.
48
+ - `src/services/preflight-explorer.ts` — repo evidence and task-relative context.
49
+ - `src/services/model-router.ts` — complexity classification and stage-aware agent filtering.
50
+ - `src/hooks/context-window-monitor.ts` — token-usage reminder.
51
+
52
+ **Replaced / new**
53
+
54
+ - A new `ContextIngressService` consolidates the current ad-hoc reads scattered across orchestrator prompts and command markdown.
55
+ - Trivial-chat short-circuit is currently implicit in prompts; it becomes an explicit `isTrivialChat` flag.
56
+ - Token-budget diagnostics move from a passive monitor to an active input into routing decisions.
57
+
58
+ ---
59
+
60
+ ## Layer 2: Action mediation
61
+
62
+ **Responsibilities**
63
+
64
+ - Expose the allowed tool surface per agent/role.
65
+ - Normalize and validate tool arguments.
66
+ - Classify risky actions and compute a risk score.
67
+ - Enforce approval gates, architecture constraints, phase gates, and the orchestrator guard.
68
+ - Prevent unsafe and duplicate execution through a single policy path.
69
+
70
+ **Key types/interfaces**
71
+
72
+ ```typescript
73
+ interface ActionRequest {
74
+ toolName: string;
75
+ args: Record<string, unknown>;
76
+ agentName?: string;
77
+ runId: string;
78
+ sessionId: string;
79
+ }
80
+
81
+ interface ActionDecision {
82
+ action: "allow" | "block" | "ask" | "escalate";
83
+ reason: string;
84
+ riskScore: number;
85
+ requiredApprovalId?: string;
86
+ }
87
+
88
+ interface ActionMediatorService {
89
+ check(request: ActionRequest): ActionDecision;
90
+ recordOutcome(request: ActionRequest, decision: ActionDecision, output: unknown): void;
91
+ }
92
+ ```
93
+
94
+ **Reused**
95
+
96
+ - `src/services/agent-contract-registry.ts` — allowed/forbidden tools and escalation conditions.
97
+ - `src/services/agent-validator.ts` — contract violation detection.
98
+ - `src/services/supervisor-binding.ts` — preflight policy review for commands/agents.
99
+ - `src/services/approval-manager.ts` — approval request/check storage.
100
+ - `src/hooks/orchestrator-guard-hook.ts` — blocks orchestrator from write/edit/bash tools.
101
+ - `src/hooks/tool-guard.ts` — blocks dangerous read/write/bash patterns and architecture-constraint violations.
102
+ - `src/hooks/guard-rails.ts` — phase enforcement and design gates.
103
+ - `src/hooks/approval-hook.ts` — sensitive-file approval gate.
104
+ - `src/services/loop-detector.ts` — duplicate/no-progress execution prevention.
105
+
106
+ **Replaced / new**
107
+
108
+ - A new `ActionMediatorService` becomes the single policy path. Today each hook runs independently in `src/index.ts`; the mediator composes them in a defined order and returns one decision.
109
+ - Risk scoring is currently fragmented; the mediator centralizes it.
110
+
111
+ ---
112
+
113
+ ## Layer 3: Execution substrate
114
+
115
+ **Responsibilities**
116
+
117
+ - Provide the real execution environment for commands, tools, and agent delegations.
118
+ - Track command and tool lifecycle.
119
+ - Apply timeouts and budgets.
120
+ - Isolate long-running or risky operations.
121
+ - Emit observability events.
122
+
123
+ **Key types/interfaces**
124
+
125
+ ```typescript
126
+ interface ExecutionSubstrate {
127
+ startRun(command: string, args: Record<string, unknown>, sessionId: string): RunTrace;
128
+ openSpan(input: OpenSpanInput): AgentSpan;
129
+ closeSpan(spanId: string, status: SpanStatus, opts?: CloseSpanOptions): void;
130
+ recordToolCall(spanId: string, toolName: string): void;
131
+ attachTimeout(runId: string, ms: number): void;
132
+ }
133
+ ```
134
+
135
+ **Reused**
136
+
137
+ - `src/services/run-trace.ts` — command-level run lifecycle.
138
+ - `src/services/agent-trace-graph.ts` — causal agent spans.
139
+ - `src/services/event-logger.ts` + `src/hooks/event-log-hook.ts` — tool/session events.
140
+ - `src/services/cost-estimator.ts` — USD cost estimation.
141
+ - `src/services/delegation-budget.ts` (new) — budget envelope.
142
+ - OpenCode native tool execution (the harness wraps it, does not replace it).
143
+
144
+ **Replaced / new**
145
+
146
+ - A new `ExecutionSubstrate` service owns the lifecycle coordination between run trace, agent spans, events, and budget. Currently these are updated separately from hooks.
147
+ - Timeout/budget isolation is mostly absent today; the substrate adds explicit timeouts and long-running-op markers.
148
+
149
+ ---
150
+
151
+ ## Layer 4: State persistence
152
+
153
+ **Responsibilities**
154
+
155
+ - Persist workflow/run state, action history, and observations.
156
+ - Support resumption and recovery across sessions.
157
+ - Prevent loops via remembered attempts.
158
+ - Separate ephemeral state (session cache) from long-lived state (`.planning/`, `.codebase/`).
159
+
160
+ **Key types/interfaces**
161
+
162
+ ```typescript
163
+ interface StatePersistenceService {
164
+ loadRunState(runId: string): RunState | null;
165
+ saveRunState(runId: string, state: RunState): void;
166
+ appendObservation(runId: string, observation: Observation): void;
167
+ getRecentObservations(runId: string, limit?: number): Observation[];
168
+ }
169
+
170
+ interface RunState {
171
+ runId: string;
172
+ workflowClass: WorkflowClass;
173
+ completedStages: string[];
174
+ currentStage: string | null;
175
+ blocked: boolean;
176
+ blockedReason?: string;
177
+ observations: Observation[];
178
+ }
179
+ ```
180
+
181
+ **Reused**
182
+
183
+ - `src/tools/planning-state.ts` — `STATE.md`/`PLAN.md` persistence.
184
+ - `src/services/run-trace.ts` — `RUNS.jsonl`.
185
+ - `src/services/agent-trace-graph.ts` — `AGENT_SPANS.jsonl`.
186
+ - `src/services/event-logger.ts` — `.opencode/flowdeck-events.jsonl`.
187
+ - `src/services/loop-detector.ts` — in-memory remembered attempts.
188
+ - `src/hooks/session-persistence` skill and `src/hooks/session-idle-hook.ts` — session summaries.
189
+
190
+ **Replaced / new**
191
+
192
+ - A new `StatePersistenceService` unifies run, stage, and observation access.
193
+ - Ephemeral vs long-lived state separation is currently implicit; it becomes explicit in the interface.
194
+
195
+ ---
196
+
197
+ ## Layer 5: Verification and review
198
+
199
+ **Responsibilities**
200
+
201
+ - Verify that actions actually happened.
202
+ - Run checks/tests and collect evidence.
203
+ - Distinguish claimed success from verified success.
204
+ - Gate risky completion before the next stage.
205
+
206
+ **Key types/interfaces**
207
+
208
+ ```typescript
209
+ interface VerificationService {
210
+ verifyStage(stage: string, runId: string): VerificationResult;
211
+ checkTests(runId: string): TestResult;
212
+ checkReview(runId: string): ReviewResult;
213
+ }
214
+
215
+ interface VerificationResult {
216
+ passed: boolean;
217
+ evidence: string[];
218
+ blockers: string[];
219
+ }
220
+ ```
221
+
222
+ **Reused**
223
+
224
+ - `src/services/workflow-scorecard.ts` — quality dimensions (TDD, approvals, reviews).
225
+ - `src/services/agent-validator.ts` — contract compliance.
226
+ - `src/services/supervisor-binding.ts` — post-stage review when `postExecutionReview=true`.
227
+ - Existing test-running via bash hooks.
228
+
229
+ **Replaced / new**
230
+
231
+ - A new `VerificationService` moves verification from prompt instructions (`fd-verify`) to runtime checks.
232
+ - Claimed vs verified success is tracked by comparing tool output claims with later verification evidence.
233
+
234
+ ---
235
+
236
+ ## Layer 6: Recovery and debugging
237
+
238
+ **Responsibilities**
239
+
240
+ - Detect no-progress loops and stuck runs.
241
+ - Classify failures and bound retries.
242
+ - Explain blockages to the orchestrator/user.
243
+ - Recover from failures or escalate cleanly.
244
+ - Surface diagnostics.
245
+
246
+ **Key types/interfaces**
247
+
248
+ ```typescript
249
+ interface RecoveryService {
250
+ assessFailure(runId: string, error: unknown): FailureAssessment;
251
+ decideRecovery(assessment: FailureAssessment): RecoveryPlan;
252
+ executeRecovery(plan: RecoveryPlan): RecoveryResult;
253
+ }
254
+
255
+ interface FailureAssessment {
256
+ type: "loop" | "deadlock" | "transient" | "contract" | "budget" | "unknown";
257
+ evidence: string[];
258
+ retryable: boolean;
259
+ }
260
+ ```
261
+
262
+ **Reused**
263
+
264
+ - `src/services/loop-detector.ts` — same-result / no-progress detection.
265
+ - `src/services/deadlock-detector.ts` — agent bounce, circular delegation, stage stall.
266
+ - `src/tools/failure-replay.ts` tool — historical failure lookup.
267
+ - `src/services/agent-performance.ts` — success-rate guidance for re-routing.
268
+
269
+ **Replaced / new**
270
+
271
+ - A new `RecoveryService` coordinates loop detector, deadlock detector, and failure replay into one decision path.
272
+ - Retry bounding is currently per-tool; the service bounds retries per-run and per-stage.
273
+
274
+ ---
275
+
276
+ ## Layer 7: Delegation and coordination
277
+
278
+ **Responsibilities**
279
+
280
+ - Route and supervise work through the orchestrator.
281
+ - Select the minimal workflow class.
282
+ - Coordinate specialists and the default executor.
283
+ - Maintain parent-child visibility across sessions.
284
+ - Escalate when the initial workflow class is insufficient.
285
+
286
+ **Key types/interfaces**
287
+
288
+ ```typescript
289
+ interface DelegateTool {
290
+ execute(input: DelegateInput): Promise<DelegateResult>;
291
+ }
292
+
293
+ interface RunPipelineTool {
294
+ execute(input: RunPipelineInput): Promise<RunPipelineResult>;
295
+ }
296
+
297
+ interface CoordinationService {
298
+ route(task: string, ctx: AssembledContext): WorkflowRoute;
299
+ escalate(runId: string, from: WorkflowClass, to: WorkflowClass, reason: string): void;
300
+ }
301
+ ```
302
+
303
+ **Reused**
304
+
305
+ - `src/agents/orchestrator.ts` — agent definition and routing prompt.
306
+ - `src/agents/default-executor.ts` — direct execution worker.
307
+ - `src/services/quick-router.ts` — task classification and stage sequence.
308
+ - `src/services/workflow-router.ts` — adaptive workflow class selection.
309
+ - `src/services/model-router.ts` — complexity/agent-tier hints.
310
+ - `src/services/agent-trace-graph.ts` — parent/child span linkage.
311
+
312
+ **Replaced / new**
313
+
314
+ - New `delegate` and `run-pipeline` tools turn prompt-based routing into imperative calls.
315
+ - A new `CoordinationService` owns workflow class selection and escalation logic that currently lives in the orchestrator prompt.
316
+
317
+ ---
318
+
319
+ ## Layer 8: Governance and audit
320
+
321
+ **Responsibilities**
322
+
323
+ - Enforce permissions and approvals.
324
+ - Track sensitive actions.
325
+ - Log workflow decisions.
326
+ - Provide auditability.
327
+ - Keep destructive actions human-governed.
328
+
329
+ **Key types/interfaces**
330
+
331
+ ```typescript
332
+ interface GovernanceService {
333
+ reviewTarget(target: string, ctx: SupervisorContext): SupervisorDecision;
334
+ recordDecision(decision: SupervisorDecision): void;
335
+ isActionAllowed(action: string, runId: string): boolean;
336
+ }
337
+
338
+ interface AuditLogEntry {
339
+ runId: string;
340
+ timestamp: string;
341
+ actor: string;
342
+ action: string;
343
+ decision: string;
344
+ reason: string;
345
+ }
346
+ ```
347
+
348
+ **Reused**
349
+
350
+ - `src/services/agent-contract-registry.ts` — capability contracts.
351
+ - `src/services/agent-validator.ts` — contract enforcement.
352
+ - `src/services/supervisor-binding.ts` — structured approve/revise/block/escalate decisions.
353
+ - `src/services/approval-manager.ts` — approval workflow.
354
+ - `src/services/command-validator.ts` — registered command validation.
355
+ - `src/services/workflow-scorecard.ts` — run-level audit score.
356
+ - `src/services/run-trace.ts` + `agent-trace-graph.ts` + `event-logger.ts` — decision and action logs.
357
+ - `src/tools/decision-trace.ts` — explicit decision recording.
358
+ - `src/tools/policy-engine.ts` — policy storage and query.
359
+
360
+ **Replaced / new**
361
+
362
+ - A new `GovernanceService` composes contracts, supervisor, approvals, and command validation into a single governance surface.
363
+ - Sensitive-action tracking moves from opt-in hooks to always-on audit logging.
364
+
365
+ ---
366
+
367
+ ## Layer-to-file quick reference
368
+
369
+ | Layer | Primary new file | Main existing files reused |
370
+ |-------|------------------|----------------------------|
371
+ | Context ingress | `src/services/context-ingress.ts` | `lazy-rule-loader`, `planning-state`, `codebase-state`, `repo-memory`, `preflight-explorer`, `model-router`, `context-window-monitor` |
372
+ | Action mediation | `src/services/action-mediator.ts` | `agent-contract-registry`, `agent-validator`, `supervisor-binding`, `approval-manager`, `orchestrator-guard-hook`, `tool-guard`, `guard-rails`, `approval-hook`, `loop-detector` |
373
+ | Execution substrate | `src/services/execution-substrate.ts` | `run-trace`, `agent-trace-graph`, `event-logger`, `event-log-hook`, `cost-estimator`, `delegation-budget` (new) |
374
+ | State persistence | `src/services/state-persistence.ts` | `planning-state`, `run-trace`, `agent-trace-graph`, `event-logger`, `loop-detector`, `session-idle-hook` |
375
+ | Verification & review | `src/services/verification-layer.ts` | `workflow-scorecard`, `agent-validator`, `supervisor-binding` |
376
+ | Recovery & debugging | `src/services/recovery-layer.ts` | `loop-detector`, `deadlock-detector`, `failure-replay`, `agent-performance` |
377
+ | Delegation & coordination | `src/tools/delegate.ts`, `src/tools/run-pipeline.ts`, `src/services/coordination.ts` | `agents/orchestrator`, `agents/default-executor`, `quick-router`, `workflow-router`, `model-router`, `agent-trace-graph` |
378
+ | Governance & audit | `src/services/governance.ts` | `agent-contract-registry`, `agent-validator`, `supervisor-binding`, `approval-manager`, `command-validator`, `workflow-scorecard`, `run-trace`, `agent-trace-graph`, `event-logger`, `decision-trace`, `policy-engine` |