@exaudeus/workrail 3.35.1 → 3.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/console-ui/assets/{index-D7jQyCSD.js → index-n8cJrS4v.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/daemon/workflow-runner.d.ts +4 -0
- package/dist/daemon/workflow-runner.js +133 -0
- package/dist/manifest.json +23 -23
- package/dist/mcp/handlers/v2-advance-events.js +1 -1
- package/dist/mcp/handlers/v2-execution/start.d.ts +1 -0
- package/dist/mcp/handlers/v2-execution/start.js +3 -2
- package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +64 -32
- package/dist/v2/durable-core/schemas/session/events.d.ts +20 -10
- package/dist/v2/durable-core/schemas/session/events.js +1 -1
- package/dist/v2/durable-core/schemas/session/gaps.d.ts +8 -8
- package/dist/v2/durable-core/schemas/session/gaps.js +1 -1
- package/docs/ideas/backlog.md +80 -1
- package/docs/ideas/design-candidates-spawn-agent-task.md +178 -0
- package/docs/ideas/design-review-findings-spawn-agent-task.md +139 -0
- package/docs/ideas/implementation_plan_spawn_agent.md +217 -0
- package/package.json +1 -1
|
@@ -60,10 +60,18 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
60
60
|
}>>;
|
|
61
61
|
} & {
|
|
62
62
|
kind: z.ZodLiteral<"session_created">;
|
|
63
|
-
data: z.ZodObject<{
|
|
63
|
+
data: z.ZodObject<{
|
|
64
|
+
parentSessionId: z.ZodOptional<z.ZodString>;
|
|
65
|
+
}, "strip", z.ZodTypeAny, {
|
|
66
|
+
parentSessionId?: string | undefined;
|
|
67
|
+
}, {
|
|
68
|
+
parentSessionId?: string | undefined;
|
|
69
|
+
}>;
|
|
64
70
|
}, "strip", z.ZodTypeAny, {
|
|
65
71
|
kind: "session_created";
|
|
66
|
-
data: {
|
|
72
|
+
data: {
|
|
73
|
+
parentSessionId?: string | undefined;
|
|
74
|
+
};
|
|
67
75
|
v: 1;
|
|
68
76
|
sessionId: string;
|
|
69
77
|
eventIndex: number;
|
|
@@ -75,7 +83,9 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
75
83
|
} | undefined;
|
|
76
84
|
}, {
|
|
77
85
|
kind: "session_created";
|
|
78
|
-
data: {
|
|
86
|
+
data: {
|
|
87
|
+
parentSessionId?: string | undefined;
|
|
88
|
+
};
|
|
79
89
|
v: 1;
|
|
80
90
|
sessionId: string;
|
|
81
91
|
eventIndex: number;
|
|
@@ -1834,12 +1844,12 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1834
1844
|
category: "user_only_dependency";
|
|
1835
1845
|
}>, z.ZodObject<{
|
|
1836
1846
|
category: z.ZodLiteral<"contract_violation">;
|
|
1837
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
1847
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
1838
1848
|
}, "strip", z.ZodTypeAny, {
|
|
1839
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1849
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1840
1850
|
category: "contract_violation";
|
|
1841
1851
|
}, {
|
|
1842
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1852
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1843
1853
|
category: "contract_violation";
|
|
1844
1854
|
}>, z.ZodObject<{
|
|
1845
1855
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -1902,7 +1912,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1902
1912
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1903
1913
|
category: "user_only_dependency";
|
|
1904
1914
|
} | {
|
|
1905
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1915
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1906
1916
|
category: "contract_violation";
|
|
1907
1917
|
} | {
|
|
1908
1918
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -1932,7 +1942,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1932
1942
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1933
1943
|
category: "user_only_dependency";
|
|
1934
1944
|
} | {
|
|
1935
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1945
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1936
1946
|
category: "contract_violation";
|
|
1937
1947
|
} | {
|
|
1938
1948
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -1965,7 +1975,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1965
1975
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1966
1976
|
category: "user_only_dependency";
|
|
1967
1977
|
} | {
|
|
1968
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1978
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1969
1979
|
category: "contract_violation";
|
|
1970
1980
|
} | {
|
|
1971
1981
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -2007,7 +2017,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
2007
2017
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
2008
2018
|
category: "user_only_dependency";
|
|
2009
2019
|
} | {
|
|
2010
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
2020
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
2011
2021
|
category: "contract_violation";
|
|
2012
2022
|
} | {
|
|
2013
2023
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -102,7 +102,7 @@ const PreferencesChangedDataV1Schema = zod_1.z
|
|
|
102
102
|
}
|
|
103
103
|
});
|
|
104
104
|
exports.DomainEventV1Schema = zod_1.z.discriminatedUnion('kind', [
|
|
105
|
-
exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({}) }),
|
|
105
|
+
exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({ parentSessionId: zod_1.z.string().optional() }) }),
|
|
106
106
|
exports.DomainEventEnvelopeV1Schema.extend({
|
|
107
107
|
kind: zod_1.z.literal('observation_recorded'),
|
|
108
108
|
scope: zod_1.z.undefined(),
|
|
@@ -11,12 +11,12 @@ export declare const GapReasonSchema: z.ZodDiscriminatedUnion<"category", [z.Zod
|
|
|
11
11
|
category: "user_only_dependency";
|
|
12
12
|
}>, z.ZodObject<{
|
|
13
13
|
category: z.ZodLiteral<"contract_violation">;
|
|
14
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
14
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
15
15
|
}, "strip", z.ZodTypeAny, {
|
|
16
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
16
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
17
17
|
category: "contract_violation";
|
|
18
18
|
}, {
|
|
19
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
19
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
20
20
|
category: "contract_violation";
|
|
21
21
|
}>, z.ZodObject<{
|
|
22
22
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -86,12 +86,12 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
86
86
|
category: "user_only_dependency";
|
|
87
87
|
}>, z.ZodObject<{
|
|
88
88
|
category: z.ZodLiteral<"contract_violation">;
|
|
89
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
89
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
90
90
|
}, "strip", z.ZodTypeAny, {
|
|
91
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
91
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
92
92
|
category: "contract_violation";
|
|
93
93
|
}, {
|
|
94
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
94
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
95
95
|
category: "contract_violation";
|
|
96
96
|
}>, z.ZodObject<{
|
|
97
97
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -154,7 +154,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
154
154
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
155
155
|
category: "user_only_dependency";
|
|
156
156
|
} | {
|
|
157
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
157
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
158
158
|
category: "contract_violation";
|
|
159
159
|
} | {
|
|
160
160
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -184,7 +184,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
184
184
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
185
185
|
category: "user_only_dependency";
|
|
186
186
|
} | {
|
|
187
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
187
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
188
188
|
category: "contract_violation";
|
|
189
189
|
} | {
|
|
190
190
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -12,7 +12,7 @@ exports.UserOnlyDependencyReasonSchema = zod_1.z.enum([
|
|
|
12
12
|
]);
|
|
13
13
|
exports.GapReasonSchema = zod_1.z.discriminatedUnion('category', [
|
|
14
14
|
zod_1.z.object({ category: zod_1.z.literal('user_only_dependency'), detail: exports.UserOnlyDependencyReasonSchema }),
|
|
15
|
-
zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes']) }),
|
|
15
|
+
zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes', 'assessment_followup_required']) }),
|
|
16
16
|
zod_1.z.object({
|
|
17
17
|
category: zod_1.z.literal('capability_missing'),
|
|
18
18
|
detail: zod_1.z.enum(['required_capability_unavailable', 'required_capability_unknown']),
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -5121,7 +5121,9 @@ Workflow steps declare their artifact output type:
|
|
|
5121
5121
|
}
|
|
5122
5122
|
```
|
|
5123
5123
|
|
|
5124
|
-
|
|
5124
|
+
**Both the daemon AND the MCP server** store step artifacts automatically. The artifact store is a WorkRail data layer feature, not daemon-specific. A human using Claude Code with the MCP produces the same artifacts in the same store as an autonomous daemon session. The console shows them for both. Other sessions (human-driven or autonomous) can query them either way.
|
|
5125
|
+
|
|
5126
|
+
In MCP mode, the human can explicitly commit an artifact to the repo if desired (e.g. a final spec becomes `docs/specs/feature-x.md`). But the default is the artifact store -- repo is opt-in. The `NEVER COMMIT MARKDOWN FILES` rule in workflow metaGuidance exists because the artifact store doesn't exist yet. Once it does, that rule becomes unnecessary for all runtimes.
|
|
5125
5127
|
|
|
5126
5128
|
---
|
|
5127
5129
|
|
|
@@ -5146,3 +5148,80 @@ Everything else -- design docs, review findings, investigation notes, implementa
|
|
|
5146
5148
|
6. **Knowledge graph integration** -- artifacts become nodes, sessions link to their artifacts
|
|
5147
5149
|
|
|
5148
5150
|
**The `NEVER COMMIT MARKDOWN FILES` rule in metaGuidance is a symptom of this missing feature.** The rule exists because agents keep dumping files in the wrong place. With a proper artifact store, the rule becomes unnecessary -- artifacts have nowhere to go except the artifact store.
|
|
5151
|
+
|
|
5152
|
+
---
|
|
5153
|
+
|
|
5154
|
+
### "Add to repo" button in console for artifacts (Apr 18, 2026)
|
|
5155
|
+
|
|
5156
|
+
Instead of workflow steps declaring upfront whether an artifact goes to the repo, the human makes that decision after seeing the content -- via a button in the console.
|
|
5157
|
+
|
|
5158
|
+
**The flow:**
|
|
5159
|
+
1. Agent produces artifact → stored automatically in `~/.workrail/artifacts/`
|
|
5160
|
+
2. Human opens it in the console Artifacts tab
|
|
5161
|
+
3. Sees action buttons: **📁 Add to repo** | **📋 Copy** | **🔗 Share link** | **📤 Export**
|
|
5162
|
+
4. Clicks "Add to repo" → console prompts: "Save as: `docs/design/design-candidates-<name>.md`" (editable path with sensible default)
|
|
5163
|
+
5. Console commits the artifact as markdown to the repo at that path, with a commit message like `docs: add design candidates for <workflow-goal>`
|
|
5164
|
+
|
|
5165
|
+
**Why this is better than workflow-level declaration:**
|
|
5166
|
+
- Agent doesn't need to know at step time whether output will be repo-worthy
|
|
5167
|
+
- Human decides after seeing actual content quality
|
|
5168
|
+
- Ephemeral working artifacts stay ephemeral; only promoted ones go to the repo
|
|
5169
|
+
- No "NEVER COMMIT MARKDOWN FILES" rule needed -- agents just produce artifacts, humans decide what's repo-worthy
|
|
5170
|
+
|
|
5171
|
+
**Button options:**
|
|
5172
|
+
- **📁 Add to repo** -- renders artifact as markdown, commits to repo at specified path
|
|
5173
|
+
- **📋 Copy** -- copies rendered markdown to clipboard
|
|
5174
|
+
- **🔗 Share link** -- generates a URL that opens the artifact in the console. ⚠️ Local-only: only works on the same machine or with shared filesystem access. Requires cloud hosting for true team sharing (see cloud hosting spec in backlog)
|
|
5175
|
+
- **📤 Export** -- save to arbitrary filesystem path outside the repo
|
|
5176
|
+
|
|
5177
|
+
**The commit WorkTrain creates:**
|
|
5178
|
+
```
|
|
5179
|
+
docs(design): add design candidates for MCP simplification
|
|
5180
|
+
|
|
5181
|
+
Source: WorkTrain session sess_3bmj... (mr-review-workflow-agentic)
|
|
5182
|
+
Artifact: design-candidates-stdio-simplification-2026-04-18.md
|
|
5183
|
+
```
|
|
5184
|
+
|
|
5185
|
+
**Also useful for:** implementation plans the team wants to track, spec files that belong in the repo permanently, investigation summaries that become part of incident post-mortems.
|
|
5186
|
+
|
|
5187
|
+
---
|
|
5188
|
+
|
|
5189
|
+
## Current state update (Apr 18, 2026 -- later)
|
|
5190
|
+
|
|
5191
|
+
**npm version: v3.35.1** (auto-released after spawn_agent merged)
|
|
5192
|
+
|
|
5193
|
+
### What additionally shipped since the milestone (commit 473f4bd0)
|
|
5194
|
+
|
|
5195
|
+
- ✅ **`complete_step` tool** (#569) -- daemon manages continueToken internally, LLM never handles it. Notes required (min 50 chars). `continue_workflow` deprecated.
|
|
5196
|
+
- ✅ **`spawn_agent` tool** (#573) -- native in-process child session spawning. parentSessionId in session_created event. Depth enforcement. Semaphore bypass. All 4 WorkflowRunResult variants handled.
|
|
5197
|
+
- ✅ **`complete_step` description fix** (#575) -- removed token-seeking language from deprecated continue_workflow description that would have triggered the LLM to seek a token.
|
|
5198
|
+
- ✅ **Discovery ran before both implementations** -- wr.discovery validated the complete_step approach (it found 1 merge blocker, which was fixed) and designed the spawn_agent architecture (it identified a semaphore deadlock risk, which the design avoids).
|
|
5199
|
+
|
|
5200
|
+
### Updated limitations
|
|
5201
|
+
|
|
5202
|
+
**Status of the previous list (struck-through items are resolved; the rest are still open):**
|
|
5203
|
+
1. ~~complete_step just merged, untested~~ → ✅ merged, description fixed, discovery validated
|
|
5204
|
+
2. ~~spawn_agent not merged~~ → ✅ merged as #573
|
|
5205
|
+
3. **No session identity in console UI** -- parentSessionId is NOW in the event store (schema extended in #573) but console doesn't show the tree yet. Data is there; visualization is not.
|
|
5206
|
+
4. **No coordinator scripts** -- spawn_agent exists, coordinator templates don't.
|
|
5207
|
+
5. **Subagent loop still LLM-driven** -- workflow-as-orchestrator model spec'd but not built.
|
|
5208
|
+
6. **Workflow runtime adapter not built** -- one spec, two runtimes model spec'd but not built.
|
|
5209
|
+
7. **Knowledge graph not built** -- context still sweeps files every session.
|
|
5210
|
+
8. **Artifacts not first-class** -- agents still dump markdown files in repo. Artifact store spec'd but not built.
|
|
5211
|
+
9. **No notifications** -- daemon completes silently.
|
|
5212
|
+
10. **MCP simplification PR-B** -- HttpServer still starts with MCP server.
|
|
5213
|
+
|
|
5214
|
+
### What's now possible that wasn't before
|
|
5215
|
+
|
|
5216
|
+
With `complete_step` + `spawn_agent`:
|
|
5217
|
+
- Agents can advance workflows without ever touching a token (removes the #1 session failure cause)
|
|
5218
|
+
- Workflows can declare delegation and the daemon spawns proper child sessions (all visible in event log)
|
|
5219
|
+
- Multi-phase work has a path to becoming a coherent work unit (parentSessionId in data, UI visualization next)
|
|
5220
|
+
|
|
5221
|
+
### Next priorities
|
|
5222
|
+
|
|
5223
|
+
1. **Console session tree view** -- parentSessionId data is in the store. Build the UI to show it.
|
|
5224
|
+
2. **First coordinator script template** -- `coordinator-mr-review.sh` that spawns: discovery → review → (conditional) fix → re-review. Proves the spawn/await loop works end-to-end.
|
|
5225
|
+
3. **Notifications** -- macOS notification + generic webhook. ~30 min implementation.
|
|
5226
|
+
4. **Late-bound goals** -- default `goalTemplate: "{{$.goal}}"` when no static goal. 10-line fix in trigger-store.ts.
|
|
5227
|
+
5. **Artifacts store foundation** -- `~/.workrail/artifacts/` directory structure. Step 1 of the first-class artifacts vision.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Design Candidates: spawn_agent Task Implementation
|
|
2
|
+
|
|
3
|
+
> Full investigative material is in `design-candidates-spawn-agent.md`, `design-spawn-agent.md`,
|
|
4
|
+
> and `design-review-findings-spawn-agent.md`. This file summarizes for the current coding task.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Problem Understanding
|
|
9
|
+
|
|
10
|
+
### Core Tensions
|
|
11
|
+
|
|
12
|
+
**T1: Blocking vs. semaphore deadlock**
|
|
13
|
+
`TriggerRouter.dispatch()` is fire-and-forget (non-blocking by design) and uses a global `Semaphore`.
|
|
14
|
+
A parent holding a slot cannot wait for a child to acquire another slot -- deadlock.
|
|
15
|
+
Correct path: call `runWorkflow()` directly, bypassing the semaphore entirely.
|
|
16
|
+
|
|
17
|
+
**T2: Typed schema extension vs. internalContext injection**
|
|
18
|
+
Adding `parentSessionId` to `session_created.data` is the typed, durable, query-friendly path.
|
|
19
|
+
Injecting via `internalContext` (context_set event) is the proven fast path.
|
|
20
|
+
Both are needed: `internalContext` for the `executeStartWorkflow()` call, AND schema extension for future DAG queries.
|
|
21
|
+
|
|
22
|
+
**T3: Deterministic childSessionId vs. code simplicity**
|
|
23
|
+
Pre-creating the child session (Candidate 2) gives a deterministic `childSessionId` before the run starts.
|
|
24
|
+
Direct `runWorkflow()` (Candidate 1) is simpler but cannot return `childSessionId` if the run crashes before the AgentLoop starts.
|
|
25
|
+
|
|
26
|
+
**T4: Depth propagation safety**
|
|
27
|
+
Using `context.spawnDepth` (generic map) is fragile -- any code that overwrites context silently breaks depth enforcement.
|
|
28
|
+
Using `WorkflowTrigger.spawnDepth` (typed `readonly` field) is compiler-enforced and cannot be accidentally lost.
|
|
29
|
+
|
|
30
|
+
### Likely Seam
|
|
31
|
+
`workflow-runner.ts` -- new `makeSpawnAgentTool()` factory alongside existing tool factories.
|
|
32
|
+
`events.ts` -- one-line additive schema extension for `session_created.data`.
|
|
33
|
+
`start.ts` -- thread `parentSessionId` through `buildInitialEvents()`.
|
|
34
|
+
|
|
35
|
+
### What Makes It Hard
|
|
36
|
+
- The `runWorkflow()` call inside `execute()` requires capturing `ctx`, `apiKey`, `daemonRegistry?`, `emitter?` in the factory closure.
|
|
37
|
+
- `executeStartWorkflow()` returns `RA<StartWorkflowResult, StartWorkflowError>` -- must be unwrapped asynchronously.
|
|
38
|
+
- `_preAllocatedStartResponse` expects `startResult.value.response` (not the full `StartWorkflowResult`).
|
|
39
|
+
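- The tempting mistake is to call `dispatch()` instead of `runWorkflow()`; because `dispatch()` goes through the global semaphore, a parent holding a slot while waiting on its child would deadlock (see T1).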
|
|
40
|
+
- `session_created.data` currently hardcodes `data: {}` in `buildInitialEvents()` -- must thread `parentSessionId` into that call.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Philosophy Constraints
|
|
45
|
+
|
|
46
|
+
From `CLAUDE.md` and repo patterns:
|
|
47
|
+
|
|
48
|
+
- **Errors as data**: Return `{ outcome: 'error', notes: msg }` JSON, not thrown exceptions, for child failures.
|
|
49
|
+
- **Exhaustiveness**: Handle all 4 `WorkflowRunResult` variants without `as unknown` casts.
|
|
50
|
+
- **Immutability**: New `WorkflowTrigger` fields are `readonly`.
|
|
51
|
+
- **DI for boundaries**: `runWorkflowFn`, `ctx`, `apiKey`, `emitter` all injected at construction time.
|
|
52
|
+
- **YAGNI**: Phase 1 only. No `spawn_session + await_sessions`, no bare-prompt mode, no width guardrails.
|
|
53
|
+
- **Make illegal states unrepresentable**: `childSessionId` always present (pre-create guarantees it).
|
|
54
|
+
|
|
55
|
+
No philosophy conflicts between stated rules and repo patterns.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Impact Surface
|
|
60
|
+
|
|
61
|
+
| File | Change | Risk |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| `src/daemon/workflow-runner.ts` | Add `parentSessionId?`, `spawnDepth?` to `WorkflowTrigger`; add `makeSpawnAgentTool()`; inject in `runWorkflow()`; update `BASE_SYSTEM_PROMPT`; update `_preAllocatedStartResponse` JSDoc | Low -- additive |
|
|
64
|
+
| `src/v2/durable-core/schemas/session/events.ts` | Extend `session_created.data` with `parentSessionId: z.string().optional()` | Low -- `z.object({})` uses strip mode |
|
|
65
|
+
| `src/mcp/handlers/v2-execution/start.ts` | Thread `parentSessionId` from `internalContext` into `session_created` event via `buildInitialEvents()` | Low -- internal API |
|
|
66
|
+
| `src/trigger/trigger-router.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
|
|
67
|
+
| `src/v2/usecases/console-routes.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Candidates
|
|
72
|
+
|
|
73
|
+
### Candidate 1: Direct runWorkflow() call
|
|
74
|
+
|
|
75
|
+
**Summary**: `makeSpawnAgentTool()` calls `runWorkflow()` directly. No pre-creation. Session ID extracted from result after run.
|
|
76
|
+
|
|
77
|
+
**Tensions resolved**: YAGNI (fewest lines), blocking (natural await).
|
|
78
|
+
**Tensions accepted**: Crash-before-start has no observable `childSessionId`. `childSessionId` is absent on failure.
|
|
79
|
+
|
|
80
|
+
**Boundary**: `WorkflowTrigger` + direct `runWorkflow()` call.
|
|
81
|
+
**Why this boundary**: `WorkflowTrigger` is the natural seam -- carries all session config. No new types.
|
|
82
|
+
|
|
83
|
+
**Failure mode**: `runWorkflow()` crashes before AgentLoop starts -- `childSessionId` is null, parent gets `{ outcome: 'error', childSessionId: null }`.
|
|
84
|
+
|
|
85
|
+
**Repo-pattern relationship**: Follows factory pattern. No adaptation of `_preAllocatedStartResponse`.
|
|
86
|
+
|
|
87
|
+
**Gain**: ~10 fewer lines, maximum simplicity.
|
|
88
|
+
**Give up**: No deterministic `childSessionId` on startup failures. Less crash observability.
|
|
89
|
+
|
|
90
|
+
**Scope**: Best-fit.
|
|
91
|
+
**Philosophy fit**: Honors YAGNI strongest. Slight tension with 'make illegal states unrepresentable' (`childSessionId` can be null).
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Candidate 2: Pre-create session with _preAllocatedStartResponse (RECOMMENDED)
|
|
96
|
+
|
|
97
|
+
**Summary**: `execute()` calls `executeStartWorkflow()` with `parentSessionId` in `internalContext`, decodes `childSessionId` from the returned `continueToken`, then calls `runWorkflow()` with `_preAllocatedStartResponse`.
|
|
98
|
+
|
|
99
|
+
**Tensions resolved**: Deterministic `childSessionId`, crash-before-start observability, `childSessionId` seeds Phase 2, 'make illegal states unrepresentable'.
|
|
100
|
+
**Tensions accepted**: One extra async call (~10-50ms).
|
|
101
|
+
|
|
102
|
+
**Boundary**: `WorkflowTrigger._preAllocatedStartResponse` + `internalContext` injection.
|
|
103
|
+
**Why this boundary**: Direct adaptation of the proven `_preAllocatedStartResponse` pattern from `console-routes.ts`. Session store sees the child immediately -- correct observable behavior.
|
|
104
|
+
|
|
105
|
+
**Failure mode**: `executeStartWorkflow()` succeeds, `runWorkflow()` fails before AgentLoop -- zombie session in store. Accepted for Phase 1.
|
|
106
|
+
|
|
107
|
+
**Repo-pattern relationship**: Adapts proven `_preAllocatedStartResponse` pattern.
|
|
108
|
+
|
|
109
|
+
**Gain**: `childSessionId` always known before child runs. Deterministic. Child observable from moment of `execute()`.
|
|
110
|
+
**Give up**: One extra async call. Slightly more setup code.
|
|
111
|
+
|
|
112
|
+
**Scope**: Best-fit.
|
|
113
|
+
**Philosophy fit**: Honors determinism over cleverness, make illegal states unrepresentable, DI. No conflicts.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
### Candidate 3: Read depth from session store at execute() time
|
|
118
|
+
|
|
119
|
+
**Summary**: Instead of passing `currentDepth` as a constructor parameter, read `spawnDepth` from parent session store inside `execute()`.
|
|
120
|
+
|
|
121
|
+
**Tensions resolved**: Accurate depth for checkpoint-resumed sessions (theoretical edge case).
|
|
122
|
+
**Tensions accepted**: Async I/O in `execute()`, more error paths, session store dependency.
|
|
123
|
+
|
|
124
|
+
**Boundary**: Session store read inside `execute()`.
|
|
125
|
+
**Why this boundary is NOT best-fit**: Expensive, speculative. Checkpoint-resumed daemon sessions restart AgentLoop from scratch -- constructor parameter is always correctly set.
|
|
126
|
+
|
|
127
|
+
**Failure mode**: Store read fails -- fail-safe blocks spawn, adds error path complexity.
|
|
128
|
+
|
|
129
|
+
**Repo-pattern relationship**: Departs from constructor-injection pattern.
|
|
130
|
+
|
|
131
|
+
**Gain**: Accurate depth for resumed sessions. **Give up**: YAGNI violation, async I/O, extra error paths.
|
|
132
|
+
|
|
133
|
+
**Scope**: Too broad. **Philosophy fit**: Conflicts with YAGNI.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Comparison and Recommendation
|
|
138
|
+
|
|
139
|
+
### Comparison Matrix
|
|
140
|
+
|
|
141
|
+
| Tension | C1 | C2 | C3 |
|
|
142
|
+
|---|---|---|---|
|
|
143
|
+
| Blocking fidelity | Strong | Strong | Strong |
|
|
144
|
+
| Deterministic childSessionId | Weak | Strong | Weak |
|
|
145
|
+
| Semaphore bypass | Strong | Strong | Strong |
|
|
146
|
+
| YAGNI | Strong | Moderate | Weak |
|
|
147
|
+
| Crash observability | Weak | Strong | Weak |
|
|
148
|
+
| Depth accuracy | Adequate | Adequate | Strong (speculative) |
|
|
149
|
+
| Repo pattern | Follows | Adapts proven | Departs |
|
|
150
|
+
| Philosophy | Full | Full | Partial |
|
|
151
|
+
|
|
152
|
+
### Recommendation: Candidate 2
|
|
153
|
+
|
|
154
|
+
C2 is best-fit. The `_preAllocatedStartResponse` pattern is proven and stable (`console-routes.ts`).
|
|
155
|
+
The marginal complexity (one extra async call) is small relative to the gain: `childSessionId` is always
|
|
156
|
+
known, crash-before-start is observable, Phase 2 is seeded. C3 is rejected on YAGNI grounds.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Self-Critique
|
|
161
|
+
|
|
162
|
+
**Strongest counter-argument**: C2 adds a zombie session failure mode that C1 doesn't have. If `executeStartWorkflow()` succeeds but `runWorkflow()` fails immediately, a session exists in the store with no corresponding run. C1 avoids this -- no session is created until the run actually starts.
|
|
163
|
+
|
|
164
|
+
**C1 as narrower option**: Still satisfies acceptance criteria. Loses crash observability and deterministic `childSessionId`. Would win if we prioritized simplicity over observability.
|
|
165
|
+
|
|
166
|
+
**C3 as broader option**: Justified only if checkpoint-resumed spawned sessions become a real production use case. No evidence for Phase 1.
|
|
167
|
+
|
|
168
|
+
**Assumption that would invalidate C2**: C2 assumes `_preAllocatedStartResponse` remains part of `WorkflowTrigger`; if it is removed in a future refactor, C2 breaks. Mitigation: update its JSDoc (Orange finding O2) to list `spawn_agent` as a legitimate caller.
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Open Questions for the Main Agent
|
|
173
|
+
|
|
174
|
+
1. **maxSubagentDepth source**: Design doc says read from `WorkflowTrigger.agentConfig` (default 3). Should this also check global workspace config? Decision: use `trigger.agentConfig?.maxSubagentDepth ?? 3` for Phase 1. Document in tool description.
|
|
175
|
+
|
|
176
|
+
2. **`session_created.data` strictness**: Confirmed `z.object({})` uses strip mode. Extension is safe. Unverified by migration run -- low risk.
|
|
177
|
+
|
|
178
|
+
3. **Zombie session cleanup**: Deferred to Phase 2. Document as known edge case in tool description.
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Design Review Findings: spawn_agent Tool Implementation
|
|
2
|
+
|
|
3
|
+
_Concise, actionable findings for main-agent synthesis. Design: Candidate 2 (pre-create session with `_preAllocatedStartResponse`, then blocking `runWorkflow()`)._
|
|
4
|
+
|
|
5
|
+
> Note: Full discovery-phase review is in `design-review-findings-spawn-agent.md`. This file is for the current coding task review pass.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Tradeoff Review
|
|
10
|
+
|
|
11
|
+
### T1: Parent clock keeps ticking while child runs
|
|
12
|
+
- Confirmed acceptable. Success criterion 2 ('parent does not advance until child completes') is satisfied even on timeout (the parent aborts rather than advancing).
|
|
13
|
+
- When the parent times out, the child continues as an orphaned session. Work is preserved in the session store. The session tree preserves the parent-child link.
|
|
14
|
+
- Mitigation needed: document in tool description.
|
|
15
|
+
- **Status: ACCEPTED.**
|
|
16
|
+
|
|
17
|
+
### T2: _preAllocatedStartResponse comment needs update
|
|
18
|
+
- Current comment: 'set only by the dispatch HTTP handler.' spawn_agent will be another legitimate internal caller.
|
|
19
|
+
- If not updated, a future developer may remove spawn_agent support, mistaking it for accidental usage.
|
|
20
|
+
- **Status: REQUIRED FIX (low effort, Step 1.1).**
|
|
21
|
+
|
|
22
|
+
### T3: One extra async call in execute()
|
|
23
|
+
- executeStartWorkflow() is ~10-50ms (no LLM call). Negligible for a tool that blocks 1-30 minutes.
|
|
24
|
+
- **Status: ACCEPTED.**
|
|
25
|
+
|
|
26
|
+
### T4: session_created.data extension
|
|
27
|
+
- Confirmed `z.object({})` uses strip mode (not `.strict()`). Extension with `parentSessionId: z.string().optional()` is backward-compatible.
|
|
28
|
+
- `buildInitialEvents()` currently hardcodes `data: {}` -- requires threading `parentSessionId` parameter.
|
|
29
|
+
- **Status: REQUIRED, LOW RISK.**
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Failure Mode Review
|
|
34
|
+
|
|
35
|
+
### FM1: Parent timeout while child is running
|
|
36
|
+
- Severity: LOW. Child completes normally, work preserved, session tree intact.
|
|
37
|
+
- Design coverage: adequate. Orphaned child traceable via parentSessionId.
|
|
38
|
+
- **No revision required.**
|
|
39
|
+
|
|
40
|
+
### FM2: executeStartWorkflow() succeeds, runWorkflow() fails before AgentLoop starts
|
|
41
|
+
- Severity: MEDIUM. Zombie session in store (shows as 'running' indefinitely).
|
|
42
|
+
- Design coverage: partial. Parent gets `{ childSessionId, outcome: 'error', notes: errorMessage }` -- child session is observable. But zombie cleanup is deferred.
|
|
43
|
+
- Mitigation for Phase 1: document as known edge case. Phase 2: session timeout/zombie cleanup.
|
|
44
|
+
- **Status: ACCEPTED for Phase 1.**
|
|
45
|
+
|
|
46
|
+
### FM3: spawnDepth propagation failure
|
|
47
|
+
- Severity: HIGH if unmitigated. FULLY MITIGATED by using a typed `readonly spawnDepth?: number` field on `WorkflowTrigger`.
|
|
48
|
+
- After fix: severity drops to LOW (depth is typed, cannot be accidentally lost).
|
|
49
|
+
- **Status: MITIGATED.**
|
|
50
|
+
|
|
51
|
+
### FM4: Depth bypass via width (sequential spawning)
|
|
52
|
+
- Severity: LOW for Phase 1. `maxSessionMinutes` on parent is the practical limit.
|
|
53
|
+
- **Status: ACCEPTED, deferred to Phase 2.**
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Runner-Up / Simpler Alternative Review
|
|
58
|
+
|
|
59
|
+
**Candidate 1 (direct runWorkflow, no pre-create):** Close alternative. Simpler execute() -- no executeStartWorkflow() call. Loses: `childSessionId` is unknown until after runWorkflow() starts; crash-before-start has no childSessionId to return.
|
|
60
|
+
|
|
61
|
+
**No elements worth borrowing from Candidate 1.** C2 already does everything C1 does plus the session-ID-upfront guarantee.
|
|
62
|
+
|
|
63
|
+
**Could skip session_created.data extension?** Technically yes -- `parentSessionId` in `context_set` events is still durable and queryable. But the extension is ~8 lines total and future-proofs DAG queries. Keep it.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Philosophy Alignment
|
|
68
|
+
|
|
69
|
+
### Clearly satisfied
|
|
70
|
+
- Errors as data: discriminated union return, no throws
|
|
71
|
+
- DI for boundaries: ctx, apiKey, emitter all injected at construction time
|
|
72
|
+
- Immutability: WorkflowTrigger fully readonly, new fields also readonly
|
|
73
|
+
- Exhaustiveness: WorkflowRunResult match handles all 4 variants
|
|
74
|
+
- Validate at boundaries: depth check at start of execute()
|
|
75
|
+
- YAGNI: Phase 1 only; non-blocking spawn deferred
|
|
76
|
+
- Make illegal states unrepresentable: childSessionId always present (pre-create guarantees it)
|
|
77
|
+
|
|
78
|
+
### Under tension (acceptable)
|
|
79
|
+
- Architectural fixes over patches: parentSessionId via internalContext is somewhat patch-like. Acceptable because internalContext is an established pattern for daemon-internal injection (is_autonomous, workspacePath). Tension is low.
|
|
80
|
+
- Compose with small pure functions: execute() has two async operations (~50 lines). Complexity is necessary and bounded.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Findings
|
|
85
|
+
|
|
86
|
+
### Red (blocking)
|
|
87
|
+
_None._
|
|
88
|
+
|
|
89
|
+
### Orange (required before implementation)
|
|
90
|
+
|
|
91
|
+
**O1: Use `readonly spawnDepth?: number` on WorkflowTrigger (not `context.spawnDepth`)**
|
|
92
|
+
Rationale: generic context map can be silently overwritten by trigger system or other callers, breaking depth enforcement. Typed field makes the invariant explicit and compiler-checked.
|
|
93
|
+
Files: `src/daemon/workflow-runner.ts` (WorkflowTrigger type definition)
|
|
94
|
+
**Status: Design already incorporates this fix.**
|
|
95
|
+
|
|
96
|
+
**O2: Update `_preAllocatedStartResponse` comment to list spawn_agent as legitimate caller**
|
|
97
|
+
Rationale: current comment misleads future developers. Without this update, spawn_agent support could be removed as accidental.
|
|
98
|
+
Files: `src/daemon/workflow-runner.ts` (WorkflowTrigger._preAllocatedStartResponse JSDoc)
|
|
99
|
+
**Status: Must be applied during implementation.**
|
|
100
|
+
|
|
101
|
+
**O3: Thread parentSessionId into buildInitialEvents() for session_created.data**
|
|
102
|
+
Rationale: the `internalContext` injection only reaches `context_set` events, not `session_created.data`. For the typed schema extension to work, `buildInitialEvents()` needs a new optional parameter.
|
|
103
|
+
Files: `src/mcp/handlers/v2-execution/start.ts` (`buildInitialEvents()` signature and call site)
|
|
104
|
+
**Status: Required -- not in original design review, discovered during implementation analysis.**
|
|
105
|
+
|
|
106
|
+
### Yellow (should-fix, not blocking)
|
|
107
|
+
|
|
108
|
+
**Y1: Document parent-clock behavior in tool description**
|
|
109
|
+
The spawn_agent tool description should note that the parent session's maxSessionMinutes clock runs while the child executes. Workflow authors must configure the parent's timeout to be longer than the expected child duration.
|
|
110
|
+
|
|
111
|
+
**Y2: Document zombie session edge case**
|
|
112
|
+
The spawn_agent tool description should note that if runWorkflow() fails before the AgentLoop starts, a zombie session may exist in the store. Phase 2 will add cleanup.
|
|
113
|
+
|
|
114
|
+
**Y3: maxSubagentDepth source**
|
|
115
|
+
For Phase 1, default to 3 if `WorkflowTrigger.agentConfig?.maxSubagentDepth` is not set. Document in tool description.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Recommended Revisions
|
|
120
|
+
|
|
121
|
+
1. Use `readonly spawnDepth?: number` on WorkflowTrigger (O1 -- design already incorporates)
|
|
122
|
+
2. Update `_preAllocatedStartResponse` JSDoc to list spawn_agent as a legitimate internal caller (O2)
|
|
123
|
+
3. Add `parentSessionId?: string` parameter to `buildInitialEvents()` and thread it into `session_created.data` (O3)
|
|
124
|
+
4. Add parent-clock behavior documentation to spawn_agent tool description (Y1)
|
|
125
|
+
5. Add zombie session documentation to spawn_agent tool description (Y2)
|
|
126
|
+
6. Use `trigger.agentConfig?.maxSubagentDepth ?? 3` as maxDepth (Y3)
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Residual Concerns
|
|
131
|
+
|
|
132
|
+
**RC1: Session tree query infrastructure deferred to Phase 2**
|
|
133
|
+
`parentSessionId` is written to the session store but no query path exists to read 'all children of session X'. The console DAG view cannot render the tree until Phase 2. This is by design -- Phase 1 writes the data; Phase 2 builds the reader.
|
|
134
|
+
|
|
135
|
+
**RC2: Zod strictness on session_created.data**
|
|
136
|
+
Confirmed that `z.object({})` uses strip mode (not strict). Extension with `parentSessionId: z.string().optional()` is backward-compatible. Unverified by an actual migration run -- low risk but unvalidated.
|
|
137
|
+
|
|
138
|
+
**RC3: maxTotalAgentsPerTask guardrail deferred**
|
|
139
|
+
Phase 1 enforces depth only. Wide spawning is not caught by depth limits. Phase 2 adds the concurrency registry. For Phase 1, `maxSessionMinutes` on the parent session is the practical limit on total work done.
|