@exaudeus/workrail 3.35.0 → 3.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,10 +60,18 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
60
60
  }>>;
61
61
  } & {
62
62
  kind: z.ZodLiteral<"session_created">;
63
- data: z.ZodObject<{}, "strip", z.ZodTypeAny, {}, {}>;
63
+ data: z.ZodObject<{
64
+ parentSessionId: z.ZodOptional<z.ZodString>;
65
+ }, "strip", z.ZodTypeAny, {
66
+ parentSessionId?: string | undefined;
67
+ }, {
68
+ parentSessionId?: string | undefined;
69
+ }>;
64
70
  }, "strip", z.ZodTypeAny, {
65
71
  kind: "session_created";
66
- data: {};
72
+ data: {
73
+ parentSessionId?: string | undefined;
74
+ };
67
75
  v: 1;
68
76
  sessionId: string;
69
77
  eventIndex: number;
@@ -75,7 +83,9 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
75
83
  } | undefined;
76
84
  }, {
77
85
  kind: "session_created";
78
- data: {};
86
+ data: {
87
+ parentSessionId?: string | undefined;
88
+ };
79
89
  v: 1;
80
90
  sessionId: string;
81
91
  eventIndex: number;
@@ -1834,12 +1844,12 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
1834
1844
  category: "user_only_dependency";
1835
1845
  }>, z.ZodObject<{
1836
1846
  category: z.ZodLiteral<"contract_violation">;
1837
- detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
1847
+ detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
1838
1848
  }, "strip", z.ZodTypeAny, {
1839
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
1849
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
1840
1850
  category: "contract_violation";
1841
1851
  }, {
1842
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
1852
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
1843
1853
  category: "contract_violation";
1844
1854
  }>, z.ZodObject<{
1845
1855
  category: z.ZodLiteral<"capability_missing">;
@@ -1902,7 +1912,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
1902
1912
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
1903
1913
  category: "user_only_dependency";
1904
1914
  } | {
1905
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
1915
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
1906
1916
  category: "contract_violation";
1907
1917
  } | {
1908
1918
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -1932,7 +1942,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
1932
1942
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
1933
1943
  category: "user_only_dependency";
1934
1944
  } | {
1935
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
1945
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
1936
1946
  category: "contract_violation";
1937
1947
  } | {
1938
1948
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -1965,7 +1975,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
1965
1975
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
1966
1976
  category: "user_only_dependency";
1967
1977
  } | {
1968
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
1978
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
1969
1979
  category: "contract_violation";
1970
1980
  } | {
1971
1981
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -2007,7 +2017,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
2007
2017
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
2008
2018
  category: "user_only_dependency";
2009
2019
  } | {
2010
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
2020
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
2011
2021
  category: "contract_violation";
2012
2022
  } | {
2013
2023
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -102,7 +102,7 @@ const PreferencesChangedDataV1Schema = zod_1.z
102
102
  }
103
103
  });
104
104
  exports.DomainEventV1Schema = zod_1.z.discriminatedUnion('kind', [
105
- exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({}) }),
105
+ exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({ parentSessionId: zod_1.z.string().optional() }) }),
106
106
  exports.DomainEventEnvelopeV1Schema.extend({
107
107
  kind: zod_1.z.literal('observation_recorded'),
108
108
  scope: zod_1.z.undefined(),
@@ -11,12 +11,12 @@ export declare const GapReasonSchema: z.ZodDiscriminatedUnion<"category", [z.Zod
11
11
  category: "user_only_dependency";
12
12
  }>, z.ZodObject<{
13
13
  category: z.ZodLiteral<"contract_violation">;
14
- detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
14
+ detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
15
15
  }, "strip", z.ZodTypeAny, {
16
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
16
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
17
17
  category: "contract_violation";
18
18
  }, {
19
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
19
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
20
20
  category: "contract_violation";
21
21
  }>, z.ZodObject<{
22
22
  category: z.ZodLiteral<"capability_missing">;
@@ -86,12 +86,12 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
86
86
  category: "user_only_dependency";
87
87
  }>, z.ZodObject<{
88
88
  category: z.ZodLiteral<"contract_violation">;
89
- detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
89
+ detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
90
90
  }, "strip", z.ZodTypeAny, {
91
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
91
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
92
92
  category: "contract_violation";
93
93
  }, {
94
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
94
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
95
95
  category: "contract_violation";
96
96
  }>, z.ZodObject<{
97
97
  category: z.ZodLiteral<"capability_missing">;
@@ -154,7 +154,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
154
154
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
155
155
  category: "user_only_dependency";
156
156
  } | {
157
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
157
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
158
158
  category: "contract_violation";
159
159
  } | {
160
160
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -184,7 +184,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
184
184
  detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
185
185
  category: "user_only_dependency";
186
186
  } | {
187
- detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
187
+ detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
188
188
  category: "contract_violation";
189
189
  } | {
190
190
  detail: "required_capability_unknown" | "required_capability_unavailable";
@@ -12,7 +12,7 @@ exports.UserOnlyDependencyReasonSchema = zod_1.z.enum([
12
12
  ]);
13
13
  exports.GapReasonSchema = zod_1.z.discriminatedUnion('category', [
14
14
  zod_1.z.object({ category: zod_1.z.literal('user_only_dependency'), detail: exports.UserOnlyDependencyReasonSchema }),
15
- zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes']) }),
15
+ zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes', 'assessment_followup_required']) }),
16
16
  zod_1.z.object({
17
17
  category: zod_1.z.literal('capability_missing'),
18
18
  detail: zod_1.z.enum(['required_capability_unavailable', 'required_capability_unknown']),
@@ -4975,3 +4975,253 @@ Long-term (when mobile exists):
4975
4975
  ```
4976
4976
 
4977
4977
  **Build order:** outbox.jsonl integration (foundation, works everywhere) → generic webhook (covers Slack/Discord/Teams/anything) → platform notifications (macOS/Linux/Windows) → mobile app push (when mobile exists).
4978
+
4979
+ ---
4980
+
4981
+ ## 🎉 WorkTrain first confirmed end-to-end autonomous session (Apr 18, 2026)
4982
+
4983
+ **Timestamp:** 2026-04-18T15:09:49Z
4984
+ **Commit:** `473f4bd0` (main)
4985
+ **npm version:** v3.34.1 (published, installable by anyone)
4986
+ **What happened:** A real MR review workflow (`mr-review-workflow-agentic`) ran completely autonomously via webhook trigger, advanced through all phases (context gathering, review, synthesis, validation, handoff), self-validated, and produced a structured finding set. 8 step advances, `outcome: success`.
4987
+
4988
+ **Trigger:** `POST /webhook/mr-review {"goal": "Review PR #566: fix two minor bugs..."}`
4989
+ **Session:** `sess_3bmjuzf7l2vrqynjtleg5iskm4`
4990
+ **Result:** APPROVE with High confidence. 3 Minor findings, 1 Informational. Correctly decided not to delegate since there were no Critical/Major issues.
4991
+
4992
+ ---
4993
+
4994
+ ### What works at this commit
4995
+
4996
+ - ✅ Daemon accepts webhooks, starts sessions, runs workflows end-to-end
4997
+ - ✅ Sessions advance through all workflow phases autonomously
4998
+ - ✅ `mr-review-workflow-agentic` v2.6 runs fully -- context gathering, review phases, synthesis loop, validation, handoff
4999
+ - ✅ `wr.discovery` v3.2.0 runs fully -- with new phase-0-reframe (goal reframing before research)
5000
+ - ✅ Console shows live sessions via event log (no daemon connection required)
5001
+ - ✅ MCP server is stable (bridge removed, EPIPE fixed, v3.34.1 published)
5002
+ - ✅ GitHub + GitLab polling triggers (no webhooks needed)
5003
+ - ✅ `worktrain init`, `tell`, `inbox`, `spawn`, `await` CLI commands
5004
+ - ✅ Stuck detection + visibility (`worktrain status`, `worktrain logs --follow`)
5005
+ - ✅ `complete_step` tool -- daemon manages continueToken, LLM never handles it
5006
+ - ✅ Assessment gate circuit breaker (stops at 3 blocked attempts, shows artifact format)
5007
+ - ✅ `worktrain daemon --install` creates launchd service (daemon survives MCP reconnects)
5008
+ - ✅ Self-configuration (`triggers.yml`, `daemon-soul.md`, `AGENTS.md` for workrail repo)
5009
+
5010
+ ### Current limitations at this commit
5011
+
5012
+ **Blocking reliable complex workflows:**
5013
+ 1. **`complete_step` not yet tested in production** -- just merged, daemon still using `continue_workflow` in running sessions. Needs daemon restart to take effect.
5014
+ 2. **Assessment gates still unreliable** -- `complete_step` fixes the token issue; the `artifacts` field (#557) fixes the submission issue. But `coding-task-workflow-agentic` phases with quality gates haven't been tested end-to-end yet.
5015
+ 3. **Native `spawn_agent` not yet merged** -- implementation in progress. Until it lands, all subagent delegation is via `mcp__nested-subagent__Task` (invisible black box).
5016
+ 4. **No session identity (parentSessionId)** -- multi-phase work appears as unrelated flat sessions in the console.
5017
+
5018
+ **Architecture not yet realized:**
5019
+ 5. **Coordinator scripts don't exist** -- `worktrain spawn/await` is there but no templates.
5020
+ 6. **Subagent loop not rethought** -- LLM still decides when to delegate; workflow-as-orchestrator model is spec'd but not built.
5021
+ 7. **Workflow runtime adapter not built** -- workflows run in daemon mode as-is; no MCP vs daemon adaptation layer.
5022
+ 8. **Knowledge graph not built** -- context gathering still sweeps files on every session.
5023
+ 9. **MCP simplification PR-B not done** -- HttpServer still starts with MCP server.
5024
+
5025
+ **Missing for production autonomy:**
5026
+ 10. **No notifications** -- daemon completes work silently. Users have no awareness unless watching console/logs.
5027
+ 11. **No auto-commit from handoff artifact** -- merged but untested end-to-end.
5028
+ 12. **Late-bound goals not implemented** -- triggers require static goals; dynamic goals (like PR reviews) need `goalTemplate: "{{$.goal}}"` as default.
5029
+ 13. **No coordinator script template** -- the multi-phase autonomous pipeline exists as primitives but not as a usable script.
5030
+
5031
+ ---
5032
+
5033
+ ### Artifacts as first-class citizens: explorable, accessible, out of the repo (Apr 18, 2026)
5034
+
5035
+ **The current mess:** every autonomous session dumps `design-candidates.md`, `implementation_plan.md`, `design-review-findings.md`, `mr-review.md` etc. as files in the repo root or worktrees. They are:
5036
+ - Not indexed or searchable
5037
+ - Not visible in the console
5038
+ - Not accessible to other sessions (agent B can't read agent A's handoff without knowing the exact file path)
5039
+ - Polluting the repo with ephemeral working documents
5040
+ - Lost when worktrees are cleaned up
5041
+ - Scattered across the filesystem with no structure
5042
+
5043
+ **The right model:** artifacts are WorkTrain data, not filesystem files.
5044
+
5045
+ ---
5046
+
5047
+ #### What an artifact is
5048
+
5049
+ Any structured output from a session that has value beyond the session itself:
5050
+ - **Handoff docs** -- what one session produces for the next to consume
5051
+ - **Design candidates** -- research output with tradeoffs and recommendation
5052
+ - **Implementation plans** -- what to build, how, in what order
5053
+ - **Review findings** -- MR review output with findings, severity, recommendation
5054
+ - **Spec files** -- behavioral specs, acceptance criteria, API contracts
5055
+ - **Investigation summaries** -- bug investigation root cause and reproduction
5056
+ - **Context bundles** -- pre-packaged knowledge for subagent consumption
5057
+
5058
+ **NOT artifacts:** step notes (stay in WorkRail session store), event logs (stay in daemon events), source code (stays in repo).
5059
+
5060
+ ---
5061
+
5062
+ #### Where artifacts live
5063
+
5064
+ `~/.workrail/artifacts/<sessionId>/<artifact-type>-<timestamp>.json`
5065
+
5066
+ Structured JSON, not markdown. The display layer (console, `worktrain artifacts`) renders them in human-readable form. Other agents query them as structured data.
5067
+
5068
+ **Why JSON not markdown:**
5069
+ - Queryable by other agents (what are the findings with severity=critical?)
5070
+ - Renderable by the console with proper formatting, filtering, search
5071
+ - Versionable and diffable in the artifact store
5072
+ - Accessible via the knowledge graph (artifacts become nodes with typed edges)
5073
+
5074
+ ---
5075
+
5076
+ #### Console integration
5077
+
5078
+ The console session detail view gets an "Artifacts" tab alongside "Steps" and "Notes":
5079
+
5080
+ ```
5081
+ Session: sess_3bmj... [MR Review: PR #566]
5082
+ ├── Steps (8)
5083
+ ├── Notes
5084
+ └── Artifacts (3)
5085
+ ├── 📋 review-findings.json "APPROVE -- 3 Minor, 1 Info"
5086
+ ├── 📄 context-bundle.json "12 files read, 4 patterns identified"
5087
+ └── 🔍 investigation-notes.json "Signal 3 dead code in max_turns path"
5088
+ ```
5089
+
5090
+ Click an artifact → full rendered view in the console.
5091
+
5092
+ ---
5093
+
5094
+ #### Accessibility to other agents
5095
+
5096
+ Agents can query artifacts from prior sessions via a new tool:
5097
+
5098
+ ```
5099
+ read_artifact({ sessionId: 'sess_3bmj...', type: 'review-findings' })
5100
+ → { verdict: 'APPROVE', findings: [...], recommendation: '...' }
5101
+
5102
+ search_artifacts({ type: 'implementation-plan', workflowId: 'coding-task-workflow-agentic', since: '7d' })
5103
+ → [{ sessionId, summary, createdAt }, ...]
5104
+ ```
5105
+
5106
+ This replaces the current pattern where agents `cat design-candidates.md` from a known path -- fragile, path-dependent, breaks across worktrees.
5107
+
5108
+ ---
5109
+
5110
+ #### Workflow integration
5111
+
5112
+ Workflow steps declare their artifact output type:
5113
+
5114
+ ```json
5115
+ {
5116
+ "id": "phase-1c-challenge-and-select",
5117
+ "output": {
5118
+ "artifact": "design-candidates",
5119
+ "schema": "wr.artifacts.design-candidates.v1"
5120
+ }
5121
+ }
5122
+ ```
5123
+
5124
+ **Both the daemon AND the MCP server** store step artifacts automatically. The artifact store is a WorkRail data layer feature, not daemon-specific. A human using Claude Code with the MCP produces the same artifacts in the same store as an autonomous daemon session. The console shows them for both. Other sessions (human-driven or autonomous) can query them either way.
5125
+
5126
+ In MCP mode, the human can explicitly commit an artifact to the repo if desired (e.g. a final spec becomes `docs/specs/feature-x.md`). But the default is the artifact store -- repo is opt-in. The `NEVER COMMIT MARKDOWN FILES` rule in workflow metaGuidance exists because the artifact store doesn't exist yet. Once it does, that rule becomes unnecessary for all runtimes.
5127
+
5128
+ ---
5129
+
5130
+ #### What stays in the repo
5131
+
5132
+ Almost nothing from WorkTrain sessions. The only things that belong in the repo:
5133
+ - Source code changes (committed via auto-commit or human review)
5134
+ - Long-lived spec files that are part of the product (e.g. `docs/ideas/backlog.md`)
5135
+ - Workflow definitions (`workflows/*.json`)
5136
+
5137
+ Everything else -- design docs, review findings, investigation notes, implementation plans -- lives in `~/.workrail/artifacts/`. If you want a design doc in the repo, you explicitly commit it. The default is: it lives in WorkTrain's data layer.
5138
+
5139
+ ---
5140
+
5141
+ #### Build order
5142
+
5143
+ 1. **Artifact store** -- `~/.workrail/artifacts/<sessionId>/` directory structure, JSON schema for common types
5144
+ 2. **Daemon writes artifacts** -- workflow steps with `output.artifact` declaration write to the artifact store automatically
5145
+ 3. **`worktrain artifacts` CLI** -- list, read, search artifacts by session, type, date
5146
+ 4. **Console artifacts tab** -- render artifacts in session detail view
5147
+ 5. **`read_artifact` / `search_artifacts` tools** -- agents can query the artifact store
5148
+ 6. **Knowledge graph integration** -- artifacts become nodes, sessions link to their artifacts
5149
+
5150
+ **The `NEVER COMMIT MARKDOWN FILES` rule in metaGuidance is a symptom of this missing feature.** The rule exists because agents keep dumping files in the wrong place. With a proper artifact store, the rule becomes unnecessary -- artifacts have nowhere to go except the artifact store.
5151
+
5152
+ ---
5153
+
5154
+ ### "Add to repo" button in console for artifacts (Apr 18, 2026)
5155
+
5156
+ Instead of workflow steps declaring upfront whether an artifact goes to the repo, the human makes that decision after seeing the content -- via a button in the console.
5157
+
5158
+ **The flow:**
5159
+ 1. Agent produces artifact → stored automatically in `~/.workrail/artifacts/`
5160
+ 2. Human opens it in the console Artifacts tab
5161
+ 3. Sees action buttons: **📁 Add to repo** | **📋 Copy** | **🔗 Share link** | **📤 Export**
5162
+ 4. Clicks "Add to repo" → console prompts: "Save as: `docs/design/design-candidates-<name>.md`" (editable path with sensible default)
5163
+ 5. Console commits the artifact as markdown to the repo at that path, with a commit message like `docs: add design candidates for <workflow-goal>`
5164
+
5165
+ **Why this is better than workflow-level declaration:**
5166
+ - Agent doesn't need to know at step time whether output will be repo-worthy
5167
+ - Human decides after seeing actual content quality
5168
+ - Ephemeral working artifacts stay ephemeral; only promoted ones go to the repo
5169
+ - No "NEVER COMMIT MARKDOWN FILES" rule needed -- agents just produce artifacts, humans decide what's repo-worthy
5170
+
5171
+ **Button options:**
5172
+ - **📁 Add to repo** -- renders artifact as markdown, commits to repo at specified path
5173
+ - **📋 Copy** -- copies rendered markdown to clipboard
5174
+ - **🔗 Share link** -- generates a URL that opens the artifact in the console. ⚠️ Local-only: only works on the same machine or with shared filesystem access. Requires cloud hosting for true team sharing (see cloud hosting spec in backlog)
5175
+ - **📤 Export** -- save to arbitrary filesystem path outside the repo
5176
+
5177
+ **The commit WorkTrain creates:**
5178
+ ```
5179
+ docs(design): add design candidates for MCP simplification
5180
+
5181
+ Source: WorkTrain session sess_3bmj... (mr-review-workflow-agentic)
5182
+ Artifact: design-candidates-stdio-simplification-2026-04-18.md
5183
+ ```
5184
+
5185
+ **Also useful for:** implementation plans the team wants to track, spec files that belong in the repo permanently, investigation summaries that become part of incident post-mortems.
5186
+
5187
+ ---
5188
+
5189
+ ## Current state update (Apr 18, 2026 -- later)
5190
+
5191
+ **npm version: v3.35.1** (auto-released after spawn_agent merged)
5192
+
5193
+ ### What additionally shipped since the milestone (commit 473f4bd0)
5194
+
5195
+ - ✅ **`complete_step` tool** (#569) -- daemon manages continueToken internally, LLM never handles it. Notes required (min 50 chars). `continue_workflow` deprecated.
5196
+ - ✅ **`spawn_agent` tool** (#573) -- native in-process child session spawning. parentSessionId in session_created event. Depth enforcement. Semaphore bypass. All 4 WorkflowRunResult variants handled.
5197
+ - ✅ **`complete_step` description fix** (#575) -- removed token-seeking language from deprecated continue_workflow description that would have triggered the LLM to seek a token.
5198
+ - ✅ **Discovery ran before both implementations** -- wr.discovery validated the complete_step approach (found 1 merge blocker, which was then fixed) and designed the spawn_agent architecture (found a semaphore deadlock risk, which the design avoids).
5199
+
5200
+ ### Updated limitations
5201
+
5202
+ **Status of items from the previous list (struck-through items are resolved; the rest are still open):**
5203
+ 1. ~~complete_step just merged, untested~~ → ✅ merged, description fixed, discovery validated
5204
+ 2. ~~spawn_agent not merged~~ → ✅ merged as #573
5205
+ 3. **No session identity in console UI** -- parentSessionId is NOW in the event store (schema extended in #573) but console doesn't show the tree yet. Data is there; visualization is not.
5206
+ 4. **No coordinator scripts** -- spawn_agent exists, coordinator templates don't.
5207
+ 5. **Subagent loop still LLM-driven** -- workflow-as-orchestrator model spec'd but not built.
5208
+ 6. **Workflow runtime adapter not built** -- one spec, two runtimes model spec'd but not built.
5209
+ 7. **Knowledge graph not built** -- context still sweeps files every session.
5210
+ 8. **Artifacts not first-class** -- agents still dump markdown files in repo. Artifact store spec'd but not built.
5211
+ 9. **No notifications** -- daemon completes silently.
5212
+ 10. **MCP simplification PR-B** -- HttpServer still starts with MCP server.
5213
+
5214
+ ### What's now possible that wasn't before
5215
+
5216
+ With `complete_step` + `spawn_agent`:
5217
+ - Agents can advance workflows without ever touching a token (removes the #1 session failure cause)
5218
+ - Workflows can declare delegation and the daemon spawns proper child sessions (all visible in event log)
5219
+ - Multi-phase work has a path to becoming a coherent work unit (parentSessionId in data, UI visualization next)
5220
+
5221
+ ### Next priorities
5222
+
5223
+ 1. **Console session tree view** -- parentSessionId data is in the store. Build the UI to show it.
5224
+ 2. **First coordinator script template** -- `coordinator-mr-review.sh` that spawns: discovery → review → (conditional) fix → re-review. Proves the spawn/await loop works end-to-end.
5225
+ 3. **Notifications** -- macOS notification + generic webhook. ~30 min implementation.
5226
+ 4. **Late-bound goals** -- default `goalTemplate: "{{$.goal}}"` when no static goal. 10-line fix in trigger-store.ts.
5227
+ 5. **Artifacts store foundation** -- `~/.workrail/artifacts/` directory structure. Step 1 of the first-class artifacts vision.
@@ -0,0 +1,178 @@
1
+ # Design Candidates: spawn_agent Task Implementation
2
+
3
+ > Full investigative material is in `design-candidates-spawn-agent.md`, `design-spawn-agent.md`,
4
+ > and `design-review-findings-spawn-agent.md`. This file summarizes for the current coding task.
5
+
6
+ ---
7
+
8
+ ## Problem Understanding
9
+
10
+ ### Core Tensions
11
+
12
+ **T1: Blocking vs. semaphore deadlock**
13
+ `TriggerRouter.dispatch()` is fire-and-forget (non-blocking by design) and uses a global `Semaphore`.
14
+ A parent holding a slot cannot wait for a child to acquire another slot -- deadlock.
15
+ Correct path: call `runWorkflow()` directly, bypassing the semaphore entirely.
16
+
17
+ **T2: Typed schema extension vs. internalContext injection**
18
+ Adding `parentSessionId` to `session_created.data` is the typed, durable, query-friendly path.
19
+ Injecting via `internalContext` (context_set event) is the proven fast path.
20
+ Both are needed: `internalContext` for the `executeStartWorkflow()` call, AND schema extension for future DAG queries.
21
+
22
+ **T3: Deterministic childSessionId vs. code simplicity**
23
+ Pre-creating the child session (Candidate 2) gives a deterministic `childSessionId` before the run starts.
24
+ Direct `runWorkflow()` (Candidate 1) is simpler but cannot return `childSessionId` if the run crashes before the AgentLoop starts.
25
+
26
+ **T4: Depth propagation safety**
27
+ Using `context.spawnDepth` (generic map) is fragile -- any code that overwrites context silently breaks depth enforcement.
28
+ Using `WorkflowTrigger.spawnDepth` (typed `readonly` field) is compiler-enforced and cannot be accidentally lost.
29
+
30
+ ### Likely Seam
31
+ `workflow-runner.ts` -- new `makeSpawnAgentTool()` factory alongside existing tool factories.
32
+ `events.ts` -- one-line additive schema extension for `session_created.data`.
33
+ `start.ts` -- thread `parentSessionId` through `buildInitialEvents()`.
34
+
35
+ ### What Makes It Hard
36
+ - The `runWorkflow()` call inside `execute()` requires capturing `ctx`, `apiKey`, `daemonRegistry?`, `emitter?` in the factory closure.
37
+ - `executeStartWorkflow()` returns `RA<StartWorkflowResult, StartWorkflowError>` -- must be unwrapped asynchronously.
38
+ - `_preAllocatedStartResponse` expects `startResult.value.response` (not the full `StartWorkflowResult`).
39
+ - A junior developer would likely call `dispatch()` instead of `runWorkflow()` and create a deadlock (see T1).
40
+ - `session_created.data` currently hardcodes `data: {}` in `buildInitialEvents()` -- must thread `parentSessionId` into that call.
41
+
42
+ ---
43
+
44
+ ## Philosophy Constraints
45
+
46
+ From `CLAUDE.md` and repo patterns:
47
+
48
+ - **Errors as data**: Return `{ outcome: 'error', notes: msg }` JSON, not thrown exceptions, for child failures.
49
+ - **Exhaustiveness**: Handle all 4 `WorkflowRunResult` variants without `as unknown` casts.
50
+ - **Immutability**: New `WorkflowTrigger` fields are `readonly`.
51
+ - **DI for boundaries**: `runWorkflowFn`, `ctx`, `apiKey`, `emitter` all injected at construction time.
52
+ - **YAGNI**: Phase 1 only. No `spawn_session + await_sessions`, no bare-prompt mode, no width guardrails.
53
+ - **Make illegal states unrepresentable**: `childSessionId` always present (pre-create guarantees it).
54
+
55
+ No philosophy conflicts between stated rules and repo patterns.
56
+
57
+ ---
58
+
59
+ ## Impact Surface
60
+
61
+ | File | Change | Risk |
62
+ |---|---|---|
63
+ | `src/daemon/workflow-runner.ts` | Add `parentSessionId?`, `spawnDepth?` to `WorkflowTrigger`; add `makeSpawnAgentTool()`; inject in `runWorkflow()`; update `BASE_SYSTEM_PROMPT`; update `_preAllocatedStartResponse` JSDoc | Low -- additive |
64
+ | `src/v2/durable-core/schemas/session/events.ts` | Extend `session_created.data` with `parentSessionId: z.string().optional()` | Low -- `z.object({})` uses strip mode |
65
+ | `src/mcp/handlers/v2-execution/start.ts` | Thread `parentSessionId` from `internalContext` into `session_created` event via `buildInitialEvents()` | Low -- internal API |
66
+ | `src/trigger/trigger-router.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
67
+ | `src/v2/usecases/console-routes.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
68
+
69
+ ---
70
+
71
+ ## Candidates
72
+
73
+ ### Candidate 1: Direct runWorkflow() call
74
+
75
+ **Summary**: `makeSpawnAgentTool()` calls `runWorkflow()` directly. No pre-creation. Session ID extracted from result after run.
76
+
77
+ **Tensions resolved**: YAGNI (fewest lines), blocking (natural await).
78
+ **Tensions accepted**: Crash-before-start has no observable `childSessionId`. `childSessionId` is absent on failure.
79
+
80
+ **Boundary**: `WorkflowTrigger` + direct `runWorkflow()` call.
81
+ **Why this boundary**: `WorkflowTrigger` is the natural seam -- carries all session config. No new types.
82
+
83
+ **Failure mode**: `runWorkflow()` crashes before AgentLoop starts -- `childSessionId` is null, parent gets `{ outcome: 'error', childSessionId: null }`.
84
+
85
+ **Repo-pattern relationship**: Follows factory pattern. No adaptation of `_preAllocatedStartResponse`.
86
+
87
+ **Gain**: ~10 fewer lines, maximum simplicity.
88
+ **Give up**: No deterministic `childSessionId` on startup failures. Less crash observability.
89
+
90
+ **Scope**: Best-fit.
91
+ **Philosophy fit**: Honors YAGNI most strongly. Slight tension with 'make illegal states unrepresentable' (`childSessionId` can be null).
92
+
93
+ ---
94
+
95
+ ### Candidate 2: Pre-create session with _preAllocatedStartResponse (RECOMMENDED)
96
+
97
+ **Summary**: `execute()` calls `executeStartWorkflow()` with `parentSessionId` in `internalContext`, decodes `childSessionId` from the returned `continueToken`, then calls `runWorkflow()` with `_preAllocatedStartResponse`.
98
+
99
+ **Tensions resolved**: Deterministic `childSessionId`, crash-before-start observability, `childSessionId` seeds Phase 2, 'make illegal states unrepresentable'.
100
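+ **Tensions accepted**: One extra async call (~10-50ms). A zombie session can remain in the store if `runWorkflow()` fails after pre-creation (see Failure mode).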
101
+
102
+ **Boundary**: `WorkflowTrigger._preAllocatedStartResponse` + `internalContext` injection.
103
+ **Why this boundary**: Direct adaptation of the proven `_preAllocatedStartResponse` pattern from `console-routes.ts`. Session store sees the child immediately -- correct observable behavior.
104
+
105
+ **Failure mode**: `executeStartWorkflow()` succeeds, `runWorkflow()` fails before AgentLoop -- zombie session in store. Accepted for Phase 1.
106
+
107
+ **Repo-pattern relationship**: Adapts proven `_preAllocatedStartResponse` pattern.
108
+
109
+ **Gain**: `childSessionId` always known before child runs. Deterministic. Child observable from moment of `execute()`.
110
+ **Give up**: One extra async call. Slightly more setup code.
111
+
112
+ **Scope**: Best-fit.
113
+ **Philosophy fit**: Honors determinism over cleverness, make illegal states unrepresentable, DI. No conflicts.
114
+
115
+ ---
116
+
117
+ ### Candidate 3: Read depth from session store at execute() time
118
+
119
+ **Summary**: Instead of passing `currentDepth` as a constructor parameter, read `spawnDepth` from parent session store inside `execute()`.
120
+
121
+ **Tensions resolved**: Accurate depth for checkpoint-resumed sessions (theoretical edge case).
122
+ **Tensions accepted**: Async I/O in `execute()`, more error paths, session store dependency.
123
+
124
+ **Boundary**: Session store read inside `execute()`.
125
+ **Why this boundary is NOT best-fit**: Expensive, speculative. Checkpoint-resumed daemon sessions restart AgentLoop from scratch -- constructor parameter is always correctly set.
126
+
127
+ **Failure mode**: Store read fails -- fail-safe blocks spawn, adds error path complexity.
128
+
129
+ **Repo-pattern relationship**: Departs from constructor-injection pattern.
130
+
131
+ **Gain**: Accurate depth for resumed sessions. **Give up**: YAGNI violation, async I/O, extra error paths.
132
+
133
+ **Scope**: Too broad. **Philosophy fit**: Conflicts with YAGNI.
134
+
135
+ ---
136
+
137
+ ## Comparison and Recommendation
138
+
139
+ ### Comparison Matrix
140
+
141
+ | Tension | C1 | C2 | C3 |
142
+ |---|---|---|---|
143
+ | Blocking fidelity | Strong | Strong | Strong |
144
+ | Deterministic childSessionId | Weak | Strong | Weak |
145
+ | Semaphore bypass | Strong | Strong | Strong |
146
+ | YAGNI | Strong | Moderate | Weak |
147
+ | Crash observability | Weak | Strong | Weak |
148
+ | Depth accuracy | Adequate | Adequate | Strong (speculative) |
149
+ | Repo pattern | Follows | Adapts proven | Departs |
150
+ | Philosophy | Mostly (nullable `childSessionId`) | Full | Partial |
151
+
152
+ ### Recommendation: Candidate 2
153
+
154
+ C2 is best-fit. The `_preAllocatedStartResponse` pattern is proven and stable (`console-routes.ts`).
155
+ The marginal complexity (one extra async call) is small relative to the gain: `childSessionId` is always
156
+ known, crash-before-start is observable, Phase 2 is seeded. C3 is rejected on YAGNI grounds.
157
+
158
+ ---
159
+
160
+ ## Self-Critique
161
+
162
+ **Strongest counter-argument**: C2 adds a zombie session failure mode that C1 doesn't have. If `executeStartWorkflow()` succeeds but `runWorkflow()` fails immediately, a session exists in the store with no corresponding run. C1 avoids this -- no session is created until the run actually starts.
163
+
164
+ **C1 as narrower option**: Still satisfies acceptance criteria. Loses crash observability and deterministic `childSessionId`. Would win if we prioritized simplicity over observability.
165
+
166
+ **C3 as broader option**: Justified only if checkpoint-resumed spawned sessions become a real production use case. No evidence for Phase 1.
167
+
168
+ **Assumption that would invalidate C2**: `_preAllocatedStartResponse` remains available; C2 breaks if it is removed in a future refactor. Mitigation: update its JSDoc (Orange finding O2) to list `spawn_agent` as a legitimate caller.
169
+
170
+ ---
171
+
172
+ ## Open Questions for the Main Agent
173
+
174
+ 1. **maxSubagentDepth source**: Design doc says read from `WorkflowTrigger.agentConfig` (default 3). Should this also check global workspace config? Decision: use `trigger.agentConfig?.maxSubagentDepth ?? 3` for Phase 1. Document in tool description.
175
+
176
+ 2. **`session_created.data` strictness**: Confirmed `z.object({})` uses strip mode, so the extension is safe. Not yet verified by a migration run -- low risk.
177
+
178
+ 3. **Zombie session cleanup**: Deferred to Phase 2. Document as known edge case in tool description.