@exaudeus/workrail 3.35.0 → 3.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/console-ui/assets/{index-B10Bn8qC.js → index-n8cJrS4v.js} +2 -2
- package/dist/console-ui/index.html +1 -1
- package/dist/daemon/workflow-runner.d.ts +4 -0
- package/dist/daemon/workflow-runner.js +133 -0
- package/dist/manifest.json +24 -24
- package/dist/mcp/handlers/v2-advance-events.js +1 -1
- package/dist/mcp/handlers/v2-execution/start.d.ts +1 -0
- package/dist/mcp/handlers/v2-execution/start.js +3 -2
- package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +64 -32
- package/dist/v2/durable-core/schemas/session/events.d.ts +20 -10
- package/dist/v2/durable-core/schemas/session/events.js +1 -1
- package/dist/v2/durable-core/schemas/session/gaps.d.ts +8 -8
- package/dist/v2/durable-core/schemas/session/gaps.js +1 -1
- package/docs/ideas/backlog.md +250 -0
- package/docs/ideas/design-candidates-spawn-agent-task.md +178 -0
- package/docs/ideas/design-review-findings-spawn-agent-task.md +139 -0
- package/docs/ideas/implementation_plan_spawn_agent.md +217 -0
- package/package.json +1 -1
|
@@ -60,10 +60,18 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
60
60
|
}>>;
|
|
61
61
|
} & {
|
|
62
62
|
kind: z.ZodLiteral<"session_created">;
|
|
63
|
-
data: z.ZodObject<{
|
|
63
|
+
data: z.ZodObject<{
|
|
64
|
+
parentSessionId: z.ZodOptional<z.ZodString>;
|
|
65
|
+
}, "strip", z.ZodTypeAny, {
|
|
66
|
+
parentSessionId?: string | undefined;
|
|
67
|
+
}, {
|
|
68
|
+
parentSessionId?: string | undefined;
|
|
69
|
+
}>;
|
|
64
70
|
}, "strip", z.ZodTypeAny, {
|
|
65
71
|
kind: "session_created";
|
|
66
|
-
data: {
|
|
72
|
+
data: {
|
|
73
|
+
parentSessionId?: string | undefined;
|
|
74
|
+
};
|
|
67
75
|
v: 1;
|
|
68
76
|
sessionId: string;
|
|
69
77
|
eventIndex: number;
|
|
@@ -75,7 +83,9 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
75
83
|
} | undefined;
|
|
76
84
|
}, {
|
|
77
85
|
kind: "session_created";
|
|
78
|
-
data: {
|
|
86
|
+
data: {
|
|
87
|
+
parentSessionId?: string | undefined;
|
|
88
|
+
};
|
|
79
89
|
v: 1;
|
|
80
90
|
sessionId: string;
|
|
81
91
|
eventIndex: number;
|
|
@@ -1834,12 +1844,12 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1834
1844
|
category: "user_only_dependency";
|
|
1835
1845
|
}>, z.ZodObject<{
|
|
1836
1846
|
category: z.ZodLiteral<"contract_violation">;
|
|
1837
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
1847
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
1838
1848
|
}, "strip", z.ZodTypeAny, {
|
|
1839
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1849
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1840
1850
|
category: "contract_violation";
|
|
1841
1851
|
}, {
|
|
1842
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1852
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1843
1853
|
category: "contract_violation";
|
|
1844
1854
|
}>, z.ZodObject<{
|
|
1845
1855
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -1902,7 +1912,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1902
1912
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1903
1913
|
category: "user_only_dependency";
|
|
1904
1914
|
} | {
|
|
1905
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1915
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1906
1916
|
category: "contract_violation";
|
|
1907
1917
|
} | {
|
|
1908
1918
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -1932,7 +1942,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1932
1942
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1933
1943
|
category: "user_only_dependency";
|
|
1934
1944
|
} | {
|
|
1935
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1945
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1936
1946
|
category: "contract_violation";
|
|
1937
1947
|
} | {
|
|
1938
1948
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -1965,7 +1975,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
1965
1975
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
1966
1976
|
category: "user_only_dependency";
|
|
1967
1977
|
} | {
|
|
1968
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
1978
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
1969
1979
|
category: "contract_violation";
|
|
1970
1980
|
} | {
|
|
1971
1981
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -2007,7 +2017,7 @@ export declare const DomainEventV1Schema: z.ZodDiscriminatedUnion<"kind", [z.Zod
|
|
|
2007
2017
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
2008
2018
|
category: "user_only_dependency";
|
|
2009
2019
|
} | {
|
|
2010
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
2020
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
2011
2021
|
category: "contract_violation";
|
|
2012
2022
|
} | {
|
|
2013
2023
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -102,7 +102,7 @@ const PreferencesChangedDataV1Schema = zod_1.z
|
|
|
102
102
|
}
|
|
103
103
|
});
|
|
104
104
|
exports.DomainEventV1Schema = zod_1.z.discriminatedUnion('kind', [
|
|
105
|
-
exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({}) }),
|
|
105
|
+
exports.DomainEventEnvelopeV1Schema.extend({ kind: zod_1.z.literal('session_created'), data: zod_1.z.object({ parentSessionId: zod_1.z.string().optional() }) }),
|
|
106
106
|
exports.DomainEventEnvelopeV1Schema.extend({
|
|
107
107
|
kind: zod_1.z.literal('observation_recorded'),
|
|
108
108
|
scope: zod_1.z.undefined(),
|
|
@@ -11,12 +11,12 @@ export declare const GapReasonSchema: z.ZodDiscriminatedUnion<"category", [z.Zod
|
|
|
11
11
|
category: "user_only_dependency";
|
|
12
12
|
}>, z.ZodObject<{
|
|
13
13
|
category: z.ZodLiteral<"contract_violation">;
|
|
14
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
14
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
15
15
|
}, "strip", z.ZodTypeAny, {
|
|
16
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
16
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
17
17
|
category: "contract_violation";
|
|
18
18
|
}, {
|
|
19
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
19
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
20
20
|
category: "contract_violation";
|
|
21
21
|
}>, z.ZodObject<{
|
|
22
22
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -86,12 +86,12 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
86
86
|
category: "user_only_dependency";
|
|
87
87
|
}>, z.ZodObject<{
|
|
88
88
|
category: z.ZodLiteral<"contract_violation">;
|
|
89
|
-
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes"]>;
|
|
89
|
+
detail: z.ZodEnum<["missing_required_output", "invalid_required_output", "missing_required_notes", "assessment_followup_required"]>;
|
|
90
90
|
}, "strip", z.ZodTypeAny, {
|
|
91
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
91
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
92
92
|
category: "contract_violation";
|
|
93
93
|
}, {
|
|
94
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
94
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
95
95
|
category: "contract_violation";
|
|
96
96
|
}>, z.ZodObject<{
|
|
97
97
|
category: z.ZodLiteral<"capability_missing">;
|
|
@@ -154,7 +154,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
154
154
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
155
155
|
category: "user_only_dependency";
|
|
156
156
|
} | {
|
|
157
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
157
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
158
158
|
category: "contract_violation";
|
|
159
159
|
} | {
|
|
160
160
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -184,7 +184,7 @@ export declare const GapRecordedDataV1Schema: z.ZodObject<{
|
|
|
184
184
|
detail: "needs_user_secret_or_token" | "needs_user_account_access" | "needs_user_artifact" | "needs_user_choice" | "needs_user_approval" | "needs_user_environment_action";
|
|
185
185
|
category: "user_only_dependency";
|
|
186
186
|
} | {
|
|
187
|
-
detail: "invalid_required_output" | "missing_required_output" | "missing_required_notes";
|
|
187
|
+
detail: "invalid_required_output" | "missing_required_output" | "assessment_followup_required" | "missing_required_notes";
|
|
188
188
|
category: "contract_violation";
|
|
189
189
|
} | {
|
|
190
190
|
detail: "required_capability_unknown" | "required_capability_unavailable";
|
|
@@ -12,7 +12,7 @@ exports.UserOnlyDependencyReasonSchema = zod_1.z.enum([
|
|
|
12
12
|
]);
|
|
13
13
|
exports.GapReasonSchema = zod_1.z.discriminatedUnion('category', [
|
|
14
14
|
zod_1.z.object({ category: zod_1.z.literal('user_only_dependency'), detail: exports.UserOnlyDependencyReasonSchema }),
|
|
15
|
-
zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes']) }),
|
|
15
|
+
zod_1.z.object({ category: zod_1.z.literal('contract_violation'), detail: zod_1.z.enum(['missing_required_output', 'invalid_required_output', 'missing_required_notes', 'assessment_followup_required']) }),
|
|
16
16
|
zod_1.z.object({
|
|
17
17
|
category: zod_1.z.literal('capability_missing'),
|
|
18
18
|
detail: zod_1.z.enum(['required_capability_unavailable', 'required_capability_unknown']),
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -4975,3 +4975,253 @@ Long-term (when mobile exists):
|
|
|
4975
4975
|
```
|
|
4976
4976
|
|
|
4977
4977
|
**Build order:** outbox.jsonl integration (foundation, works everywhere) → generic webhook (covers Slack/Discord/Teams/anything) → platform notifications (macOS/Linux/Windows) → mobile app push (when mobile exists).
|
|
4978
|
+
|
|
4979
|
+
---
|
|
4980
|
+
|
|
4981
|
+
## 🎉 WorkTrain first confirmed end-to-end autonomous session (Apr 18, 2026)
|
|
4982
|
+
|
|
4983
|
+
**Timestamp:** 2026-04-18T15:09:49Z
|
|
4984
|
+
**Commit:** `473f4bd0` (main)
|
|
4985
|
+
**npm version:** v3.34.1 (published, installable by anyone)
|
|
4986
|
+
**What happened:** A real MR review workflow (`mr-review-workflow-agentic`) ran completely autonomously via webhook trigger, advanced through all phases (context gathering, review, synthesis, validation, handoff), self-validated, and produced a structured finding set. 8 step advances, `outcome: success`.
|
|
4987
|
+
|
|
4988
|
+
**Trigger:** `POST /webhook/mr-review {"goal": "Review PR #566: fix two minor bugs..."}`
|
|
4989
|
+
**Session:** `sess_3bmjuzf7l2vrqynjtleg5iskm4`
|
|
4990
|
+
**Result:** APPROVE with High confidence. 3 Minor findings, 1 Informational. Correctly decided not to delegate, since there were no Critical/Major issues.
|
|
4991
|
+
|
|
4992
|
+
---
|
|
4993
|
+
|
|
4994
|
+
### What works at this commit
|
|
4995
|
+
|
|
4996
|
+
- ✅ Daemon accepts webhooks, starts sessions, runs workflows end-to-end
|
|
4997
|
+
- ✅ Sessions advance through all workflow phases autonomously
|
|
4998
|
+
- ✅ `mr-review-workflow-agentic` v2.6 runs fully -- context gathering, review phases, synthesis loop, validation, handoff
|
|
4999
|
+
- ✅ `wr.discovery` v3.2.0 runs fully -- with new phase-0-reframe (goal reframing before research)
|
|
5000
|
+
- ✅ Console shows live sessions via event log (no daemon connection required)
|
|
5001
|
+
- ✅ MCP server is stable (bridge removed, EPIPE fixed, v3.34.1 published)
|
|
5002
|
+
- ✅ GitHub + GitLab polling triggers (no webhooks needed)
|
|
5003
|
+
- ✅ `worktrain init`, `tell`, `inbox`, `spawn`, `await` CLI commands
|
|
5004
|
+
- ✅ Stuck detection + visibility (`worktrain status`, `worktrain logs --follow`)
|
|
5005
|
+
- ✅ `complete_step` tool -- daemon manages continueToken, LLM never handles it
|
|
5006
|
+
- ✅ Assessment gate circuit breaker (stops at 3 blocked attempts, shows artifact format)
|
|
5007
|
+
- ✅ `worktrain daemon --install` creates launchd service (daemon survives MCP reconnects)
|
|
5008
|
+
- ✅ Self-configuration (`triggers.yml`, `daemon-soul.md`, `AGENTS.md` for workrail repo)
|
|
5009
|
+
|
|
5010
|
+
### Current limitations at this commit
|
|
5011
|
+
|
|
5012
|
+
**Blocking reliable complex workflows:**
|
|
5013
|
+
1. **`complete_step` not yet tested in production** -- just merged, daemon still using `continue_workflow` in running sessions. Needs daemon restart to take effect.
|
|
5014
|
+
2. **Assessment gates still unreliable** -- `complete_step` fixes the token issue; the `artifacts` field (#557) fixes the submission issue. But `coding-task-workflow-agentic` phases with quality gates haven't been tested end-to-end yet.
|
|
5015
|
+
3. **Native `spawn_agent` not yet merged** -- implementation in progress. Until it lands, all subagent delegation is via `mcp__nested-subagent__Task` (invisible black box).
|
|
5016
|
+
4. **No session identity (parentSessionId)** -- multi-phase work appears as unrelated flat sessions in the console.
|
|
5017
|
+
|
|
5018
|
+
**Architecture not yet realized:**
|
|
5019
|
+
5. **Coordinator scripts don't exist** -- the `worktrain spawn`/`await` commands exist, but there are no coordinator templates built on them.
|
|
5020
|
+
6. **Subagent loop not rethought** -- LLM still decides when to delegate; workflow-as-orchestrator model is spec'd but not built.
|
|
5021
|
+
7. **Workflow runtime adapter not built** -- workflows run in daemon mode as-is; no MCP vs daemon adaptation layer.
|
|
5022
|
+
8. **Knowledge graph not built** -- context gathering still sweeps files on every session.
|
|
5023
|
+
9. **MCP simplification PR-B not done** -- HttpServer still starts with MCP server.
|
|
5024
|
+
|
|
5025
|
+
**Missing for production autonomy:**
|
|
5026
|
+
10. **No notifications** -- daemon completes work silently. Users have no awareness unless watching console/logs.
|
|
5027
|
+
11. **No auto-commit from handoff artifact** -- merged but untested end-to-end.
|
|
5028
|
+
12. **Late-bound goals not implemented** -- triggers require static goals; dynamic goals (like PR reviews) need `goalTemplate: "{{$.goal}}"` as default.
|
|
5029
|
+
13. **No coordinator script template** -- the multi-phase autonomous pipeline exists as primitives but not as a usable script.
|
|
5030
|
+
|
|
5031
|
+
---
|
|
5032
|
+
|
|
5033
|
+
### Artifacts as first-class citizens: explorable, accessible, out of the repo (Apr 18, 2026)
|
|
5034
|
+
|
|
5035
|
+
**The current mess:** every autonomous session dumps `design-candidates.md`, `implementation_plan.md`, `design-review-findings.md`, `mr-review.md` etc. as files in the repo root or worktrees. They are:
|
|
5036
|
+
- Not indexed or searchable
|
|
5037
|
+
- Not visible in the console
|
|
5038
|
+
- Not accessible to other sessions (agent B can't read agent A's handoff without knowing the exact file path)
|
|
5039
|
+
- Polluting the repo with ephemeral working documents
|
|
5040
|
+
- Lost when worktrees are cleaned up
|
|
5041
|
+
- Scattered across the filesystem with no structure
|
|
5042
|
+
|
|
5043
|
+
**The right model:** artifacts are WorkTrain data, not filesystem files.
|
|
5044
|
+
|
|
5045
|
+
---
|
|
5046
|
+
|
|
5047
|
+
#### What an artifact is
|
|
5048
|
+
|
|
5049
|
+
Any structured output from a session that has value beyond the session itself:
|
|
5050
|
+
- **Handoff docs** -- what one session produces for the next to consume
|
|
5051
|
+
- **Design candidates** -- research output with tradeoffs and recommendation
|
|
5052
|
+
- **Implementation plans** -- what to build, how, in what order
|
|
5053
|
+
- **Review findings** -- MR review output with findings, severity, recommendation
|
|
5054
|
+
- **Spec files** -- behavioral specs, acceptance criteria, API contracts
|
|
5055
|
+
- **Investigation summaries** -- bug investigation root cause and reproduction
|
|
5056
|
+
- **Context bundles** -- pre-packaged knowledge for subagent consumption
|
|
5057
|
+
|
|
5058
|
+
**NOT artifacts:** step notes (stay in WorkRail session store), event logs (stay in daemon events), source code (stays in repo).
|
|
5059
|
+
|
|
5060
|
+
---
|
|
5061
|
+
|
|
5062
|
+
#### Where artifacts live
|
|
5063
|
+
|
|
5064
|
+
`~/.workrail/artifacts/<sessionId>/<artifact-type>-<timestamp>.json`
|
|
5065
|
+
|
|
5066
|
+
Structured JSON, not markdown. The display layer (console, `worktrain artifacts`) renders them in human-readable form. Other agents query them as structured data.
|
|
5067
|
+
|
|
5068
|
+
**Why JSON not markdown:**
|
|
5069
|
+
- Queryable by other agents (what are the findings with severity=critical?)
|
|
5070
|
+
- Renderable by the console with proper formatting, filtering, search
|
|
5071
|
+
- Versionable and diffable in the artifact store
|
|
5072
|
+
- Accessible via the knowledge graph (artifacts become nodes with typed edges)
|
|
5073
|
+
|
|
5074
|
+
---
|
|
5075
|
+
|
|
5076
|
+
#### Console integration
|
|
5077
|
+
|
|
5078
|
+
The console session detail view gets an "Artifacts" tab alongside "Steps" and "Notes":
|
|
5079
|
+
|
|
5080
|
+
```
|
|
5081
|
+
Session: sess_3bmj... [MR Review: PR #566]
|
|
5082
|
+
├── Steps (8)
|
|
5083
|
+
├── Notes
|
|
5084
|
+
└── Artifacts (3)
|
|
5085
|
+
├── 📋 review-findings.json "APPROVE -- 3 Minor, 1 Info"
|
|
5086
|
+
├── 📄 context-bundle.json "12 files read, 4 patterns identified"
|
|
5087
|
+
└── 🔍 investigation-notes.json "Signal 3 dead code in max_turns path"
|
|
5088
|
+
```
|
|
5089
|
+
|
|
5090
|
+
Click an artifact → full rendered view in the console.
|
|
5091
|
+
|
|
5092
|
+
---
|
|
5093
|
+
|
|
5094
|
+
#### Accessibility to other agents
|
|
5095
|
+
|
|
5096
|
+
Agents can query artifacts from prior sessions via a new tool:
|
|
5097
|
+
|
|
5098
|
+
```
|
|
5099
|
+
read_artifact({ sessionId: 'sess_3bmj...', type: 'review-findings' })
|
|
5100
|
+
→ { verdict: 'APPROVE', findings: [...], recommendation: '...' }
|
|
5101
|
+
|
|
5102
|
+
search_artifacts({ type: 'implementation-plan', workflowId: 'coding-task-workflow-agentic', since: '7d' })
|
|
5103
|
+
→ [{ sessionId, summary, createdAt }, ...]
|
|
5104
|
+
```
|
|
5105
|
+
|
|
5106
|
+
This replaces the current pattern where agents `cat design-candidates.md` from a known path -- fragile, path-dependent, breaks across worktrees.
|
|
5107
|
+
|
|
5108
|
+
---
|
|
5109
|
+
|
|
5110
|
+
#### Workflow integration
|
|
5111
|
+
|
|
5112
|
+
Workflow steps declare their artifact output type:
|
|
5113
|
+
|
|
5114
|
+
```json
|
|
5115
|
+
{
|
|
5116
|
+
"id": "phase-1c-challenge-and-select",
|
|
5117
|
+
"output": {
|
|
5118
|
+
"artifact": "design-candidates",
|
|
5119
|
+
"schema": "wr.artifacts.design-candidates.v1"
|
|
5120
|
+
}
|
|
5121
|
+
}
|
|
5122
|
+
```
|
|
5123
|
+
|
|
5124
|
+
**Both the daemon AND the MCP server** store step artifacts automatically. The artifact store is a WorkRail data layer feature, not daemon-specific. A human using Claude Code with the MCP produces the same artifacts in the same store as an autonomous daemon session. The console shows them for both. Other sessions (human-driven or autonomous) can query them either way.
|
|
5125
|
+
|
|
5126
|
+
In MCP mode, the human can explicitly commit an artifact to the repo if desired (e.g. a final spec becomes `docs/specs/feature-x.md`). But the default is the artifact store -- repo is opt-in. The `NEVER COMMIT MARKDOWN FILES` rule in workflow metaGuidance exists because the artifact store doesn't exist yet. Once it does, that rule becomes unnecessary for all runtimes.
|
|
5127
|
+
|
|
5128
|
+
---
|
|
5129
|
+
|
|
5130
|
+
#### What stays in the repo
|
|
5131
|
+
|
|
5132
|
+
Almost nothing from WorkTrain sessions. The only things that belong in the repo:
|
|
5133
|
+
- Source code changes (committed via auto-commit or human review)
|
|
5134
|
+
- Long-lived spec files that are part of the product (e.g. `docs/ideas/backlog.md`)
|
|
5135
|
+
- Workflow definitions (`workflows/*.json`)
|
|
5136
|
+
|
|
5137
|
+
Everything else -- design docs, review findings, investigation notes, implementation plans -- lives in `~/.workrail/artifacts/`. If you want a design doc in the repo, you explicitly commit it. The default is: it lives in WorkTrain's data layer.
|
|
5138
|
+
|
|
5139
|
+
---
|
|
5140
|
+
|
|
5141
|
+
#### Build order
|
|
5142
|
+
|
|
5143
|
+
1. **Artifact store** -- `~/.workrail/artifacts/<sessionId>/` directory structure, JSON schema for common types
|
|
5144
|
+
2. **Daemon writes artifacts** -- workflow steps with `output.artifact` declaration write to the artifact store automatically
|
|
5145
|
+
3. **`worktrain artifacts` CLI** -- list, read, search artifacts by session, type, date
|
|
5146
|
+
4. **Console artifacts tab** -- render artifacts in session detail view
|
|
5147
|
+
5. **`read_artifact` / `search_artifacts` tools** -- agents can query the artifact store
|
|
5148
|
+
6. **Knowledge graph integration** -- artifacts become nodes, sessions link to their artifacts
|
|
5149
|
+
|
|
5150
|
+
**The `NEVER COMMIT MARKDOWN FILES` rule in metaGuidance is a symptom of this missing feature.** The rule exists because agents keep dumping files in the wrong place. With a proper artifact store, the rule becomes unnecessary -- artifacts have nowhere to go except the artifact store.
|
|
5151
|
+
|
|
5152
|
+
---
|
|
5153
|
+
|
|
5154
|
+
### "Add to repo" button in console for artifacts (Apr 18, 2026)
|
|
5155
|
+
|
|
5156
|
+
Instead of workflow steps declaring upfront whether an artifact goes to the repo, the human makes that decision after seeing the content -- via a button in the console.
|
|
5157
|
+
|
|
5158
|
+
**The flow:**
|
|
5159
|
+
1. Agent produces artifact → stored automatically in `~/.workrail/artifacts/`
|
|
5160
|
+
2. Human opens it in the console Artifacts tab
|
|
5161
|
+
3. Sees action buttons: **📁 Add to repo** | **📋 Copy** | **🔗 Share link** | **📤 Export**
|
|
5162
|
+
4. Clicks "Add to repo" → console prompts: "Save as: `docs/design/design-candidates-<name>.md`" (editable path with sensible default)
|
|
5163
|
+
5. Console commits the artifact as markdown to the repo at that path, with a commit message like `docs: add design candidates for <workflow-goal>`
|
|
5164
|
+
|
|
5165
|
+
**Why this is better than workflow-level declaration:**
|
|
5166
|
+
- Agent doesn't need to know at step time whether output will be repo-worthy
|
|
5167
|
+
- Human decides after seeing actual content quality
|
|
5168
|
+
- Ephemeral working artifacts stay ephemeral; only promoted ones go to the repo
|
|
5169
|
+
- No "NEVER COMMIT MARKDOWN FILES" rule needed -- agents just produce artifacts, humans decide what's repo-worthy
|
|
5170
|
+
|
|
5171
|
+
**Button options:**
|
|
5172
|
+
- **📁 Add to repo** -- renders artifact as markdown, commits to repo at specified path
|
|
5173
|
+
- **📋 Copy** -- copies rendered markdown to clipboard
|
|
5174
|
+
- **🔗 Share link** -- generates a URL that opens the artifact in the console. ⚠️ Local-only: only works on the same machine or with shared filesystem access. Requires cloud hosting for true team sharing (see cloud hosting spec in backlog)
|
|
5175
|
+
- **📤 Export** -- save to arbitrary filesystem path outside the repo
|
|
5176
|
+
|
|
5177
|
+
**The commit WorkTrain creates:**
|
|
5178
|
+
```
|
|
5179
|
+
docs(design): add design candidates for MCP simplification
|
|
5180
|
+
|
|
5181
|
+
Source: WorkTrain session sess_3bmj... (mr-review-workflow-agentic)
|
|
5182
|
+
Artifact: design-candidates-stdio-simplification-2026-04-18.md
|
|
5183
|
+
```
|
|
5184
|
+
|
|
5185
|
+
**Also useful for:** implementation plans the team wants to track, spec files that belong in the repo permanently, investigation summaries that become part of incident post-mortems.
|
|
5186
|
+
|
|
5187
|
+
---
|
|
5188
|
+
|
|
5189
|
+
## Current state update (Apr 18, 2026 -- later)
|
|
5190
|
+
|
|
5191
|
+
**npm version: v3.35.1** (auto-released after spawn_agent merged)
|
|
5192
|
+
|
|
5193
|
+
### What additionally shipped since the milestone (commit 473f4bd0)
|
|
5194
|
+
|
|
5195
|
+
- ✅ **`complete_step` tool** (#569) -- daemon manages continueToken internally, LLM never handles it. Notes required (min 50 chars). `continue_workflow` deprecated.
|
|
5196
|
+
- ✅ **`spawn_agent` tool** (#573) -- native in-process child session spawning. parentSessionId in session_created event. Depth enforcement. Semaphore bypass. All 4 WorkflowRunResult variants handled.
|
|
5197
|
+
- ✅ **`complete_step` description fix** (#575) -- removed token-seeking language from deprecated continue_workflow description that would have triggered the LLM to seek a token.
|
|
5198
|
+
- ✅ **Discovery ran before both implementations** -- wr.discovery validated the complete_step approach (found 1 merge blocker, which was fixed) and designed the spawn_agent architecture (found a semaphore deadlock risk, which the design avoids).
|
|
5199
|
+
|
|
5200
|
+
### Updated limitations
|
|
5201
|
+
|
|
5202
|
+
**Status of the previous list (struck-through items are resolved; the rest are still open):**
|
|
5203
|
+
1. ~~complete_step just merged, untested~~ → ✅ merged, description fixed, discovery validated
|
|
5204
|
+
2. ~~spawn_agent not merged~~ → ✅ merged as #573
|
|
5205
|
+
3. **No session identity in console UI** -- parentSessionId is NOW in the event store (schema extended in #573) but console doesn't show the tree yet. Data is there; visualization is not.
|
|
5206
|
+
4. **No coordinator scripts** -- spawn_agent exists, coordinator templates don't.
|
|
5207
|
+
5. **Subagent loop still LLM-driven** -- workflow-as-orchestrator model spec'd but not built.
|
|
5208
|
+
6. **Workflow runtime adapter not built** -- the "one spec, two runtimes" model is spec'd but not built.
|
|
5209
|
+
7. **Knowledge graph not built** -- context still sweeps files every session.
|
|
5210
|
+
8. **Artifacts not first-class** -- agents still dump markdown files in repo. Artifact store spec'd but not built.
|
|
5211
|
+
9. **No notifications** -- daemon completes silently.
|
|
5212
|
+
10. **MCP simplification PR-B** -- HttpServer still starts with MCP server.
|
|
5213
|
+
|
|
5214
|
+
### What's now possible that wasn't before
|
|
5215
|
+
|
|
5216
|
+
With `complete_step` + `spawn_agent`:
|
|
5217
|
+
- Agents can advance workflows without ever touching a token (removes the #1 session failure cause)
|
|
5218
|
+
- Workflows can declare delegation and the daemon spawns proper child sessions (all visible in event log)
|
|
5219
|
+
- Multi-phase work has a path to becoming a coherent work unit (parentSessionId in data, UI visualization next)
|
|
5220
|
+
|
|
5221
|
+
### Next priorities
|
|
5222
|
+
|
|
5223
|
+
1. **Console session tree view** -- parentSessionId data is in the store. Build the UI to show it.
|
|
5224
|
+
2. **First coordinator script template** -- `coordinator-mr-review.sh` that spawns: discovery → review → (conditional) fix → re-review. Proves the spawn/await loop works end-to-end.
|
|
5225
|
+
3. **Notifications** -- macOS notification + generic webhook. ~30 min implementation.
|
|
5226
|
+
4. **Late-bound goals** -- default `goalTemplate: "{{$.goal}}"` when no static goal. 10-line fix in trigger-store.ts.
|
|
5227
|
+
5. **Artifacts store foundation** -- `~/.workrail/artifacts/` directory structure. Step 1 of the first-class artifacts vision.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Design Candidates: spawn_agent Task Implementation
|
|
2
|
+
|
|
3
|
+
> Full investigative material is in `design-candidates-spawn-agent.md`, `design-spawn-agent.md`,
|
|
4
|
+
> and `design-review-findings-spawn-agent.md`. This file summarizes for the current coding task.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Problem Understanding
|
|
9
|
+
|
|
10
|
+
### Core Tensions
|
|
11
|
+
|
|
12
|
+
**T1: Blocking vs. semaphore deadlock**
|
|
13
|
+
`TriggerRouter.dispatch()` is fire-and-forget (non-blocking by design) and uses a global `Semaphore`.
|
|
14
|
+
A parent holding a slot cannot wait for a child to acquire another slot -- deadlock.
|
|
15
|
+
Correct path: call `runWorkflow()` directly, bypassing the semaphore entirely.
|
|
16
|
+
|
|
17
|
+
**T2: Typed schema extension vs. internalContext injection**
|
|
18
|
+
Adding `parentSessionId` to `session_created.data` is the typed, durable, query-friendly path.
|
|
19
|
+
Injecting via `internalContext` (context_set event) is the proven fast path.
|
|
20
|
+
Both are needed: `internalContext` for the `executeStartWorkflow()` call, AND schema extension for future DAG queries.
|
|
21
|
+
|
|
22
|
+
**T3: Deterministic childSessionId vs. code simplicity**
|
|
23
|
+
Pre-creating the child session (Candidate 2) gives a deterministic `childSessionId` before the run starts.
|
|
24
|
+
Direct `runWorkflow()` (Candidate 1) is simpler but cannot return `childSessionId` if the run crashes before the AgentLoop starts.
|
|
25
|
+
|
|
26
|
+
**T4: Depth propagation safety**
|
|
27
|
+
Using `context.spawnDepth` (generic map) is fragile -- any code that overwrites context silently breaks depth enforcement.
|
|
28
|
+
Using `WorkflowTrigger.spawnDepth` (typed `readonly` field) is compiler-enforced and cannot be accidentally lost.
|
|
29
|
+
|
|
30
|
+
### Likely Seam
|
|
31
|
+
`workflow-runner.ts` -- new `makeSpawnAgentTool()` factory alongside existing tool factories.
|
|
32
|
+
`events.ts` -- one-line additive schema extension for `session_created.data`.
|
|
33
|
+
`start.ts` -- thread `parentSessionId` through `buildInitialEvents()`.
|
|
34
|
+
|
|
35
|
+
### What Makes It Hard
|
|
36
|
+
- The `runWorkflow()` call inside `execute()` requires capturing `ctx`, `apiKey`, `daemonRegistry?`, `emitter?` in the factory closure.
|
|
37
|
+
- `executeStartWorkflow()` returns `RA<StartWorkflowResult, StartWorkflowError>` -- must be unwrapped asynchronously.
|
|
38
|
+
- `_preAllocatedStartResponse` expects `startResult.value.response` (not the full `StartWorkflowResult`).
|
|
39
|
+
- A junior developer would likely call `dispatch()` instead of `runWorkflow()` and create a deadlock (the parent holds a semaphore slot while waiting for the child to acquire another).
|
|
40
|
+
- `session_created.data` currently hardcodes `data: {}` in `buildInitialEvents()` -- must thread `parentSessionId` into that call.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Philosophy Constraints
|
|
45
|
+
|
|
46
|
+
From `CLAUDE.md` and repo patterns:
|
|
47
|
+
|
|
48
|
+
- **Errors as data**: Return `{ outcome: 'error', notes: msg }` JSON, not thrown exceptions, for child failures.
|
|
49
|
+
- **Exhaustiveness**: Handle all 4 `WorkflowRunResult` variants without `as unknown` casts.
|
|
50
|
+
- **Immutability**: New `WorkflowTrigger` fields are `readonly`.
|
|
51
|
+
- **DI for boundaries**: `runWorkflowFn`, `ctx`, `apiKey`, `emitter` all injected at construction time.
|
|
52
|
+
- **YAGNI**: Phase 1 only. No `spawn_session + await_sessions`, no bare-prompt mode, no width guardrails.
|
|
53
|
+
- **Make illegal states unrepresentable**: `childSessionId` always present (pre-create guarantees it).
|
|
54
|
+
|
|
55
|
+
No philosophy conflicts between stated rules and repo patterns.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Impact Surface
|
|
60
|
+
|
|
61
|
+
| File | Change | Risk |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| `src/daemon/workflow-runner.ts` | Add `parentSessionId?`, `spawnDepth?` to `WorkflowTrigger`; add `makeSpawnAgentTool()`; inject in `runWorkflow()`; update `BASE_SYSTEM_PROMPT`; update `_preAllocatedStartResponse` JSDoc | Low -- additive |
|
|
64
|
+
| `src/v2/durable-core/schemas/session/events.ts` | Extend `session_created.data` with `parentSessionId: z.string().optional()` | Low -- `z.object({})` uses strip mode |
|
|
65
|
+
| `src/mcp/handlers/v2-execution/start.ts` | Thread `parentSessionId` from `internalContext` into `session_created` event via `buildInitialEvents()` | Low -- internal API |
|
|
66
|
+
| `src/trigger/trigger-router.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
|
|
67
|
+
| `src/v2/usecases/console-routes.ts` | No change -- new `WorkflowTrigger` fields are optional | None |
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Candidates
|
|
72
|
+
|
|
73
|
+
### Candidate 1: Direct runWorkflow() call
|
|
74
|
+
|
|
75
|
+
**Summary**: `makeSpawnAgentTool()` calls `runWorkflow()` directly. No pre-creation. Session ID extracted from result after run.
|
|
76
|
+
|
|
77
|
+
**Tensions resolved**: YAGNI (fewest lines), blocking (natural await).
|
|
78
|
+
**Tensions accepted**: Crash-before-start has no observable `childSessionId`. `childSessionId` is absent on failure.
|
|
79
|
+
|
|
80
|
+
**Boundary**: `WorkflowTrigger` + direct `runWorkflow()` call.
|
|
81
|
+
**Why this boundary**: `WorkflowTrigger` is the natural seam -- carries all session config. No new types.
|
|
82
|
+
|
|
83
|
+
**Failure mode**: `runWorkflow()` crashes before AgentLoop starts -- `childSessionId` is null, parent gets `{ outcome: 'error', childSessionId: null }`.
|
|
84
|
+
|
|
85
|
+
**Repo-pattern relationship**: Follows factory pattern. No adaptation of `_preAllocatedStartResponse`.
|
|
86
|
+
|
|
87
|
+
**Gain**: ~10 fewer lines, maximum simplicity.
|
|
88
|
+
**Give up**: No deterministic `childSessionId` on startup failures. Less crash observability.
|
|
89
|
+
|
|
90
|
+
**Scope**: Best-fit.
|
|
91
|
+
**Philosophy fit**: Honors YAGNI strongest. Slight tension with 'make illegal states unrepresentable' (`childSessionId` can be null).
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Candidate 2: Pre-create session with _preAllocatedStartResponse (RECOMMENDED)
|
|
96
|
+
|
|
97
|
+
**Summary**: `execute()` calls `executeStartWorkflow()` with `parentSessionId` in `internalContext`, decodes `childSessionId` from the returned `continueToken`, then calls `runWorkflow()` with `_preAllocatedStartResponse`.
|
|
98
|
+
|
|
99
|
+
**Tensions resolved**: Deterministic `childSessionId`, crash-before-start observability, `childSessionId` seeds Phase 2, 'make illegal states unrepresentable'.
|
|
100
|
+
**Tensions accepted**: One extra async call (~10-50ms).
|
|
101
|
+
|
|
102
|
+
**Boundary**: `WorkflowTrigger._preAllocatedStartResponse` + `internalContext` injection.
|
|
103
|
+
**Why this boundary**: Direct adaptation of the proven `_preAllocatedStartResponse` pattern from `console-routes.ts`. Session store sees the child immediately -- correct observable behavior.
|
|
104
|
+
|
|
105
|
+
**Failure mode**: `executeStartWorkflow()` succeeds, `runWorkflow()` fails before AgentLoop -- zombie session in store. Accepted for Phase 1.
|
|
106
|
+
|
|
107
|
+
**Repo-pattern relationship**: Adapts proven `_preAllocatedStartResponse` pattern.
|
|
108
|
+
|
|
109
|
+
**Gain**: `childSessionId` always known before child runs. Deterministic. Child observable from moment of `execute()`.
|
|
110
|
+
**Give up**: One extra async call. Slightly more setup code.
|
|
111
|
+
|
|
112
|
+
**Scope**: Best-fit.
|
|
113
|
+
**Philosophy fit**: Honors determinism over cleverness, make illegal states unrepresentable, DI. No conflicts.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
### Candidate 3: Read depth from session store at execute() time
|
|
118
|
+
|
|
119
|
+
**Summary**: Instead of passing `currentDepth` as a constructor parameter, read `spawnDepth` from parent session store inside `execute()`.
|
|
120
|
+
|
|
121
|
+
**Tensions resolved**: Accurate depth for checkpoint-resumed sessions (theoretical edge case).
|
|
122
|
+
**Tensions accepted**: Async I/O in `execute()`, more error paths, session store dependency.
|
|
123
|
+
|
|
124
|
+
**Boundary**: Session store read inside `execute()`.
|
|
125
|
+
**Why this boundary is NOT best-fit**: Expensive, speculative. Checkpoint-resumed daemon sessions restart AgentLoop from scratch -- constructor parameter is always correctly set.
|
|
126
|
+
|
|
127
|
+
**Failure mode**: Store read fails -- fail-safe blocks spawn, adds error path complexity.
|
|
128
|
+
|
|
129
|
+
**Repo-pattern relationship**: Departs from constructor-injection pattern.
|
|
130
|
+
|
|
131
|
+
**Gain**: Accurate depth for resumed sessions. **Give up**: YAGNI violation, async I/O, extra error paths.
|
|
132
|
+
|
|
133
|
+
**Scope**: Too broad. **Philosophy fit**: Conflicts with YAGNI.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Comparison and Recommendation
|
|
138
|
+
|
|
139
|
+
### Comparison Matrix
|
|
140
|
+
|
|
141
|
+
| Tension | C1 | C2 | C3 |
|
|
142
|
+
|---|---|---|---|
|
|
143
|
+
| Blocking fidelity | Strong | Strong | Strong |
|
|
144
|
+
| Deterministic childSessionId | Weak | Strong | Weak |
|
|
145
|
+
| Semaphore bypass | Strong | Strong | Strong |
|
|
146
|
+
| YAGNI | Strong | Moderate | Weak |
|
|
147
|
+
| Crash observability | Weak | Strong | Weak |
|
|
148
|
+
| Depth accuracy | Adequate | Adequate | Strong (speculative) |
|
|
149
|
+
| Repo pattern | Follows | Adapts proven | Departs |
|
|
150
|
+
| Philosophy | Mostly (nullable `childSessionId`) | Full | Partial |
|
|
151
|
+
|
|
152
|
+
### Recommendation: Candidate 2
|
|
153
|
+
|
|
154
|
+
C2 is best-fit. The `_preAllocatedStartResponse` pattern is proven and stable (`console-routes.ts`).
|
|
155
|
+
The marginal complexity (one extra async call) is small relative to the gain: `childSessionId` is always
|
|
156
|
+
known, crash-before-start is observable, Phase 2 is seeded. C3 is rejected on YAGNI grounds.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Self-Critique
|
|
161
|
+
|
|
162
|
+
**Strongest counter-argument**: C2 adds a zombie session failure mode that C1 doesn't have. If `executeStartWorkflow()` succeeds but `runWorkflow()` fails immediately, a session exists in the store with no corresponding run. C1 avoids this -- no session is created until the run actually starts.
|
|
163
|
+
|
|
164
|
+
**C1 as narrower option**: Still satisfies acceptance criteria. Loses crash observability and deterministic `childSessionId`. Would win if we prioritized simplicity over observability.
|
|
165
|
+
|
|
166
|
+
**C3 as broader option**: Justified only if checkpoint-resumed spawned sessions become a real production use case. No evidence for Phase 1.
|
|
167
|
+
|
|
168
|
+
**Assumption that would invalidate C2**: C2 assumes `_preAllocatedStartResponse` remains supported; if it is removed in a future refactor, C2 breaks. Mitigation: update its JSDoc (Orange finding O2) to list `spawn_agent` as a legitimate caller.
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Open Questions for the Main Agent
|
|
173
|
+
|
|
174
|
+
1. **maxSubagentDepth source**: Design doc says read from `WorkflowTrigger.agentConfig` (default 3). Should this also check global workspace config? Decision: use `trigger.agentConfig?.maxSubagentDepth ?? 3` for Phase 1. Document in tool description.
|
|
175
|
+
|
|
176
|
+
2. **`session_created.data` strictness**: Confirmed `z.object({})` uses strip mode. Extension is safe. Unverified by migration run -- low risk.
|
|
177
|
+
|
|
178
|
+
3. **Zombie session cleanup**: Deferred to Phase 2. Document as known edge case in tool description.
|