@exaudeus/workrail 3.34.2 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,6 +41,7 @@ exports.readDaemonSessionState = readDaemonSessionState;
41
41
  exports.readAllDaemonSessions = readAllDaemonSessions;
42
42
  exports.runStartupRecovery = runStartupRecovery;
43
43
  exports.makeContinueWorkflowTool = makeContinueWorkflowTool;
44
+ exports.makeCompleteStepTool = makeCompleteStepTool;
44
45
  exports.makeBashTool = makeBashTool;
45
46
  exports.makeReportIssueTool = makeReportIssueTool;
46
47
  exports.buildSessionRecap = buildSessionRecap;
@@ -324,6 +325,33 @@ function getSchemas() {
324
325
  },
325
326
  required: ['continueToken'],
326
327
  },
328
+ CompleteStepParams: {
329
+ type: 'object',
330
+ properties: {
331
+ notes: {
332
+ type: 'string',
333
+ minLength: 50,
334
+ description: 'What you did in this step (required, at least 50 characters). Write for a human reader. ' +
335
+ 'Include: what you did and key decisions, what you produced (files, tests, numbers), ' +
336
+ 'anything notable (risks, open questions, things you chose NOT to do and why). ' +
337
+ 'Use markdown: headings, bullets, bold. 10-30 lines is ideal.',
338
+ },
339
+ artifacts: {
340
+ type: 'array',
341
+ items: {},
342
+ description: 'Optional structured artifacts to attach to this step. ' +
343
+ 'Include wr.assessment objects here when the step requires an assessment gate. ' +
344
+ 'Example: [{ "kind": "wr.assessment", "assessmentId": "<id>", "dimensions": { "<dimensionId>": "high" } }]',
345
+ },
346
+ context: {
347
+ type: 'object',
348
+ additionalProperties: true,
349
+ description: 'Updated context variables (only changed values). Omit entirely if no facts changed.',
350
+ },
351
+ },
352
+ required: ['notes'],
353
+ additionalProperties: false,
354
+ },
327
355
  BashParams: {
328
356
  type: 'object',
329
357
  properties: {
@@ -353,7 +381,8 @@ function getSchemas() {
353
381
  function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
354
382
  return {
355
383
  name: 'continue_workflow',
356
- description: 'Advance the WorkRail workflow to the next step. Call this after completing all work ' +
384
+ description: '[DEPRECATED in daemon sessions -- use complete_step instead] ' +
385
+ 'Advance the WorkRail workflow to the next step. Call this after completing all work ' +
357
386
  'required by the current step. Include your notes in notesMarkdown. ' +
358
387
  'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
359
388
  inputSchema: schemas['ContinueWorkflowParams'],
@@ -442,6 +471,109 @@ function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas
442
471
  },
443
472
  };
444
473
  }
474
+ function makeCompleteStepTool(sessionId, ctx, getCurrentToken, onAdvance, onComplete, onTokenUpdate, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
475
+ return {
476
+ name: 'complete_step',
477
+ description: 'Mark the current WorkRail workflow step as complete and advance to the next one. ' +
478
+ 'Call this after completing all work required by the current step. ' +
479
+ 'Include your substantive notes (min 50 characters) describing what you did. ' +
480
+ 'The daemon manages the session token internally -- you do not need a continueToken. ' +
481
+ 'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
482
+ inputSchema: schemas['CompleteStepParams'],
483
+ label: 'Complete Step',
484
+ execute: async (_toolCallId, params) => {
485
+ console.log(`[WorkflowRunner] Tool: complete_step sessionId=${sessionId}`);
486
+ emitter?.emit({ kind: 'tool_called', sessionId, toolName: 'complete_step', summary: 'advance', ...withWorkrailSession(workrailSessionId) });
487
+ const notes = params.notes;
488
+ if (!notes || notes.length < 50) {
489
+ throw new Error(`complete_step: notes is required and must be at least 50 characters. ` +
490
+ `Provide substantive notes describing what you did, what you produced, and any notable decisions. ` +
491
+ `Current length: ${notes?.length ?? 0} characters.`);
492
+ }
493
+ const continueToken = getCurrentToken();
494
+ const result = await _executeContinueWorkflowFn({
495
+ continueToken,
496
+ intent: 'advance',
497
+ output: (notes || params.artifacts?.length)
498
+ ? {
499
+ notesMarkdown: notes,
500
+ ...(params.artifacts?.length ? { artifacts: params.artifacts } : {}),
501
+ }
502
+ : undefined,
503
+ context: params.context,
504
+ }, ctx);
505
+ if (result.isErr()) {
506
+ throw new Error(`complete_step failed: ${result.error.kind} -- ${JSON.stringify(result.error)}`);
507
+ }
508
+ const out = result.value.response;
509
+ const newContinueToken = out.continueToken ?? '';
510
+ const checkpointToken = out.checkpointToken ?? null;
511
+ const persistToken = (out.kind === 'blocked' ? out.nextCall?.params.continueToken : undefined) ?? newContinueToken;
512
+ if (persistToken) {
513
+ await persistTokens(sessionId, persistToken, checkpointToken);
514
+ }
515
+ if (out.kind === 'blocked') {
516
+ const retryToken = out.nextCall?.params.continueToken ?? newContinueToken;
517
+ onTokenUpdate(retryToken);
518
+ const lines = ['## Step blocked -- action required\n'];
519
+ for (const blocker of out.blockers.blockers) {
520
+ lines.push(blocker.message);
521
+ if (blocker.suggestedFix) {
522
+ lines.push(`\nWhat to do: ${blocker.suggestedFix}`);
523
+ }
524
+ lines.push('');
525
+ }
526
+ if (out.validation) {
527
+ if (out.validation.issues.length > 0) {
528
+ lines.push('**Issues:**');
529
+ for (const issue of out.validation.issues)
530
+ lines.push(`- ${issue}`);
531
+ lines.push('');
532
+ }
533
+ if (out.validation.suggestions.length > 0) {
534
+ lines.push('**Suggestions:**');
535
+ for (const s of out.validation.suggestions)
536
+ lines.push(`- ${s}`);
537
+ lines.push('');
538
+ }
539
+ }
540
+ if (out.assessmentFollowup) {
541
+ lines.push(`**Follow-up required:** ${out.assessmentFollowup.title}`);
542
+ lines.push(out.assessmentFollowup.guidance);
543
+ lines.push('');
544
+ }
545
+ if (out.retryable) {
546
+ lines.push(`Retry the same step: call complete_step again with corrected notes.`);
547
+ }
548
+ else {
549
+ lines.push(`You cannot proceed without resolving this. Inform the user and wait for their response, then call complete_step.`);
550
+ }
551
+ const feedback = lines.join('\n');
552
+ return {
553
+ content: [{ type: 'text', text: feedback }],
554
+ details: out,
555
+ };
556
+ }
557
+ if (out.isComplete) {
558
+ onComplete(notes);
559
+ return {
560
+ content: [{ type: 'text', text: JSON.stringify({ status: 'complete' }) }],
561
+ details: out,
562
+ };
563
+ }
564
+ const pending = out.pending;
565
+ const nextStepTitle = pending?.title ?? 'Next step';
566
+ const stepText = pending
567
+ ? `${JSON.stringify({ status: 'advanced', nextStep: pending.title })}\n\n## ${pending.title}\n\n${pending.prompt}`
568
+ : JSON.stringify({ status: 'advanced', nextStep: nextStepTitle });
569
+ onAdvance(stepText, newContinueToken);
570
+ return {
571
+ content: [{ type: 'text', text: stepText }],
572
+ details: out,
573
+ };
574
+ },
575
+ };
576
+ }
445
577
  function makeBashTool(workspacePath, schemas, sessionId, emitter, workrailSessionId) {
446
578
  return {
447
579
  name: 'Bash',
@@ -635,20 +767,21 @@ Bad pattern: "I'll analyze both layers." (no justification)
635
767
  Good pattern: "Question: Should I check the middleware? Answer: The workflow step says 'trace the full call chain', and the AGENTS.md says the entry point is in the middleware layer. Yes, start there."
636
768
 
637
769
  ## Your tools
638
- - \`continue_workflow\`: Advance to the next step. Call this after completing each step's work. Always include your notes in notesMarkdown and round-trip the continueToken exactly.
770
+ - \`complete_step\`: Mark the current step complete and advance to the next one. Call this after completing ALL work required by the step. Include your notes (min 50 characters) in the notes field. The daemon manages the session token internally -- you do NOT need a continueToken. This is the preferred advancement tool for daemon sessions.
771
+ - \`continue_workflow\`: [DEPRECATED -- use complete_step instead] Legacy advancement tool. Requires a continueToken that you must round-trip exactly. Only use this if complete_step is unavailable.
639
772
  - \`Bash\`: Run shell commands. Use for building, testing, running scripts.
640
773
  - \`Read\`: Read files.
641
774
  - \`Write\`: Write files.
642
- - \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND continue_workflow (unless fatal). Does not stop the session -- it creates a record for the auto-fix coordinator.
775
+ - \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND complete_step (unless fatal). Does not stop the session -- it creates a record for the auto-fix coordinator.
643
776
 
644
777
  ## Execution contract
645
778
  1. Read the step carefully. Do ALL the work the step asks for.
646
- 2. Call \`continue_workflow\` with your notes. Include the continueToken exactly.
779
+ 2. Call \`complete_step\` with your notes. No continueToken needed -- the daemon manages it.
647
780
  3. Repeat until the workflow reports it is complete.
648
- 4. Do NOT skip steps. Do NOT call \`continue_workflow\` without completing the step's work.
781
+ 4. Do NOT skip steps. Do NOT call \`complete_step\` without completing the step's work.
649
782
 
650
783
  ## The workflow is the contract
651
- Every step must be fully completed before you call continue_workflow. The workflow step prompt is the specification of what 'done' means -- not a suggestion. Don't advance until the work is actually done.
784
+ Every step must be fully completed before you call complete_step. The workflow step prompt is the specification of what 'done' means -- not a suggestion. Don't advance until the work is actually done.
652
785
 
653
786
  Your cognitive mode changes per step: some steps make you a researcher, others a reviewer, others an implementer. Adopt the mode the step describes. Don't bring your own agenda.
654
787
 
@@ -659,7 +792,10 @@ If something goes wrong: call report_issue, then continue unless severity is 'fa
659
792
  Don't narrate what you're about to do. Use the tool and report what you found. Token efficiency matters -- you have a wall-clock timeout.
660
793
 
661
794
  ## You don't have a user. You have a workflow and a soul.
662
- If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue.\
795
+ If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue.
796
+
797
+ ## IMPORTANT: Never use continue_workflow in daemon sessions
798
+ complete_step is your advancement tool. It does not require a continueToken. Do NOT call continue_workflow with a token you found in a previous message -- use complete_step instead.\
663
799
  `;
664
800
  function buildSessionRecap(notes) {
665
801
  if (notes.length === 0)
@@ -746,9 +882,10 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
746
882
  const STUCK_REPEAT_THRESHOLD = 3;
747
883
  const issueSummaries = [];
748
884
  const MAX_ISSUE_SUMMARIES = 10;
749
- const onAdvance = (stepText, _continueToken) => {
885
+ const onAdvance = (stepText, continueToken) => {
750
886
  pendingSteerText = stepText;
751
887
  stepAdvanceCount++;
888
+ currentContinueToken = continueToken;
752
889
  if (workrailSessionId !== null)
753
890
  daemonRegistry?.heartbeat(workrailSessionId);
754
891
  emitter?.emit({ kind: 'step_advanced', sessionId, ...withWorkrailSession(workrailSessionId) });
@@ -775,6 +912,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
775
912
  }
776
913
  const startContinueToken = firstStep.continueToken ?? '';
777
914
  const startCheckpointToken = firstStep.checkpointToken ?? null;
915
+ let currentContinueToken = startContinueToken;
778
916
  if (startContinueToken) {
779
917
  const decoded = await (0, v2_token_ops_js_1.parseContinueTokenOrFail)(startContinueToken, ctx.v2.tokenCodecPorts, ctx.v2.tokenAliasStore);
780
918
  if (decoded.isOk()) {
@@ -799,6 +937,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
799
937
  }
800
938
  const schemas = getSchemas();
801
939
  const tools = [
940
+ makeCompleteStepTool(sessionId, ctx, () => currentContinueToken, onAdvance, onComplete, (t) => { currentContinueToken = t; }, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
802
941
  makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
803
942
  makeBashTool(trigger.workspacePath, schemas, sessionId, emitter, workrailSessionId),
804
943
  makeReadTool(schemas, sessionId, emitter, workrailSessionId),
@@ -819,9 +958,8 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
819
958
  ? `\n\nTrigger context:\n\`\`\`json\n${JSON.stringify(trigger.context, null, 2)}\n\`\`\``
820
959
  : '';
821
960
  const initialPrompt = (firstStep.pending?.prompt ?? 'No step content available') +
822
- `\n\ncontinueToken: ${startContinueToken}` +
823
961
  contextJson +
824
- '\n\nComplete all step work, then call continue_workflow with your notes to begin.';
962
+ '\n\nComplete all step work, then call complete_step with your notes to advance.';
825
963
  const agentCallbacks = {
826
964
  onLlmTurnStarted: ({ messageCount }) => {
827
965
  emitter?.emit({
@@ -445,12 +445,12 @@
445
445
  "sha256": "cf9d09641f1c31fffe6c7835b30bbbad52572befec1acab7fb9a0c188431af36",
446
446
  "bytes": 60355
447
447
  },
448
- "console-ui/assets/index-DSRkHTz1.js": {
449
- "sha256": "15d0103e401c97548eb0266e5305428db769ed6dffe9b501c5241d3fd6fc83c3",
448
+ "console-ui/assets/index-B10Bn8qC.js": {
449
+ "sha256": "7f622b872ba39f5973c38b8fc93e9a5b00074dcdab4540ffbc7a3a78a560e55a",
450
450
  "bytes": 754653
451
451
  },
452
452
  "console-ui/index.html": {
453
- "sha256": "36a20b6ab5382dd54e1238c0c0f4ef3f2f95103cbcc2dd03228947ef8e201b1e",
453
+ "sha256": "c814df5d1fa998848e5c4ee61a1455c2a50bfcefb0af908d3f8fa02d311de32c",
454
454
  "bytes": 417
455
455
  },
456
456
  "console/standalone-console.d.ts": {
@@ -502,12 +502,12 @@
502
502
  "bytes": 1009
503
503
  },
504
504
  "daemon/workflow-runner.d.ts": {
505
- "sha256": "7c2b4283551676702906aeceb553eb1c329c254679d95a2dd2dc980c484d55dc",
506
- "bytes": 3669
505
+ "sha256": "598ca3cda5dba827d0eddf80baf4136b401d821c81ec83aacbee05a63b836d9a",
506
+ "bytes": 4103
507
507
  },
508
508
  "daemon/workflow-runner.js": {
509
- "sha256": "e1caabaeeac274ac8d750fcca52177394da611e669e401ce2672d1f581cb3c54",
510
- "bytes": 48224
509
+ "sha256": "a54677cdf2d2083fd9672b25b9d2264defa8dde7b357055bc14748d0ce9e7098",
510
+ "bytes": 56093
511
511
  },
512
512
  "di/container.d.ts": {
513
513
  "sha256": "003bb7fb7478d627524b9b1e76bd0a963a243794a687ff233b96dc0e33a06d9f",
@@ -0,0 +1,160 @@
1
+ # daemon complete_step tool -- Design Candidates
2
+
3
+ ## Problem Understanding
4
+
5
+ ### Core Tensions
6
+
7
+ 1. **Simplicity vs. completeness of the blocked-response path**: The naive solution handles the happy path (advance) but misses blocked responses. On a blocked response, `executeContinueWorkflow` returns a `retryContinueToken` (the shipped tool reads it from `nextCall.params.continueToken`, falling back to the response's `continueToken`). The LLM needs to retry `complete_step` with corrected notes, but what token should the tool inject? It must be the retry token, not the original session token -- so the closure variable MUST be updated to the retry token on blocked responses.
8
+
9
+ 2. **Crash safety vs. simplicity**: The existing `makeContinueWorkflowTool` calls `persistTokens()` inside `execute()` before calling `onAdvance()`. The `complete_step` tool must maintain this same invariant: persist the new token to disk before signaling the advance.
10
+
11
+ 3. **Backward compatibility vs. system prompt clarity**: `continue_workflow` must stay in the tools list (it's used by MCP sessions outside the daemon). But if `complete_step` exists alongside it, the system prompt must clearly tell the daemon agent to prefer `complete_step`.
12
+
13
+ 4. **Token update ownership**: `currentContinueToken` is written by two paths: (a) `onAdvance` after a successful advance, and (b) blocked retry in `makeCompleteStepTool.execute()`. Sequential tool execution (`toolExecution: 'sequential'` in AgentLoop) ensures no race condition, but the two write paths must be documented.
14
+
15
+ ### Likely Seam
16
+
17
+ The seam is in `runWorkflow()`: the `onAdvance` callback signature `(stepText: string, continueToken: string) => void` already passes the new `continueToken` as the second parameter. `runWorkflow()` currently ignores it (because `continue_workflow` doesn't need it -- the LLM round-trips the token). For `complete_step`, we just need to use it.
18
+
19
+ ### What Makes This Hard
20
+
21
+ - Two token update paths (advance + blocked retry) must both be correct
22
+ - Token must be persisted before `onAdvance` fires (crash safety invariant)
23
+ - Blocked response feedback must not include `continueToken` in the text (LLM doesn't need to see it)
24
+ - System prompt needs to be updated to prefer `complete_step` without breaking existing `continue_workflow` callers
25
+
26
+ ---
27
+
28
+ ## Philosophy Constraints
29
+
30
+ Sources: `CLAUDE.md` (workspace root), repo patterns in `src/daemon/workflow-runner.ts`
31
+
32
+ **Active principles:**
33
+ - **Immutability by default**: confine mutation to the minimal API -- `currentContinueToken` updated only via callbacks
34
+ - **YAGNI with discipline**: avoid speculative abstractions
35
+ - **Prefer fakes over mocks**: test with fake `executeContinueWorkflow` injection
36
+ - **Document "why" not "what"**: add WHY comments explaining the daemon token injection
37
+ - **Make illegal states unrepresentable**: `notes` required with `minLength: 50` at JSON Schema level
38
+
39
+ No philosophy conflicts found between stated rules and repo patterns.
40
+
41
+ ---
42
+
43
+ ## Impact Surface
44
+
45
+ - `src/daemon/workflow-runner.ts`: new factory function + updates to `runWorkflow()`, `getSchemas()`, `BASE_SYSTEM_PROMPT`
46
+ - `tests/unit/workflow-runner-complete-step.test.ts`: new test file
47
+ - `continue_workflow` tool: unchanged (backward compat)
48
+ - Public MCP tools list: unchanged (`complete_step` is daemon-only, not exposed via MCP)
49
+ - `V2ContinueWorkflowInputShape` / output schemas: unchanged
50
+
51
+ ---
52
+
53
+ ## Candidates
54
+
55
+ ### Candidate 1: Inline closure variable + two callback paths (RECOMMENDED)
56
+
57
+ **Summary:** Add `let currentContinueToken = startContinueToken` to `runWorkflow()`; update it in the existing `onAdvance` closure (which already receives `continueToken` as a second param but ignores it); for blocked-retry token updates, add an `onTokenUpdate: (t: string) => void` callback parameter to `makeCompleteStepTool()`; `makeCompleteStepTool()` uses `getCurrentToken: () => string` to inject the token.
58
+
59
+ **Tensions resolved:**
60
+ - Crash safety: identical `persistTokens` path before callbacks
61
+ - Blocked retry: `onTokenUpdate` callback updates `currentContinueToken` to retry token
62
+ - Backward compat: additive only, both tools in list
63
+
64
+ **Tensions accepted:**
65
+ - Two token-update paths (onAdvance for advance, onTokenUpdate for blocked) could diverge if future developers update one but forget the other
66
+
67
+ **Boundary solved at:** `runWorkflow()` closure + new factory function alongside existing one
68
+
69
+ **Why this boundary is the best fit:** The seam already exists -- `onAdvance` receives the new `continueToken` but ignores it. Adding a single `onTokenUpdate` callback for blocked responses follows the same pattern used by `onIssueSummary` in `makeReportIssueTool`.
70
+
71
+ **Failure mode:** A developer forgets to update `currentContinueToken` on a blocked retry, so the retry reuses the stale token and fails with a second `TOKEN_BAD_SIGNATURE` error.
72
+
73
+ **Repo-pattern relationship:** Follows exactly -- same closure pattern, same fake injection, same `persistTokens` placement.
74
+
75
+ **Gains:**
76
+ - Zero new abstractions
77
+ - Easy to test with same fake injection pattern
78
+ - Minimal change surface
79
+
80
+ **Losses:**
81
+ - Two write paths for `currentContinueToken` (minor)
82
+
83
+ **Scope judgment:** Best-fit
84
+
85
+ **Philosophy fit:** Honors YAGNI, immutability, fakes over mocks. No conflicts.
86
+
87
+ ---
88
+
89
+ ### Candidate 2: Token ref object `{ get(): string; set(t: string): void }`
90
+
91
+ **Summary:** Introduce a typed `TokenRef` interface; instantiate once in `runWorkflow()` with `currentContinueToken` as internal state; pass the ref to both `makeCompleteStepTool` and `makeContinueWorkflowTool`; both tools call `ref.set()` on advance and blocked-retry.
92
+
93
+ **Tensions resolved:**
94
+ - Single token update path (only `ref.set()` is called in both tools)
95
+
96
+ **Tensions accepted:**
97
+ - Adds a new abstraction (`TokenRef`) not present elsewhere in the repo
98
+ - Requires modifying `makeContinueWorkflowTool` signature (breaks backward compat of the function signature, though not behavior)
99
+
100
+ **Boundary solved at:** New `TokenRef` interface shared across factory functions
101
+
102
+ **Why this boundary is the best fit:** Only justified if both tools are converted. The spec says keep `continue_workflow` for backward compat -- converting it is out of scope.
103
+
104
+ **Failure mode:** Over-engineering; `TokenRef` is unnecessary if `makeContinueWorkflowTool` isn't converted.
105
+
106
+ **Repo-pattern relationship:** Departs from existing patterns (no mutable ref objects elsewhere).
107
+
108
+ **Gains:** Single token update path, explicit ownership model
109
+
110
+ **Losses:** New concept to understand, marginal benefit, violates YAGNI
111
+
112
+ **Scope judgment:** Too broad -- would require modifying `makeContinueWorkflowTool` signature, which isn't needed
113
+
114
+ **Philosophy fit:** Honors explicit domain types. Conflicts with YAGNI with discipline.
115
+
116
+ ---
117
+
118
+ ## Comparison and Recommendation
119
+
120
+ | Criterion | Candidate 1 | Candidate 2 |
121
+ |-----------|-------------|-------------|
122
+ | Zero new abstractions | Yes | No |
123
+ | Single token update path | No (two paths) | Yes |
124
+ | Matches repo patterns | Yes | Departs |
125
+ | Requires makeContinueWorkflowTool change | No | Yes |
126
+ | YAGNI | Passes | Fails |
127
+ | Test complexity | Same fake injection | Same |
128
+
129
+ **Recommendation: Candidate 1.**
130
+
131
+ The two write paths are manageable because:
132
+ 1. They are clearly distinct: `onAdvance` fires on successful advance, `onTokenUpdate` fires on blocked retry
133
+ 2. Both have WHY comments explaining their role
134
+ 3. `toolExecution: 'sequential'` makes races impossible
135
+
136
+ Candidate 2 fails the YAGNI test. The single update path benefit only matters if `makeContinueWorkflowTool` is also converted -- which is explicitly out of scope.
137
+
138
+ ---
139
+
140
+ ## Self-Critique
141
+
142
+ **Strongest counter-argument:** Candidate 2's `TokenRef` would prevent the two-path divergence risk. If a future developer adds a third code path that updates the token (e.g., a hypothetical `rehydrate_step` tool), Candidate 1 requires a third callback while Candidate 2 just needs another `ref.set()` call.
143
+
144
+ **Narrower option:** Don't add `onTokenUpdate` at all; instead update `currentContinueToken` directly inside `makeCompleteStepTool.execute()` by passing a setter into the factory. This is functionally identical to `onTokenUpdate` -- just named differently.
145
+
146
+ **Broader option threshold:** `TokenRef` would be justified when a second token-managing tool (e.g., `rehydrate_step`) is being added in the same PR.
147
+
148
+ **Assumption that would invalidate this design:** If `toolExecution: 'sequential'` is ever changed to parallel, the two write paths could race. But sequential execution is a fundamental requirement for workflow step ordering and will not change.
149
+
150
+ ---
151
+
152
+ ## Open Questions for the Main Agent
153
+
154
+ 1. Should `complete_step` include `continueToken` in the success response text? (The spec says return `{ status: 'advanced', nextStep: string }` or `{ status: 'complete' }` -- so no token in the response text. This is correct and important.)
155
+
156
+ 2. Should the blocked-response feedback text for `complete_step` say "call complete_step again" instead of "call continue_workflow"? Yes -- the blocked feedback must be updated to reference `complete_step`.
157
+
158
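+ 3. The spec says `notes: string` required min 50 chars. Should this be enforced at JSON Schema level (LLM sees validation error) or inside `execute()` (tool throws)? JSON Schema is preferred -- it gives the LLM a clear validation error before the tool even runs. (Note: the design review found that AgentLoop does not enforce JSON Schema `minLength`, so a runtime length check inside `execute()` is also required; the schema constraint alone is only advisory to the LLM.)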
159
+
160
+ 4. Should `workspacePath` be removed from `complete_step` (since the daemon always knows it)? Yes -- daemon context is always available, no need to pass it through.
@@ -0,0 +1,82 @@
1
+ # daemon complete_step tool -- Design Review Findings
2
+
3
+ ## Tradeoff Review
4
+
5
+ ### T1: Two token write paths (onAdvance + onTokenUpdate)
6
+ **Assessment:** Acceptable. The two paths are structurally mutually exclusive (`kind: 'ok'` vs `kind: 'blocked'` response branches). Sequential tool execution prevents races. The response kind enum is closed (`ok` | `blocked`). No future third path is anticipated.
7
+ **Condition for re-evaluation:** If a third response kind is added that carries a new token.
8
+
9
+ ### T2: Blocked feedback references `complete_step`
10
+ **Assessment:** Acceptable. The daemon's tool list will have `complete_step` as the primary tool. Blocked feedback pointing to it is correct.
11
+
12
+ ### T3: `notes` minLength enforced at JSON Schema + runtime
13
+ **Finding:** JSON Schema `minLength` is informational to the LLM but NOT enforced by AgentLoop. Runtime check inside `execute()` is required.
14
+ **Revision:** Add `if (!params.notes || params.notes.length < 50) throw new Error(...)` inside `execute()`.
15
+
16
+ ---
17
+
18
+ ## Failure Mode Review
19
+
20
+ | Failure Mode | Covered? | Mitigation |
21
+ |---|---|---|
22
+ | FM1: Wrong token on blocked retry | Yes | `onTokenUpdate` called with retry token from blocked response |
23
+ | FM2: Notes too short not caught | Fixed | Runtime length check in `execute()` |
24
+ | FM3: Token in response text | Yes | Response text does not include token |
25
+ | FM4: LLM calls `continue_workflow` with hallucinated token | Partial | System prompt marks it deprecated; accepted as transition risk |
26
+ | FM5: Wrong intent used | Yes | `intent: 'advance'` hardcoded, not a parameter |
27
+
28
+ ---
29
+
30
+ ## Runner-Up / Simpler Alternative Review
31
+
32
+ - `TokenRef` object: not worth pulling in -- requires `makeContinueWorkflowTool` signature change which is out of scope
33
+ - Simpler variant (inline `setCurrentToken` vs `onTokenUpdate` callback): functionally identical, cosmetic only
34
+ - No hybrid opportunity that reduces complexity
35
+
36
+ **Verdict:** Selected design is the simplest that satisfies all criteria.
37
+
38
+ ---
39
+
40
+ ## Philosophy Alignment
41
+
42
+ | Principle | Status |
43
+ |---|---|
44
+ | Immutability by default | Satisfied -- mutation confined to callbacks |
45
+ | YAGNI with discipline | Satisfied -- zero new abstractions |
46
+ | Prefer fakes over mocks | Satisfied -- fake injection pattern |
47
+ | Validate at boundaries | Satisfied after fix -- runtime check added |
48
+ | Make illegal states unrepresentable | Satisfied -- notes required, intent hardcoded |
49
+ | Determinism | Acceptable tension -- sequential execution makes mutable state deterministic |
50
+
51
+ ---
52
+
53
+ ## Findings
54
+
55
+ ### Yellow: Two token write paths (manageable)
56
+ The `currentContinueToken` closure variable is updated in two places: `onAdvance` (on successful advance) and `onTokenUpdate` (on blocked retry). These are mutually exclusive branches. Risk is low because sequential execution prevents races, but it should be documented with WHY comments.
57
+
58
+ **Action:** Add WHY comment above each update explaining which response kind it handles.
59
+
60
+ ### Yellow: Runtime notes validation missing
61
+ JSON Schema `minLength: 50` is informational only. Without a runtime check, a notes string of 10 chars would pass through to `executeContinueWorkflow`, potentially causing a downstream blocked response. Better to fail fast in the tool.
62
+
63
+ **Action:** Add runtime check in `execute()` before calling `executeContinueWorkflow`.
64
+
65
+ ---
66
+
67
+ ## Recommended Revisions
68
+
69
+ 1. **Add runtime notes validation**: `if (!params.notes || params.notes.length < 50) throw new Error('complete_step: notes is required and must be at least 50 characters. Include what you did and what you produced.')`
70
+
71
+ 2. **Document two token write paths**: Add WHY comments above each `currentContinueToken` update explaining which execution branch it covers.
72
+
73
+ 3. **Blocked feedback text**: Replace `call continue_workflow` references in blocked feedback with `call complete_step again with corrected notes`.
74
+
75
+ 4. **System prompt update**: Add explicit guidance that `complete_step` is the preferred advancement tool and `continue_workflow` is deprecated in daemon sessions. Prefix the `continue_workflow` tool description with `[DEPRECATED in daemon sessions]` so the deprecation is visible wherever the tool is listed.
76
+
77
+ ---
78
+
79
+ ## Residual Concerns
80
+
81
+ - **FM4 (LLM using deprecated continue_workflow)**: Accepted. During transition, the LLM might call `continue_workflow` with a correctly round-tripped token (from the initial prompt or a previous response). This is functional but defeats the purpose. The system prompt must be strong enough to prevent this.
82
+ - **`continueToken` in initial prompt**: The initial prompt currently includes `continueToken: ${startContinueToken}`. With `complete_step`, this is no longer needed since the daemon manages the token. The initial prompt should be updated to remove the token and use `call complete_step when done` instead. This is a UX improvement, not a correctness issue.