npm - @exaudeus/workrail - Versions diffs - 3.34.2 → 3.35.0 - Mend

@exaudeus/workrail 3.34.2 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/console-ui/assets/{index-DSRkHTz1.js → index-B10Bn8qC.js} +1 -1
package/dist/console-ui/index.html +1 -1
package/dist/daemon/workflow-runner.d.ts +1 -0
package/dist/daemon/workflow-runner.js +148 -10
package/dist/manifest.json +7 -7
package/docs/design/daemon-complete-step-tool-candidates.md +160 -0
package/docs/design/daemon-complete-step-tool-design-review.md +82 -0
package/docs/design/daemon-complete-step-tool-implementation-plan.md +166 -0
package/docs/ideas/backlog.md +224 -0
package/package.json +1 -1

package/dist/daemon/workflow-runner.js CHANGED Viewed

@@ -41,6 +41,7 @@ exports.readDaemonSessionState = readDaemonSessionState;
 exports.readAllDaemonSessions = readAllDaemonSessions;
 exports.runStartupRecovery = runStartupRecovery;
 exports.makeContinueWorkflowTool = makeContinueWorkflowTool;
+exports.makeCompleteStepTool = makeCompleteStepTool;
 exports.makeBashTool = makeBashTool;
 exports.makeReportIssueTool = makeReportIssueTool;
 exports.buildSessionRecap = buildSessionRecap;
@@ -324,6 +325,33 @@ function getSchemas() {
             },
             required: ['continueToken'],
         },
+        CompleteStepParams: {
+            type: 'object',
+            properties: {
+                notes: {
+                    type: 'string',
+                    minLength: 50,
+                    description: 'What you did in this step (required, at least 50 characters). Write for a human reader. ' +
+                        'Include: what you did and key decisions, what you produced (files, tests, numbers), ' +
+                        'anything notable (risks, open questions, things you chose NOT to do and why). ' +
+                        'Use markdown: headings, bullets, bold. 10-30 lines is ideal.',
+                },
+                artifacts: {
+                    type: 'array',
+                    items: {},
+                    description: 'Optional structured artifacts to attach to this step. ' +
+                        'Include wr.assessment objects here when the step requires an assessment gate. ' +
+                        'Example: [{ "kind": "wr.assessment", "assessmentId": "<id>", "dimensions": { "<dimensionId>": "high" } }]',
+                },
+                context: {
+                    type: 'object',
+                    additionalProperties: true,
+                    description: 'Updated context variables (only changed values). Omit entirely if no facts changed.',
+                },
+            },
+            required: ['notes'],
+            additionalProperties: false,
+        },
         BashParams: {
             type: 'object',
             properties: {
@@ -353,7 +381,8 @@ function getSchemas() {
 function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
     return {
         name: 'continue_workflow',
-        description: 'Advance the WorkRail workflow to the next step. Call this after completing all work ' +
+        description: '[DEPRECATED in daemon sessions -- use complete_step instead] ' +
+            'Advance the WorkRail workflow to the next step. Call this after completing all work ' +
             'required by the current step. Include your notes in notesMarkdown. ' +
             'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
         inputSchema: schemas['ContinueWorkflowParams'],
@@ -442,6 +471,109 @@ function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas
         },
     };
 }
+function makeCompleteStepTool(sessionId, ctx, getCurrentToken, onAdvance, onComplete, onTokenUpdate, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
+    return {
+        name: 'complete_step',
+        description: 'Mark the current WorkRail workflow step as complete and advance to the next one. ' +
+            'Call this after completing all work required by the current step. ' +
+            'Include your substantive notes (min 50 characters) describing what you did. ' +
+            'The daemon manages the session token internally -- you do not need a continueToken. ' +
+            'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
+        inputSchema: schemas['CompleteStepParams'],
+        label: 'Complete Step',
+        execute: async (_toolCallId, params) => {
+            console.log(`[WorkflowRunner] Tool: complete_step sessionId=${sessionId}`);
+            emitter?.emit({ kind: 'tool_called', sessionId, toolName: 'complete_step', summary: 'advance', ...withWorkrailSession(workrailSessionId) });
+            const notes = params.notes;
+            if (!notes || notes.length < 50) {
+                throw new Error(`complete_step: notes is required and must be at least 50 characters. ` +
+                    `Provide substantive notes describing what you did, what you produced, and any notable decisions. ` +
+                    `Current length: ${notes?.length ?? 0} characters.`);
+            }
+            const continueToken = getCurrentToken();
+            const result = await _executeContinueWorkflowFn({
+                continueToken,
+                intent: 'advance',
+                output: (notes || params.artifacts?.length)
+                    ? {
+                        notesMarkdown: notes,
+                        ...(params.artifacts?.length ? { artifacts: params.artifacts } : {}),
+                    }
+                    : undefined,
+                context: params.context,
+            }, ctx);
+            if (result.isErr()) {
+                throw new Error(`complete_step failed: ${result.error.kind} -- ${JSON.stringify(result.error)}`);
+            }
+            const out = result.value.response;
+            const newContinueToken = out.continueToken ?? '';
+            const checkpointToken = out.checkpointToken ?? null;
+            const persistToken = (out.kind === 'blocked' ? out.nextCall?.params.continueToken : undefined) ?? newContinueToken;
+            if (persistToken) {
+                await persistTokens(sessionId, persistToken, checkpointToken);
+            }
+            if (out.kind === 'blocked') {
+                const retryToken = out.nextCall?.params.continueToken ?? newContinueToken;
+                onTokenUpdate(retryToken);
+                const lines = ['## Step blocked -- action required\n'];
+                for (const blocker of out.blockers.blockers) {
+                    lines.push(blocker.message);
+                    if (blocker.suggestedFix) {
+                        lines.push(`\nWhat to do: ${blocker.suggestedFix}`);
+                    }
+                    lines.push('');
+                }
+                if (out.validation) {
+                    if (out.validation.issues.length > 0) {
+                        lines.push('**Issues:**');
+                        for (const issue of out.validation.issues)
+                            lines.push(`- ${issue}`);
+                        lines.push('');
+                    }
+                    if (out.validation.suggestions.length > 0) {
+                        lines.push('**Suggestions:**');
+                        for (const s of out.validation.suggestions)
+                            lines.push(`- ${s}`);
+                        lines.push('');
+                    }
+                }
+                if (out.assessmentFollowup) {
+                    lines.push(`**Follow-up required:** ${out.assessmentFollowup.title}`);
+                    lines.push(out.assessmentFollowup.guidance);
+                    lines.push('');
+                }
+                if (out.retryable) {
+                    lines.push(`Retry the same step: call complete_step again with corrected notes.`);
+                }
+                else {
+                    lines.push(`You cannot proceed without resolving this. Inform the user and wait for their response, then call complete_step.`);
+                }
+                const feedback = lines.join('\n');
+                return {
+                    content: [{ type: 'text', text: feedback }],
+                    details: out,
+                };
+            }
+            if (out.isComplete) {
+                onComplete(notes);
+                return {
+                    content: [{ type: 'text', text: JSON.stringify({ status: 'complete' }) }],
+                    details: out,
+                };
+            }
+            const pending = out.pending;
+            const nextStepTitle = pending?.title ?? 'Next step';
+            const stepText = pending
+                ? `${JSON.stringify({ status: 'advanced', nextStep: pending.title })}\n\n## ${pending.title}\n\n${pending.prompt}`
+                : JSON.stringify({ status: 'advanced', nextStep: nextStepTitle });
+            onAdvance(stepText, newContinueToken);
+            return {
+                content: [{ type: 'text', text: stepText }],
+                details: out,
+            };
+        },
+    };
+}
 function makeBashTool(workspacePath, schemas, sessionId, emitter, workrailSessionId) {
     return {
         name: 'Bash',
@@ -635,20 +767,21 @@ Bad pattern: "I'll analyze both layers." (no justification)
 Good pattern: "Question: Should I check the middleware? Answer: The workflow step says 'trace the full call chain', and the AGENTS.md says the entry point is in the middleware layer. Yes, start there."
 ## Your tools
-- \`continue_workflow\`: Advance to the next step. Call this after completing each step's work. Always include your notes in notesMarkdown and round-trip the continueToken exactly.
+- \`complete_step\`: Mark the current step complete and advance to the next one. Call this after completing ALL work required by the step. Include your notes (min 50 characters) in the notes field. The daemon manages the session token internally -- you do NOT need a continueToken. This is the preferred advancement tool for daemon sessions.
+- \`continue_workflow\`: [DEPRECATED -- use complete_step instead] Legacy advancement tool. Requires a continueToken that you must round-trip exactly. Only use this if complete_step is unavailable.
 - \`Bash\`: Run shell commands. Use for building, testing, running scripts.
 - \`Read\`: Read files.
 - \`Write\`: Write files.
-- \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND continue_workflow (unless fatal). Does not stop the session -- it creates a record for the auto-fix coordinator.
+- \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND complete_step (unless fatal). Does not stop the session -- it creates a record for the auto-fix coordinator.
 ## Execution contract
 1. Read the step carefully. Do ALL the work the step asks for.
-2. Call \`continue_workflow\` with your notes. Include the continueToken exactly.
+2. Call \`complete_step\` with your notes. No continueToken needed -- the daemon manages it.
 3. Repeat until the workflow reports it is complete.
-4. Do NOT skip steps. Do NOT call \`continue_workflow\` without completing the step's work.
+4. Do NOT skip steps. Do NOT call \`complete_step\` without completing the step's work.
 ## The workflow is the contract
-Every step must be fully completed before you call continue_workflow. The workflow step prompt is the specification of what 'done' means -- not a suggestion. Don't advance until the work is actually done.
+Every step must be fully completed before you call complete_step. The workflow step prompt is the specification of what 'done' means -- not a suggestion. Don't advance until the work is actually done.
 Your cognitive mode changes per step: some steps make you a researcher, others a reviewer, others an implementer. Adopt the mode the step describes. Don't bring your own agenda.
@@ -659,7 +792,10 @@ If something goes wrong: call report_issue, then continue unless severity is 'fa
 Don't narrate what you're about to do. Use the tool and report what you found. Token efficiency matters -- you have a wall-clock timeout.
 ## You don't have a user. You have a workflow and a soul.
-If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue.\
+If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue.
+## IMPORTANT: Never use continue_workflow in daemon sessions
+complete_step is your advancement tool. It does not require a continueToken. Do NOT call continue_workflow with a token you found in a previous message -- use complete_step instead.\
 `;
 function buildSessionRecap(notes) {
     if (notes.length === 0)
@@ -746,9 +882,10 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
     const STUCK_REPEAT_THRESHOLD = 3;
     const issueSummaries = [];
     const MAX_ISSUE_SUMMARIES = 10;
-    const onAdvance = (stepText, _continueToken) => {
+    const onAdvance = (stepText, continueToken) => {
         pendingSteerText = stepText;
         stepAdvanceCount++;
+        currentContinueToken = continueToken;
         if (workrailSessionId !== null)
             daemonRegistry?.heartbeat(workrailSessionId);
         emitter?.emit({ kind: 'step_advanced', sessionId, ...withWorkrailSession(workrailSessionId) });
@@ -775,6 +912,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
     }
     const startContinueToken = firstStep.continueToken ?? '';
     const startCheckpointToken = firstStep.checkpointToken ?? null;
+    let currentContinueToken = startContinueToken;
     if (startContinueToken) {
         const decoded = await (0, v2_token_ops_js_1.parseContinueTokenOrFail)(startContinueToken, ctx.v2.tokenCodecPorts, ctx.v2.tokenAliasStore);
         if (decoded.isOk()) {
@@ -799,6 +937,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
     }
     const schemas = getSchemas();
     const tools = [
+        makeCompleteStepTool(sessionId, ctx, () => currentContinueToken, onAdvance, onComplete, (t) => { currentContinueToken = t; }, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
         makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
         makeBashTool(trigger.workspacePath, schemas, sessionId, emitter, workrailSessionId),
         makeReadTool(schemas, sessionId, emitter, workrailSessionId),
@@ -819,9 +958,8 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
         ? `\n\nTrigger context:\n\`\`\`json\n${JSON.stringify(trigger.context, null, 2)}\n\`\`\``
         : '';
     const initialPrompt = (firstStep.pending?.prompt ?? 'No step content available') +
-        `\n\ncontinueToken: ${startContinueToken}` +
         contextJson +
-        '\n\nComplete all step work, then call continue_workflow with your notes to begin.';
+        '\n\nComplete all step work, then call complete_step with your notes to advance.';
     const agentCallbacks = {
         onLlmTurnStarted: ({ messageCount }) => {
             emitter?.emit({

package/dist/manifest.json CHANGED Viewed

@@ -445,12 +445,12 @@
       "sha256": "cf9d09641f1c31fffe6c7835b30bbbad52572befec1acab7fb9a0c188431af36",
       "bytes": 60355
     },
-    "console-ui/assets/index-DSRkHTz1.js": {
-      "sha256": "15d0103e401c97548eb0266e5305428db769ed6dffe9b501c5241d3fd6fc83c3",
+    "console-ui/assets/index-B10Bn8qC.js": {
+      "sha256": "7f622b872ba39f5973c38b8fc93e9a5b00074dcdab4540ffbc7a3a78a560e55a",
       "bytes": 754653
     },
     "console-ui/index.html": {
-      "sha256": "36a20b6ab5382dd54e1238c0c0f4ef3f2f95103cbcc2dd03228947ef8e201b1e",
+      "sha256": "c814df5d1fa998848e5c4ee61a1455c2a50bfcefb0af908d3f8fa02d311de32c",
       "bytes": 417
     },
     "console/standalone-console.d.ts": {
@@ -502,12 +502,12 @@
       "bytes": 1009
     },
     "daemon/workflow-runner.d.ts": {
-      "sha256": "7c2b4283551676702906aeceb553eb1c329c254679d95a2dd2dc980c484d55dc",
-      "bytes": 3669
+      "sha256": "598ca3cda5dba827d0eddf80baf4136b401d821c81ec83aacbee05a63b836d9a",
+      "bytes": 4103
     },
     "daemon/workflow-runner.js": {
-      "sha256": "e1caabaeeac274ac8d750fcca52177394da611e669e401ce2672d1f581cb3c54",
-      "bytes": 48224
+      "sha256": "a54677cdf2d2083fd9672b25b9d2264defa8dde7b357055bc14748d0ce9e7098",
+      "bytes": 56093
     },
     "di/container.d.ts": {
       "sha256": "003bb7fb7478d627524b9b1e76bd0a963a243794a687ff233b96dc0e33a06d9f",

package/docs/design/daemon-complete-step-tool-candidates.md ADDED Viewed

@@ -0,0 +1,160 @@
+# daemon complete_step tool -- Design Candidates
+## Problem Understanding
+### Core Tensions
+1. **Simplicity vs. completeness of the blocked-response path**: The naive solution handles the happy path (advance) but misses blocked responses. On a blocked response, `executeContinueWorkflow` returns a `retryContinueToken`. The LLM needs to retry `complete_step` with corrected notes, but what token should the tool inject? It must be the retry token, not the original session token -- so the closure variable MUST be updated to the retry token on blocked responses.
+2. **Crash safety vs. simplicity**: The existing `makeContinueWorkflowTool` calls `persistTokens()` inside `execute()` before calling `onAdvance()`. The `complete_step` tool must maintain this same invariant: persist the new token to disk before signaling the advance.
+3. **Backward compatibility vs. system prompt clarity**: `continue_workflow` must stay in the tools list (it's used by MCP sessions outside the daemon). But if `complete_step` exists alongside it, the system prompt must clearly tell the daemon agent to prefer `complete_step`.
+4. **Token update ownership**: `currentContinueToken` is written by two paths: (a) `onAdvance` after a successful advance, and (b) blocked retry in `makeCompleteStepTool.execute()`. Sequential tool execution (`toolExecution: 'sequential'` in AgentLoop) ensures no race condition, but the two write paths must be documented.
+### Likely Seam
+The seam is in `runWorkflow()`: the `onAdvance` callback signature `(stepText: string, continueToken: string) => void` already passes the new `continueToken` as the second parameter. `runWorkflow()` currently ignores it (because `continue_workflow` doesn't need it -- the LLM round-trips the token). For `complete_step`, we just need to use it.
+### What Makes This Hard
+- Two token update paths (advance + blocked retry) must both be correct
+- Token must be persisted before `onAdvance` fires (crash safety invariant)
+- Blocked response feedback must not include `continueToken` in the text (LLM doesn't need to see it)
+- System prompt needs to be updated to prefer `complete_step` without breaking existing `continue_workflow` callers
+---
+## Philosophy Constraints
+Sources: `CLAUDE.md` (workspace root), repo patterns in `src/daemon/workflow-runner.ts`
+**Active principles:**
+- **Immutability by default**: confine mutation to the minimal API -- `currentContinueToken` updated only via callbacks
+- **YAGNI with discipline**: avoid speculative abstractions
+- **Prefer fakes over mocks**: test with fake `executeContinueWorkflow` injection
+- **Document "why" not "what"**: add WHY comments explaining the daemon token injection
+- **Make illegal states unrepresentable**: `notes` required with `minLength: 50` at JSON Schema level
+No philosophy conflicts found between stated rules and repo patterns.
+---
+## Impact Surface
+- `src/daemon/workflow-runner.ts`: new factory function + updates to `runWorkflow()`, `getSchemas()`, `BASE_SYSTEM_PROMPT`
+- `tests/unit/workflow-runner-complete-step.test.ts`: new test file
+- `continue_workflow` tool: unchanged (backward compat)
+- Public MCP tools list: unchanged (`complete_step` is daemon-only, not exposed via MCP)
+- `V2ContinueWorkflowInputShape` / output schemas: unchanged
+---
+## Candidates
+### Candidate 1: Inline closure variable + two callback paths (RECOMMENDED)
+**Summary:** Add `let currentContinueToken = startContinueToken` to `runWorkflow()`; update it in the existing `onAdvance` closure (which already receives `continueToken` as a second param but ignores it); for blocked-retry token updates, add an `onTokenUpdate: (t: string) => void` callback parameter to `makeCompleteStepTool()`; `makeCompleteStepTool()` uses `getCurrentToken: () => string` to inject the token.
+**Tensions resolved:**
+- Crash safety: identical `persistTokens` path before callbacks
+- Blocked retry: `onTokenUpdate` callback updates `currentContinueToken` to retry token
+- Backward compat: additive only, both tools in list
+**Tensions accepted:**
+- Two token-update paths (onAdvance for advance, onTokenUpdate for blocked) could diverge if future developers update one but forget the other
+**Boundary solved at:** `runWorkflow()` closure + new factory function alongside existing one
+**Why this boundary is the best fit:** The seam already exists -- `onAdvance` receives the new `continueToken` but ignores it. Adding a single `onTokenUpdate` callback for blocked responses follows the same pattern used by `onIssueSummary` in `makeReportIssueTool`.
+**Failure mode:** A developer forgets to update `currentContinueToken` on a blocked response, causing a second `TOKEN_BAD_SIGNATURE` error when the step is retried.
+**Repo-pattern relationship:** Follows exactly -- same closure pattern, same fake injection, same `persistTokens` placement.
+**Gains:**
+- Zero new abstractions
+- Easy to test with same fake injection pattern
+- Minimal change surface
+**Losses:**
+- Two write paths for `currentContinueToken` (minor)
+**Scope judgment:** Best-fit
+**Philosophy fit:** Honors YAGNI, immutability, fakes over mocks. No conflicts.
+---
+### Candidate 2: Token ref object `{ get(): string; set(t: string): void }`
+**Summary:** Introduce a typed `TokenRef` interface; instantiate once in `runWorkflow()` with `currentContinueToken` as internal state; pass the ref to both `makeCompleteStepTool` and `makeContinueWorkflowTool`; both tools call `ref.set()` on advance and blocked-retry.
+**Tensions resolved:**
+- Single token update path (only `ref.set()` is called in both tools)
+**Tensions accepted:**
+- Adds a new abstraction (`TokenRef`) not present elsewhere in the repo
+- Requires modifying `makeContinueWorkflowTool` signature (breaks backward compat of the function signature, though not behavior)
+**Boundary solved at:** New `TokenRef` interface shared across factory functions
+**Why this boundary is the best fit:** Only justified if both tools are converted. The spec says keep `continue_workflow` for backward compat -- converting it is out of scope.
+**Failure mode:** Over-engineering; `TokenRef` is unnecessary if `makeContinueWorkflowTool` isn't converted.
+**Repo-pattern relationship:** Departs from existing patterns (no mutable ref objects elsewhere).
+**Gains:** Single token update path, explicit ownership model
+**Losses:** New concept to understand, marginal benefit, violates YAGNI
+**Scope judgment:** Too broad -- would require modifying `makeContinueWorkflowTool` signature, which isn't needed
+**Philosophy fit:** Honors explicit domain types. Conflicts with YAGNI with discipline.
+---
+## Comparison and Recommendation
+| Criterion | Candidate 1 | Candidate 2 |
+|-----------|-------------|-------------|
+| Zero new abstractions | Yes | No |
+| Single token update path | No (two paths) | Yes |
+| Matches repo patterns | Yes | Departs |
+| Requires makeContinueWorkflowTool change | No | Yes |
+| YAGNI | Passes | Fails |
+| Test complexity | Same fake injection | Same |
+**Recommendation: Candidate 1.**
+The two write paths are manageable because:
+1. They are clearly distinct: `onAdvance` fires on successful advance, `onTokenUpdate` fires on blocked retry
+2. Both have WHY comments explaining their role
+3. `toolExecution: 'sequential'` makes races impossible
+Candidate 2 fails the YAGNI test. The single update path benefit only matters if `makeContinueWorkflowTool` is also converted -- which is explicitly out of scope.
+---
+## Self-Critique
+**Strongest counter-argument:** Candidate 2's `TokenRef` would prevent the two-path divergence risk. If a future developer adds a third code path that updates the token (e.g., a hypothetical `rehydrate_step` tool), Candidate 1 requires a third callback while Candidate 2 just needs another `ref.set()` call.
+**Narrower option:** Don't add `onTokenUpdate` at all; instead update `currentContinueToken` directly inside `makeCompleteStepTool.execute()` by passing a setter into the factory. This is functionally identical to `onTokenUpdate` -- just named differently.
+**Broader option threshold:** `TokenRef` would be justified when a second token-managing tool (e.g., `rehydrate_step`) is being added in the same PR.
+**Assumption that would invalidate:** If `toolExecution: 'sequential'` is ever changed to parallel, the two write paths could race. But sequential execution is a fundamental requirement for workflow step ordering and will not change.
+---
+## Open Questions for the Main Agent
+1. Should `complete_step` include `continueToken` in the success response text? (The spec says return `{ status: 'advanced', nextStep: string }` or `{ status: 'complete' }` -- so no token in the response text. This is correct and important.)
+2. Should the blocked-response feedback text for `complete_step` say "call complete_step again" instead of "call continue_workflow"? Yes -- the blocked feedback must be updated to reference `complete_step`.
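+3. The spec says `notes: string` is required with a minimum of 50 characters. Should this be enforced at the JSON Schema level (the LLM sees a validation error) or inside `execute()` (the tool throws)? JSON Schema is preferred -- provided the agent loop actually validates tool input against the schema, it gives the LLM a clear validation error before the tool even runs. If the loop does not enforce `minLength`, a runtime check inside `execute()` is also needed.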
+4. Should `workspacePath` be removed from `complete_step` (since the daemon always knows it)? Yes -- daemon context is always available, no need to pass it through.

package/docs/design/daemon-complete-step-tool-design-review.md ADDED Viewed

@@ -0,0 +1,82 @@
+# daemon complete_step tool -- Design Review Findings
+## Tradeoff Review
+### T1: Two token write paths (onAdvance + onTokenUpdate)
+**Assessment:** Acceptable. The two paths are structurally mutually exclusive (`kind: 'ok'` vs `kind: 'blocked'` response branches). Sequential tool execution prevents races. The response kind enum is closed (`ok` | `blocked`). No future third path is anticipated.
+**Condition for re-evaluation:** If a third response kind is added that carries a new token.
+### T2: Blocked feedback references `complete_step`
+**Assessment:** Acceptable. The daemon's tool list will have `complete_step` as the primary tool. Blocked feedback pointing to it is correct.
+### T3: `notes` minLength enforced at JSON Schema + runtime
+**Finding:** JSON Schema `minLength` is informational to the LLM but NOT enforced by AgentLoop. Runtime check inside `execute()` is required.
+**Revision:** Add `if (!params.notes || params.notes.length < 50) throw new Error(...)` inside `execute()`.
+---
+## Failure Mode Review
+| Failure Mode | Covered? | Mitigation |
+|---|---|---|
+| FM1: Wrong token on blocked retry | Yes | `onTokenUpdate` called with retry token from blocked response |
+| FM2: Notes too short not caught | Fixed | Runtime length check in `execute()` |
+| FM3: Token in response text | Yes | Response text does not include token |
+| FM4: LLM calls `continue_workflow` with hallucinated token | Partial | System prompt marks it deprecated; accepted as transition risk |
+| FM5: Wrong intent used | Yes | `intent: 'advance'` hardcoded, not a parameter |
+---
+## Runner-Up / Simpler Alternative Review
+- `TokenRef` object: not worth pulling in -- requires `makeContinueWorkflowTool` signature change which is out of scope
+- Simpler variant (inline `setCurrentToken` vs `onTokenUpdate` callback): functionally identical, cosmetic only
+- No hybrid opportunity that reduces complexity
+**Verdict:** Selected design is the simplest that satisfies all criteria.
+---
+## Philosophy Alignment
+| Principle | Status |
+|---|---|
+| Immutability by default | Satisfied -- mutation confined to callbacks |
+| YAGNI with discipline | Satisfied -- zero new abstractions |
+| Prefer fakes over mocks | Satisfied -- fake injection pattern |
+| Validate at boundaries | Satisfied after fix -- runtime check added |
+| Make illegal states unrepresentable | Satisfied -- notes required, intent hardcoded |
+| Determinism | Acceptable tension -- sequential execution makes mutable state deterministic |
+---
+## Findings
+### Yellow: Two token write paths (manageable)
+The `currentContinueToken` closure variable is updated in two places: `onAdvance` (on successful advance) and `onTokenUpdate` (on blocked retry). These are mutually exclusive branches. Risk is low because sequential execution prevents races, but it should be documented with WHY comments.
+**Action:** Add WHY comment above each update explaining which response kind it handles.
+### Yellow: Runtime notes validation missing
+JSON Schema `minLength: 50` is informational only. Without a runtime check, a notes string of 10 chars would pass through to `executeContinueWorkflow`, potentially causing a downstream blocked response. Better to fail fast in the tool.
+**Action:** Add runtime check in `execute()` before calling `executeContinueWorkflow`.
+---
+## Recommended Revisions
+1. **Add runtime notes validation**: `if (!params.notes || params.notes.length < 50) throw new Error('complete_step: notes is required and must be at least 50 characters. Include what you did and what you produced.')`
+2. **Document two token write paths**: Add WHY comments above each `currentContinueToken` update explaining which execution branch it covers.
+3. **Blocked feedback text**: Replace `call continue_workflow` references in blocked feedback with `call complete_step again with corrected notes`.
+4. **System prompt update**: Add explicit guidance that `complete_step` is the preferred advancement tool and `continue_workflow` is deprecated in daemon sessions. Prefix the `continue_workflow` tool description with `[DEPRECATED in daemon sessions]`.
+---
+## Residual Concerns
+- **FM4 (LLM using deprecated continue_workflow)**: Accepted. During transition, the LLM might call `continue_workflow` with a correctly round-tripped token (from the initial prompt or a previous response). This is functional but defeats the purpose. The system prompt must be strong enough to prevent this.
+- **`continueToken` in initial prompt**: The initial prompt currently includes `continueToken: ${startContinueToken}`. With `complete_step`, this is no longer needed since the daemon manages the token. The initial prompt should be updated to remove the token and use `call complete_step when done` instead. This is a UX improvement, not a correctness issue.