@exaudeus/workrail 3.34.2 → 3.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/console-ui/assets/{index-DSRkHTz1.js → index-B10Bn8qC.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/daemon/workflow-runner.d.ts +1 -0
- package/dist/daemon/workflow-runner.js +148 -10
- package/dist/manifest.json +7 -7
- package/docs/design/daemon-complete-step-tool-candidates.md +160 -0
- package/docs/design/daemon-complete-step-tool-design-review.md +82 -0
- package/docs/design/daemon-complete-step-tool-implementation-plan.md +166 -0
- package/docs/ideas/backlog.md +224 -0
- package/package.json +1 -1
|
@@ -41,6 +41,7 @@ exports.readDaemonSessionState = readDaemonSessionState;
|
|
|
41
41
|
exports.readAllDaemonSessions = readAllDaemonSessions;
|
|
42
42
|
exports.runStartupRecovery = runStartupRecovery;
|
|
43
43
|
exports.makeContinueWorkflowTool = makeContinueWorkflowTool;
|
|
44
|
+
exports.makeCompleteStepTool = makeCompleteStepTool;
|
|
44
45
|
exports.makeBashTool = makeBashTool;
|
|
45
46
|
exports.makeReportIssueTool = makeReportIssueTool;
|
|
46
47
|
exports.buildSessionRecap = buildSessionRecap;
|
|
@@ -324,6 +325,33 @@ function getSchemas() {
|
|
|
324
325
|
},
|
|
325
326
|
required: ['continueToken'],
|
|
326
327
|
},
|
|
328
|
+
CompleteStepParams: {
|
|
329
|
+
type: 'object',
|
|
330
|
+
properties: {
|
|
331
|
+
notes: {
|
|
332
|
+
type: 'string',
|
|
333
|
+
minLength: 50,
|
|
334
|
+
description: 'What you did in this step (required, at least 50 characters). Write for a human reader. ' +
|
|
335
|
+
'Include: what you did and key decisions, what you produced (files, tests, numbers), ' +
|
|
336
|
+
'anything notable (risks, open questions, things you chose NOT to do and why). ' +
|
|
337
|
+
'Use markdown: headings, bullets, bold. 10-30 lines is ideal.',
|
|
338
|
+
},
|
|
339
|
+
artifacts: {
|
|
340
|
+
type: 'array',
|
|
341
|
+
items: {},
|
|
342
|
+
description: 'Optional structured artifacts to attach to this step. ' +
|
|
343
|
+
'Include wr.assessment objects here when the step requires an assessment gate. ' +
|
|
344
|
+
'Example: [{ "kind": "wr.assessment", "assessmentId": "<id>", "dimensions": { "<dimensionId>": "high" } }]',
|
|
345
|
+
},
|
|
346
|
+
context: {
|
|
347
|
+
type: 'object',
|
|
348
|
+
additionalProperties: true,
|
|
349
|
+
description: 'Updated context variables (only changed values). Omit entirely if no facts changed.',
|
|
350
|
+
},
|
|
351
|
+
},
|
|
352
|
+
required: ['notes'],
|
|
353
|
+
additionalProperties: false,
|
|
354
|
+
},
|
|
327
355
|
BashParams: {
|
|
328
356
|
type: 'object',
|
|
329
357
|
properties: {
|
|
@@ -353,7 +381,8 @@ function getSchemas() {
|
|
|
353
381
|
function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
|
|
354
382
|
return {
|
|
355
383
|
name: 'continue_workflow',
|
|
356
|
-
description: '
|
|
384
|
+
description: '[DEPRECATED in daemon sessions -- use complete_step instead] ' +
|
|
385
|
+
'Advance the WorkRail workflow to the next step. Call this after completing all work ' +
|
|
357
386
|
'required by the current step. Include your notes in notesMarkdown. ' +
|
|
358
387
|
'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
|
|
359
388
|
inputSchema: schemas['ContinueWorkflowParams'],
|
|
@@ -442,6 +471,109 @@ function makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas
|
|
|
442
471
|
},
|
|
443
472
|
};
|
|
444
473
|
}
|
|
474
|
+
/**
 * Build the daemon-only `complete_step` tool.
 *
 * The tool advances the workflow on the agent's behalf: it reads the current
 * continue token through `getCurrentToken()` instead of asking the LLM to
 * round-trip it, so the token never has to appear in the conversation.
 *
 * @param {string} sessionId - Daemon session id; used for logging, events and token persistence.
 * @param {object} ctx - Context object forwarded untouched to `_executeContinueWorkflowFn`.
 * @param {() => string} getCurrentToken - Returns the token to send with the next advance call.
 * @param {(stepText: string, continueToken: string) => void} onAdvance - Called after a successful
 *   (non-final) advance with the text for the next step and the new token.
 * @param {(notes: string) => void} onComplete - Called with the final notes when the workflow reports completion.
 * @param {(token: string) => void} onTokenUpdate - Called with the retry token when the step is blocked.
 * @param {object} schemas - Schema map; `schemas['CompleteStepParams']` becomes the tool's input schema.
 * @param {Function} [_executeContinueWorkflowFn] - Injection point for the advance call (lets tests pass a fake).
 * @param {object} [emitter] - Optional event sink; receives a `tool_called` event per invocation.
 * @param {string|null} [workrailSessionId] - Passed to `withWorkrailSession` when emitting events.
 * @returns {{name: string, description: string, inputSchema: object, label: string, execute: Function}}
 *   Tool definition. `execute` rejects when notes are invalid or when the advance call returns an error result.
 */
function makeCompleteStepTool(sessionId, ctx, getCurrentToken, onAdvance, onComplete, onTokenUpdate, schemas, _executeContinueWorkflowFn = index_js_1.executeContinueWorkflow, emitter, workrailSessionId) {
    const MIN_NOTES_LENGTH = 50;
    // Render a blocked response as markdown feedback for the agent.
    // The token is deliberately left out of the text: the agent never needs to see it.
    const formatBlockedFeedback = (out) => {
        const lines = ['## Step blocked -- action required\n'];
        for (const blocker of out.blockers.blockers) {
            lines.push(blocker.message);
            if (blocker.suggestedFix) {
                lines.push(`\nWhat to do: ${blocker.suggestedFix}`);
            }
            lines.push('');
        }
        if (out.validation) {
            if (out.validation.issues.length > 0) {
                lines.push('**Issues:**');
                for (const issue of out.validation.issues) {
                    lines.push(`- ${issue}`);
                }
                lines.push('');
            }
            if (out.validation.suggestions.length > 0) {
                lines.push('**Suggestions:**');
                for (const suggestion of out.validation.suggestions) {
                    lines.push(`- ${suggestion}`);
                }
                lines.push('');
            }
        }
        if (out.assessmentFollowup) {
            lines.push(`**Follow-up required:** ${out.assessmentFollowup.title}`);
            lines.push(out.assessmentFollowup.guidance);
            lines.push('');
        }
        if (out.retryable) {
            lines.push(`Retry the same step: call complete_step again with corrected notes.`);
        }
        else {
            lines.push(`You cannot proceed without resolving this. Inform the user and wait for their response, then call complete_step.`);
        }
        return lines.join('\n');
    };
    return {
        name: 'complete_step',
        description: 'Mark the current WorkRail workflow step as complete and advance to the next one. ' +
            'Call this after completing all work required by the current step. ' +
            'Include your substantive notes (min 50 characters) describing what you did. ' +
            'The daemon manages the session token internally -- you do not need a continueToken. ' +
            'When the step requires an assessment gate, include wr.assessment objects in artifacts.',
        inputSchema: schemas['CompleteStepParams'],
        label: 'Complete Step',
        execute: async (_toolCallId, params) => {
            console.log(`[WorkflowRunner] Tool: complete_step sessionId=${sessionId}`);
            emitter?.emit({ kind: 'tool_called', sessionId, toolName: 'complete_step', summary: 'advance', ...withWorkrailSession(workrailSessionId) });
            // Tolerate a missing params object so the caller gets the actionable
            // "notes is required" error instead of a raw TypeError.
            const { notes, artifacts, context } = params ?? {};
            // Runtime guard mirroring the schema's minLength. The typeof check matters:
            // an array or other object with a large `.length` must not slip through.
            if (typeof notes !== 'string' || notes.length < MIN_NOTES_LENGTH) {
                throw new Error(`complete_step: notes is required and must be at least ${MIN_NOTES_LENGTH} characters. ` +
                    `Provide substantive notes describing what you did, what you produced, and any notable decisions. ` +
                    `Current length: ${typeof notes === 'string' ? notes.length : 0} characters.`);
            }
            const result = await _executeContinueWorkflowFn({
                continueToken: getCurrentToken(),
                intent: 'advance',
                // notes is always a non-empty string at this point, so output is always sent.
                output: {
                    notesMarkdown: notes,
                    ...(artifacts?.length ? { artifacts } : {}),
                },
                context,
            }, ctx);
            if (result.isErr()) {
                throw new Error(`complete_step failed: ${result.error.kind} -- ${JSON.stringify(result.error)}`);
            }
            const out = result.value.response;
            const isBlocked = out.kind === 'blocked';
            const newContinueToken = out.continueToken ?? '';
            const checkpointToken = out.checkpointToken ?? null;
            // A blocked response may carry a dedicated retry token; prefer it over the plain one.
            const nextToken = (isBlocked ? out.nextCall?.params?.continueToken : undefined) ?? newContinueToken;
            // Persist before firing any callback so the stored token is never behind
            // what the callbacks have been told.
            if (nextToken) {
                await persistTokens(sessionId, nextToken, checkpointToken);
            }
            if (isBlocked) {
                // Token write path for the 'blocked' branch. Skip the update when no token
                // came back: overwriting the current token with '' would break every retry.
                if (nextToken) {
                    onTokenUpdate(nextToken);
                }
                return {
                    content: [{ type: 'text', text: formatBlockedFeedback(out) }],
                    details: out,
                };
            }
            if (out.isComplete) {
                onComplete(notes);
                return {
                    content: [{ type: 'text', text: JSON.stringify({ status: 'complete' }) }],
                    details: out,
                };
            }
            const pending = out.pending;
            const stepText = pending
                ? `${JSON.stringify({ status: 'advanced', nextStep: pending.title })}\n\n## ${pending.title}\n\n${pending.prompt}`
                : JSON.stringify({ status: 'advanced', nextStep: 'Next step' });
            // Token write path for the successful-advance branch: the new token is handed
            // to onAdvance, which is expected to record it for the next getCurrentToken() call.
            onAdvance(stepText, newContinueToken);
            return {
                content: [{ type: 'text', text: stepText }],
                details: out,
            };
        },
    };
}
|
|
445
577
|
function makeBashTool(workspacePath, schemas, sessionId, emitter, workrailSessionId) {
|
|
446
578
|
return {
|
|
447
579
|
name: 'Bash',
|
|
@@ -635,20 +767,21 @@ Bad pattern: "I'll analyze both layers." (no justification)
|
|
|
635
767
|
Good pattern: "Question: Should I check the middleware? Answer: The workflow step says 'trace the full call chain', and the AGENTS.md says the entry point is in the middleware layer. Yes, start there."
|
|
636
768
|
|
|
637
769
|
## Your tools
|
|
638
|
-
- \`
|
|
770
|
+
- \`complete_step\`: Mark the current step complete and advance to the next one. Call this after completing ALL work required by the step. Include your notes (min 50 characters) in the notes field. The daemon manages the session token internally -- you do NOT need a continueToken. This is the preferred advancement tool for daemon sessions.
|
|
771
|
+
- \`continue_workflow\`: [DEPRECATED -- use complete_step instead] Legacy advancement tool. Requires a continueToken that you must round-trip exactly. Only use this if complete_step is unavailable.
|
|
639
772
|
- \`Bash\`: Run shell commands. Use for building, testing, running scripts.
|
|
640
773
|
- \`Read\`: Read files.
|
|
641
774
|
- \`Write\`: Write files.
|
|
642
|
-
- \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND
|
|
775
|
+
- \`report_issue\`: Record a structured issue, error, or unexpected behavior. Call this AND complete_step (unless fatal). Does not stop the session -- it creates a record for the auto-fix coordinator.
|
|
643
776
|
|
|
644
777
|
## Execution contract
|
|
645
778
|
1. Read the step carefully. Do ALL the work the step asks for.
|
|
646
|
-
2. Call \`
|
|
779
|
+
2. Call \`complete_step\` with your notes. No continueToken needed -- the daemon manages it.
|
|
647
780
|
3. Repeat until the workflow reports it is complete.
|
|
648
|
-
4. Do NOT skip steps. Do NOT call \`
|
|
781
|
+
4. Do NOT skip steps. Do NOT call \`complete_step\` without completing the step's work.
|
|
649
782
|
|
|
650
783
|
## The workflow is the contract
|
|
651
|
-
Every step must be fully completed before you call
|
|
784
|
+
Every step must be fully completed before you call complete_step. The workflow step prompt is the specification of what 'done' means -- not a suggestion. Don't advance until the work is actually done.
|
|
652
785
|
|
|
653
786
|
Your cognitive mode changes per step: some steps make you a researcher, others a reviewer, others an implementer. Adopt the mode the step describes. Don't bring your own agenda.
|
|
654
787
|
|
|
@@ -659,7 +792,10 @@ If something goes wrong: call report_issue, then continue unless severity is 'fa
|
|
|
659
792
|
Don't narrate what you're about to do. Use the tool and report what you found. Token efficiency matters -- you have a wall-clock timeout.
|
|
660
793
|
|
|
661
794
|
## You don't have a user. You have a workflow and a soul.
|
|
662
|
-
If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue
|
|
795
|
+
If you're unsure, consult the oracle above. If nothing answers the question, make a reasoned decision, call report_issue with kind='self_correction' to document it, and continue.
|
|
796
|
+
|
|
797
|
+
## IMPORTANT: Never use continue_workflow in daemon sessions
|
|
798
|
+
complete_step is your advancement tool. It does not require a continueToken. Do NOT call continue_workflow with a token you found in a previous message -- use complete_step instead.\
|
|
663
799
|
`;
|
|
664
800
|
function buildSessionRecap(notes) {
|
|
665
801
|
if (notes.length === 0)
|
|
@@ -746,9 +882,10 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
|
|
|
746
882
|
const STUCK_REPEAT_THRESHOLD = 3;
|
|
747
883
|
const issueSummaries = [];
|
|
748
884
|
const MAX_ISSUE_SUMMARIES = 10;
|
|
749
|
-
const onAdvance = (stepText,
|
|
885
|
+
const onAdvance = (stepText, continueToken) => {
|
|
750
886
|
pendingSteerText = stepText;
|
|
751
887
|
stepAdvanceCount++;
|
|
888
|
+
currentContinueToken = continueToken;
|
|
752
889
|
if (workrailSessionId !== null)
|
|
753
890
|
daemonRegistry?.heartbeat(workrailSessionId);
|
|
754
891
|
emitter?.emit({ kind: 'step_advanced', sessionId, ...withWorkrailSession(workrailSessionId) });
|
|
@@ -775,6 +912,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
|
|
|
775
912
|
}
|
|
776
913
|
const startContinueToken = firstStep.continueToken ?? '';
|
|
777
914
|
const startCheckpointToken = firstStep.checkpointToken ?? null;
|
|
915
|
+
let currentContinueToken = startContinueToken;
|
|
778
916
|
if (startContinueToken) {
|
|
779
917
|
const decoded = await (0, v2_token_ops_js_1.parseContinueTokenOrFail)(startContinueToken, ctx.v2.tokenCodecPorts, ctx.v2.tokenAliasStore);
|
|
780
918
|
if (decoded.isOk()) {
|
|
@@ -799,6 +937,7 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
|
|
|
799
937
|
}
|
|
800
938
|
const schemas = getSchemas();
|
|
801
939
|
const tools = [
|
|
940
|
+
makeCompleteStepTool(sessionId, ctx, () => currentContinueToken, onAdvance, onComplete, (t) => { currentContinueToken = t; }, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
|
|
802
941
|
makeContinueWorkflowTool(sessionId, ctx, onAdvance, onComplete, schemas, index_js_1.executeContinueWorkflow, emitter, workrailSessionId),
|
|
803
942
|
makeBashTool(trigger.workspacePath, schemas, sessionId, emitter, workrailSessionId),
|
|
804
943
|
makeReadTool(schemas, sessionId, emitter, workrailSessionId),
|
|
@@ -819,9 +958,8 @@ async function runWorkflow(trigger, ctx, apiKey, daemonRegistry, emitter) {
|
|
|
819
958
|
? `\n\nTrigger context:\n\`\`\`json\n${JSON.stringify(trigger.context, null, 2)}\n\`\`\``
|
|
820
959
|
: '';
|
|
821
960
|
const initialPrompt = (firstStep.pending?.prompt ?? 'No step content available') +
|
|
822
|
-
`\n\ncontinueToken: ${startContinueToken}` +
|
|
823
961
|
contextJson +
|
|
824
|
-
'\n\nComplete all step work, then call
|
|
962
|
+
'\n\nComplete all step work, then call complete_step with your notes to advance.';
|
|
825
963
|
const agentCallbacks = {
|
|
826
964
|
onLlmTurnStarted: ({ messageCount }) => {
|
|
827
965
|
emitter?.emit({
|
package/dist/manifest.json
CHANGED
|
@@ -445,12 +445,12 @@
|
|
|
445
445
|
"sha256": "cf9d09641f1c31fffe6c7835b30bbbad52572befec1acab7fb9a0c188431af36",
|
|
446
446
|
"bytes": 60355
|
|
447
447
|
},
|
|
448
|
-
"console-ui/assets/index-
|
|
449
|
-
"sha256": "
|
|
448
|
+
"console-ui/assets/index-B10Bn8qC.js": {
|
|
449
|
+
"sha256": "7f622b872ba39f5973c38b8fc93e9a5b00074dcdab4540ffbc7a3a78a560e55a",
|
|
450
450
|
"bytes": 754653
|
|
451
451
|
},
|
|
452
452
|
"console-ui/index.html": {
|
|
453
|
-
"sha256": "
|
|
453
|
+
"sha256": "c814df5d1fa998848e5c4ee61a1455c2a50bfcefb0af908d3f8fa02d311de32c",
|
|
454
454
|
"bytes": 417
|
|
455
455
|
},
|
|
456
456
|
"console/standalone-console.d.ts": {
|
|
@@ -502,12 +502,12 @@
|
|
|
502
502
|
"bytes": 1009
|
|
503
503
|
},
|
|
504
504
|
"daemon/workflow-runner.d.ts": {
|
|
505
|
-
"sha256": "
|
|
506
|
-
"bytes":
|
|
505
|
+
"sha256": "598ca3cda5dba827d0eddf80baf4136b401d821c81ec83aacbee05a63b836d9a",
|
|
506
|
+
"bytes": 4103
|
|
507
507
|
},
|
|
508
508
|
"daemon/workflow-runner.js": {
|
|
509
|
-
"sha256": "
|
|
510
|
-
"bytes":
|
|
509
|
+
"sha256": "a54677cdf2d2083fd9672b25b9d2264defa8dde7b357055bc14748d0ce9e7098",
|
|
510
|
+
"bytes": 56093
|
|
511
511
|
},
|
|
512
512
|
"di/container.d.ts": {
|
|
513
513
|
"sha256": "003bb7fb7478d627524b9b1e76bd0a963a243794a687ff233b96dc0e33a06d9f",
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# daemon complete_step tool -- Design Candidates
|
|
2
|
+
|
|
3
|
+
## Problem Understanding
|
|
4
|
+
|
|
5
|
+
### Core Tensions
|
|
6
|
+
|
|
7
|
+
1. **Simplicity vs. completeness of the blocked-response path**: The naive solution handles the happy path (advance) but misses blocked responses. On a blocked response, `executeContinueWorkflow` returns a `retryContinueToken`. The LLM needs to retry `complete_step` with corrected notes, but what token should the tool inject? It must be the retry token, not the original session token -- so the closure variable MUST be updated to the retry token on blocked responses.
|
|
8
|
+
|
|
9
|
+
2. **Crash safety vs. simplicity**: The existing `makeContinueWorkflowTool` calls `persistTokens()` inside `execute()` before calling `onAdvance()`. The `complete_step` tool must maintain this same invariant: persist the new token to disk before signaling the advance.
|
|
10
|
+
|
|
11
|
+
3. **Backward compatibility vs. system prompt clarity**: `continue_workflow` must stay in the tools list (it's used by MCP sessions outside the daemon). But if `complete_step` exists alongside it, the system prompt must clearly tell the daemon agent to prefer `complete_step`.
|
|
12
|
+
|
|
13
|
+
4. **Token update ownership**: `currentContinueToken` is written by two paths: (a) `onAdvance` after a successful advance, and (b) blocked retry in `makeCompleteStepTool.execute()`. Sequential tool execution (`toolExecution: 'sequential'` in AgentLoop) ensures no race condition, but the two write paths must be documented.
|
|
14
|
+
|
|
15
|
+
### Likely Seam
|
|
16
|
+
|
|
17
|
+
The seam is in `runWorkflow()`: the `onAdvance` callback signature `(stepText: string, continueToken: string) => void` already passes the new `continueToken` as the second parameter. `runWorkflow()` currently ignores it (because `continue_workflow` doesn't need it -- the LLM round-trips the token). For `complete_step`, we just need to use it.
|
|
18
|
+
|
|
19
|
+
### What Makes This Hard
|
|
20
|
+
|
|
21
|
+
- Two token update paths (advance + blocked retry) must both be correct
|
|
22
|
+
- Token must be persisted before `onAdvance` fires (crash safety invariant)
|
|
23
|
+
- Blocked response feedback must not include `continueToken` in the text (LLM doesn't need to see it)
|
|
24
|
+
- System prompt needs to be updated to prefer `complete_step` without breaking existing `continue_workflow` callers
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Philosophy Constraints
|
|
29
|
+
|
|
30
|
+
Sources: `CLAUDE.md` (workspace root), repo patterns in `src/daemon/workflow-runner.ts`
|
|
31
|
+
|
|
32
|
+
**Active principles:**
|
|
33
|
+
- **Immutability by default**: confine mutation to the minimal API -- `currentContinueToken` updated only via callbacks
|
|
34
|
+
- **YAGNI with discipline**: avoid speculative abstractions
|
|
35
|
+
- **Prefer fakes over mocks**: test with fake `executeContinueWorkflow` injection
|
|
36
|
+
- **Document "why" not "what"**: add WHY comments explaining the daemon token injection
|
|
37
|
+
- **Make illegal states unrepresentable**: `notes` required with `minLength: 50` at JSON Schema level
|
|
38
|
+
|
|
39
|
+
No philosophy conflicts found between stated rules and repo patterns.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Impact Surface
|
|
44
|
+
|
|
45
|
+
- `src/daemon/workflow-runner.ts`: new factory function + updates to `runWorkflow()`, `getSchemas()`, `BASE_SYSTEM_PROMPT`
|
|
46
|
+
- `tests/unit/workflow-runner-complete-step.test.ts`: new test file
|
|
47
|
+
- `continue_workflow` tool: unchanged (backward compat)
|
|
48
|
+
- Public MCP tools list: unchanged (`complete_step` is daemon-only, not exposed via MCP)
|
|
49
|
+
- `V2ContinueWorkflowInputShape` / output schemas: unchanged
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Candidates
|
|
54
|
+
|
|
55
|
+
### Candidate 1: Inline closure variable + two callback paths (RECOMMENDED)
|
|
56
|
+
|
|
57
|
+
**Summary:** Add `let currentContinueToken = startContinueToken` to `runWorkflow()`; update it in the existing `onAdvance` closure (which already receives `continueToken` as a second param but ignores it); for blocked-retry token updates, add an `onTokenUpdate: (t: string) => void` callback parameter to `makeCompleteStepTool()`; `makeCompleteStepTool()` uses `getCurrentToken: () => string` to inject the token.
|
|
58
|
+
|
|
59
|
+
**Tensions resolved:**
|
|
60
|
+
- Crash safety: identical `persistTokens` path before callbacks
|
|
61
|
+
- Blocked retry: `onTokenUpdate` callback updates `currentContinueToken` to retry token
|
|
62
|
+
- Backward compat: additive only, both tools in list
|
|
63
|
+
|
|
64
|
+
**Tensions accepted:**
|
|
65
|
+
- Two token-update paths (onAdvance for advance, onTokenUpdate for blocked) could diverge if future developers update one but forget the other
|
|
66
|
+
|
|
67
|
+
**Boundary solved at:** `runWorkflow()` closure + new factory function alongside existing one
|
|
68
|
+
|
|
69
|
+
**Why this boundary is the best fit:** The seam already exists -- `onAdvance` receives the new `continueToken` but ignores it. Adding a single `onTokenUpdate` callback for blocked responses follows the same pattern used by `onIssueSummary` in `makeReportIssueTool`.
|
|
70
|
+
|
|
71
|
+
**Failure mode:** A developer forgets to update `currentContinueToken` on a blocked retry, causing a second `TOKEN_BAD_SIGNATURE` error when the agent retries.
|
|
72
|
+
|
|
73
|
+
**Repo-pattern relationship:** Follows exactly -- same closure pattern, same fake injection, same `persistTokens` placement.
|
|
74
|
+
|
|
75
|
+
**Gains:**
|
|
76
|
+
- Zero new abstractions
|
|
77
|
+
- Easy to test with same fake injection pattern
|
|
78
|
+
- Minimal change surface
|
|
79
|
+
|
|
80
|
+
**Losses:**
|
|
81
|
+
- Two write paths for `currentContinueToken` (minor)
|
|
82
|
+
|
|
83
|
+
**Scope judgment:** Best-fit
|
|
84
|
+
|
|
85
|
+
**Philosophy fit:** Honors YAGNI, immutability, fakes over mocks. No conflicts.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
### Candidate 2: Token ref object `{ get(): string; set(t: string): void }`
|
|
90
|
+
|
|
91
|
+
**Summary:** Introduce a typed `TokenRef` interface; instantiate once in `runWorkflow()` with `currentContinueToken` as internal state; pass the ref to both `makeCompleteStepTool` and `makeContinueWorkflowTool`; both tools call `ref.set()` on advance and blocked-retry.
|
|
92
|
+
|
|
93
|
+
**Tensions resolved:**
|
|
94
|
+
- Single token update path (only `ref.set()` is called in both tools)
|
|
95
|
+
|
|
96
|
+
**Tensions accepted:**
|
|
97
|
+
- Adds a new abstraction (`TokenRef`) not present elsewhere in the repo
|
|
98
|
+
- Requires modifying `makeContinueWorkflowTool` signature (breaks backward compat of the function signature, though not behavior)
|
|
99
|
+
|
|
100
|
+
**Boundary solved at:** New `TokenRef` interface shared across factory functions
|
|
101
|
+
|
|
102
|
+
**Why this boundary is the best fit:** Only justified if both tools are converted. The spec says keep `continue_workflow` for backward compat -- converting it is out of scope.
|
|
103
|
+
|
|
104
|
+
**Failure mode:** Over-engineering; `TokenRef` is unnecessary if `makeContinueWorkflowTool` isn't converted.
|
|
105
|
+
|
|
106
|
+
**Repo-pattern relationship:** Departs from existing patterns (no mutable ref objects elsewhere).
|
|
107
|
+
|
|
108
|
+
**Gains:** Single token update path, explicit ownership model
|
|
109
|
+
|
|
110
|
+
**Losses:** New concept to understand, marginal benefit, violates YAGNI
|
|
111
|
+
|
|
112
|
+
**Scope judgment:** Too broad -- would require modifying `makeContinueWorkflowTool` signature, which isn't needed
|
|
113
|
+
|
|
114
|
+
**Philosophy fit:** Honors explicit domain types. Conflicts with YAGNI with discipline.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Comparison and Recommendation
|
|
119
|
+
|
|
120
|
+
| Criterion | Candidate 1 | Candidate 2 |
|
|
121
|
+
|-----------|-------------|-------------|
|
|
122
|
+
| Zero new abstractions | Yes | No |
|
|
123
|
+
| Single token update path | No (two paths) | Yes |
|
|
124
|
+
| Matches repo patterns | Yes | Departs |
|
|
125
|
+
| Requires makeContinueWorkflowTool change | No | Yes |
|
|
126
|
+
| YAGNI | Passes | Fails |
|
|
127
|
+
| Test complexity | Same fake injection | Same |
|
|
128
|
+
|
|
129
|
+
**Recommendation: Candidate 1.**
|
|
130
|
+
|
|
131
|
+
The two write paths are manageable because:
|
|
132
|
+
1. They are clearly distinct: `onAdvance` fires on successful advance, `onTokenUpdate` fires on blocked retry
|
|
133
|
+
2. Both have WHY comments explaining their role
|
|
134
|
+
3. `toolExecution: 'sequential'` makes races impossible
|
|
135
|
+
|
|
136
|
+
Candidate 2 fails the YAGNI test. The single update path benefit only matters if `makeContinueWorkflowTool` is also converted -- which is explicitly out of scope.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Self-Critique
|
|
141
|
+
|
|
142
|
+
**Strongest counter-argument:** Candidate 2's `TokenRef` would prevent the two-path divergence risk. If a future developer adds a third code path that updates the token (e.g., a hypothetical `rehydrate_step` tool), Candidate 1 requires a third callback while Candidate 2 just needs another `ref.set()` call.
|
|
143
|
+
|
|
144
|
+
**Narrower option:** Don't add `onTokenUpdate` at all; instead update `currentContinueToken` directly inside `makeCompleteStepTool.execute()` by passing a setter into the factory. This is functionally identical to `onTokenUpdate` -- just named differently.
|
|
145
|
+
|
|
146
|
+
**Broader option threshold:** `TokenRef` would be justified when a second token-managing tool (e.g., `rehydrate_step`) is being added in the same PR.
|
|
147
|
+
|
|
148
|
+
**Assumption that would invalidate this design:** If `toolExecution: 'sequential'` is ever changed to parallel, the two write paths could race. But sequential execution is a fundamental requirement for workflow step ordering and will not change.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Open Questions for the Main Agent
|
|
153
|
+
|
|
154
|
+
1. Should `complete_step` include `continueToken` in the success response text? (The spec says return `{ status: 'advanced', nextStep: string }` or `{ status: 'complete' }` -- so no token in the response text. This is correct and important.)
|
|
155
|
+
|
|
156
|
+
2. Should the blocked-response feedback text for `complete_step` say "call complete_step again" instead of "call continue_workflow"? Yes -- the blocked feedback must be updated to reference `complete_step`.
|
|
157
|
+
|
|
158
|
+
3. The spec says `notes: string` required min 50 chars. Should this be enforced at JSON Schema level (LLM sees validation error) or inside `execute()` (tool throws)? JSON Schema is preferred -- it gives the LLM a clear validation error before the tool even runs. (This assumes the agent loop actually enforces the schema; if it does not, a runtime check inside `execute()` is also required.)
|
|
159
|
+
|
|
160
|
+
4. Should `workspacePath` be removed from `complete_step` (since the daemon always knows it)? Yes -- daemon context is always available, no need to pass it through.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# daemon complete_step tool -- Design Review Findings
|
|
2
|
+
|
|
3
|
+
## Tradeoff Review
|
|
4
|
+
|
|
5
|
+
### T1: Two token write paths (onAdvance + onTokenUpdate)
|
|
6
|
+
**Assessment:** Acceptable. The two paths are structurally mutually exclusive (`kind: 'ok'` vs `kind: 'blocked'` response branches). Sequential tool execution prevents races. The response kind enum is closed (`ok` | `blocked`). No future third path is anticipated.
|
|
7
|
+
**Condition for re-evaluation:** If a third response kind is added that carries a new token.
|
|
8
|
+
|
|
9
|
+
### T2: Blocked feedback references `complete_step`
|
|
10
|
+
**Assessment:** Acceptable. The daemon's tool list will have `complete_step` as the primary tool. Blocked feedback pointing to it is correct.
|
|
11
|
+
|
|
12
|
+
### T3: `notes` minLength enforced at JSON Schema + runtime
|
|
13
|
+
**Finding:** JSON Schema `minLength` is informational to the LLM but NOT enforced by AgentLoop. Runtime check inside `execute()` is required.
|
|
14
|
+
**Revision:** Add `if (!params.notes || params.notes.length < 50) throw new Error(...)` inside `execute()`.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Failure Mode Review
|
|
19
|
+
|
|
20
|
+
| Failure Mode | Covered? | Mitigation |
|
|
21
|
+
|---|---|---|
|
|
22
|
+
| FM1: Wrong token on blocked retry | Yes | `onTokenUpdate` called with retry token from blocked response |
|
|
23
|
+
| FM2: Notes too short not caught | Fixed | Runtime length check in `execute()` |
|
|
24
|
+
| FM3: Token in response text | Yes | Response text does not include token |
|
|
25
|
+
| FM4: LLM calls `continue_workflow` with hallucinated token | Partial | System prompt marks it deprecated; accepted as transition risk |
|
|
26
|
+
| FM5: Wrong intent used | Yes | `intent: 'advance'` hardcoded, not a parameter |
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Runner-Up / Simpler Alternative Review
|
|
31
|
+
|
|
32
|
+
- `TokenRef` object: not worth pulling in -- requires `makeContinueWorkflowTool` signature change which is out of scope
|
|
33
|
+
- Simpler variant (inline `setCurrentToken` vs `onTokenUpdate` callback): functionally identical, cosmetic only
|
|
34
|
+
- No hybrid opportunity that reduces complexity
|
|
35
|
+
|
|
36
|
+
**Verdict:** Selected design is the simplest that satisfies all criteria.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Philosophy Alignment
|
|
41
|
+
|
|
42
|
+
| Principle | Status |
|
|
43
|
+
|---|---|
|
|
44
|
+
| Immutability by default | Satisfied -- mutation confined to callbacks |
|
|
45
|
+
| YAGNI with discipline | Satisfied -- zero new abstractions |
|
|
46
|
+
| Prefer fakes over mocks | Satisfied -- fake injection pattern |
|
|
47
|
+
| Validate at boundaries | Satisfied after fix -- runtime check added |
|
|
48
|
+
| Make illegal states unrepresentable | Satisfied -- notes required, intent hardcoded |
|
|
49
|
+
| Determinism | Acceptable tension -- sequential execution makes mutable state deterministic |
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Findings
|
|
54
|
+
|
|
55
|
+
### Yellow: Two token write paths (manageable)
|
|
56
|
+
The `currentContinueToken` closure variable is updated in two places: `onAdvance` (on successful advance) and `onTokenUpdate` (on blocked retry). These are mutually exclusive branches. Risk is low because sequential execution prevents races, but it should be documented with WHY comments.
|
|
57
|
+
|
|
58
|
+
**Action:** Add WHY comment above each update explaining which response kind it handles.
|
|
59
|
+
|
|
60
|
+
### Yellow: Runtime notes validation missing
|
|
61
|
+
JSON Schema `minLength: 50` is informational only. Without a runtime check, a notes string of 10 chars would pass through to `executeContinueWorkflow`, potentially causing a downstream blocked response. Better to fail fast in the tool.
|
|
62
|
+
|
|
63
|
+
**Action:** Add runtime check in `execute()` before calling `executeContinueWorkflow`.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Recommended Revisions
|
|
68
|
+
|
|
69
|
+
1. **Add runtime notes validation**: `if (!params.notes || params.notes.length < 50) throw new Error('complete_step: notes is required and must be at least 50 characters. Include what you did and what you produced.')`
|
|
70
|
+
|
|
71
|
+
2. **Document two token write paths**: Add WHY comments above each `currentContinueToken` update explaining which execution branch it covers.
|
|
72
|
+
|
|
73
|
+
3. **Blocked feedback text**: Replace `call continue_workflow` references in blocked feedback with `call complete_step again with corrected notes`.
|
|
74
|
+
|
|
75
|
+
4. **System prompt update**: Add explicit guidance that `complete_step` is the preferred advancement tool and that `continue_workflow` is deprecated in daemon sessions. Prefix the `continue_workflow` tool description with `[DEPRECATED in daemon sessions]` so the two stay consistent.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Residual Concerns
|
|
80
|
+
|
|
81
|
+
- **FM4 (LLM using deprecated continue_workflow)**: Accepted. During transition, the LLM might call `continue_workflow` with a correctly round-tripped token (from the initial prompt or a previous response). This is functional but defeats the purpose. The system prompt must be strong enough to prevent this.
|
|
82
|
+
- **`continueToken` in initial prompt**: The initial prompt currently includes `continueToken: ${startContinueToken}`. With `complete_step`, this is no longer needed since the daemon manages the token. The initial prompt should be updated to remove the token and use `call complete_step when done` instead. This is a UX improvement, not a correctness issue.
|