npm - @exaudeus/workrail - Versions diffs - 3.41.0 → 3.43.0 - Mend

@exaudeus/workrail 3.41.0 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/dist/cli-worktrain.js +40 -11
package/dist/console-ui/assets/{index-CQt4UhPB.js → index-Sb57DW4B.js} +1 -1
package/dist/console-ui/index.html +1 -1
package/dist/context-assembly/deps.d.ts +8 -0
package/dist/context-assembly/deps.js +2 -0
package/dist/context-assembly/index.d.ts +6 -0
package/dist/context-assembly/index.js +50 -0
package/dist/context-assembly/infra.d.ts +3 -0
package/dist/context-assembly/infra.js +154 -0
package/dist/context-assembly/types.d.ts +30 -0
package/dist/context-assembly/types.js +2 -0
package/dist/coordinators/pr-review.d.ts +3 -1
package/dist/coordinators/pr-review.js +25 -4
package/dist/daemon/workflow-runner.d.ts +11 -1
package/dist/daemon/workflow-runner.js +82 -9
package/dist/domain/execution/state.d.ts +6 -6
package/dist/manifest.json +76 -44
package/dist/mcp/handlers/v2-workflow.d.ts +2 -2
package/dist/mcp/output-schemas.d.ts +234 -234
package/dist/mcp/tools.d.ts +2 -2
package/dist/mcp/v2/tools.d.ts +24 -24
package/dist/trigger/delivery-action.d.ts +2 -0
package/dist/trigger/delivery-action.js +24 -0
package/dist/trigger/trigger-router.js +24 -1
package/dist/trigger/trigger-store.js +42 -0
package/dist/trigger/types.d.ts +3 -0
package/dist/v2/durable-core/schemas/artifacts/assessment.d.ts +2 -2
package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +2 -2
package/dist/v2/durable-core/schemas/artifacts/loop-control.d.ts +6 -6
package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +6 -6
package/dist/v2/durable-core/schemas/compiled-workflow/index.d.ts +56 -56
package/dist/v2/durable-core/schemas/execution-snapshot/blocked-snapshot.d.ts +83 -83
package/dist/v2/durable-core/schemas/execution-snapshot/execution-snapshot.v1.d.ts +1024 -1024
package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +2336 -2336
package/dist/v2/durable-core/schemas/session/dag-topology.d.ts +6 -6
package/dist/v2/durable-core/schemas/session/events.d.ts +339 -339
package/dist/v2/durable-core/schemas/session/gaps.d.ts +30 -30
package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
package/dist/v2/durable-core/schemas/session/validation-event.d.ts +3 -3
package/docs/design/adaptive-coordinator-context-candidates.md +265 -0
package/docs/design/adaptive-coordinator-context-review.md +101 -0
package/docs/design/adaptive-coordinator-context.md +504 -0
package/docs/design/adaptive-coordinator-routing-candidates.md +340 -0
package/docs/design/adaptive-coordinator-routing-design-review.md +135 -0
package/docs/design/adaptive-coordinator-routing-review.md +156 -0
package/docs/design/adaptive-coordinator-routing.md +660 -0
package/docs/design/context-assembly-design-candidates.md +199 -0
package/docs/design/context-assembly-implementation-plan.md +211 -0
package/docs/design/context-assembly-layer-design-review.md +110 -0
package/docs/design/context-assembly-layer.md +622 -0
package/docs/design/context-assembly-review-findings.md +112 -0
package/docs/design/stuck-escalation-candidates.md +176 -0
package/docs/design/stuck-escalation-design-review.md +70 -0
package/docs/design/stuck-escalation.md +326 -0
package/docs/design/worktrain-task-queue-candidates.md +252 -0
package/docs/design/worktrain-task-queue-design-review.md +109 -0
package/docs/design/worktrain-task-queue.md +443 -0
package/docs/design/worktree-review-findings-candidates.md +101 -0
package/docs/design/worktree-review-findings-design-review.md +65 -0
package/docs/design/worktree-review-findings-implementation-plan.md +153 -0
package/docs/ideas/backlog.md +212 -0
package/package.json +3 -3

package/docs/design/worktree-review-findings-candidates.md ADDED Viewed

@@ -0,0 +1,101 @@
+# Worktree Review Findings - Design Candidates
+## Problem Understanding
+### Core Tensions
+1. **Cleanup location vs result completeness**: `runWorkflow()` knows when a session succeeds; `trigger-router` knows when delivery completes. Cleanup must happen after delivery, but the result type must carry enough context for delivery to work -- hence `sessionWorkspacePath` in `WorkflowRunSuccess`. The existing architecture already threads this context; the bug is that runWorkflow() also removes the worktree before returning, so the worktree is already gone by the time delivery runs.
+2. **Crash-safety vs orphan-free**: Worktree path must be persisted before any crash could make it untracked. The `if (startContinueToken)` guard on the second `persistTokens()` call means that if `startContinueToken` is falsy at worktree creation time, the sidecar never records the worktree path -- creating an untracked orphan if the process crashes.
+3. **Type safety vs path coupling**: `sessionId` is currently extracted via `result.sessionWorkspacePath.split('/').at(-1)` -- a fragile string operation that couples branch-naming convention (UUID in path) to the calling code. Threading sessionId as a typed field on `WorkflowRunSuccess` eliminates this coupling.
+4. **Fail-fast validation vs runtime discovery**: Validating `branchPrefix`/`baseBranch` at parse time (trigger-store) produces a clear config error. Waiting until worktree creation produces a cryptic `git checkout` error deep in the session setup.
+### What Makes This Hard
+The key insight for the CRITICAL bug: the cleanup code at trigger-router.ts lines 365-377 is inside `maybeRunDelivery()`, but `maybeRunDelivery()` returns early (line 293) when `autoCommit !== true`. This means worktree sessions with `autoCommit: false` would accumulate orphan worktrees if the runWorkflow() cleanup is removed without a compensating change. The review accepts this -- startup recovery (24h threshold) handles the edge case.
+For Minor 1, the `persistTokens()` function already handles `worktreePath?: string` (omits the field when undefined). The guard `if (startContinueToken)` was added to avoid writing a blank token to the sidecar, but it incorrectly prevents worktreePath from being persisted when the token is falsy. The fix must decouple the worktreePath persistence from the token presence check.
+## Philosophy Constraints
+From CLAUDE.md:
+- **Architectural fixes over patches**: Move cleanup to the correct layer (trigger-router), not patch runWorkflow().
+- **Errors are data**: Use `TriggerStoreError` with `kind: 'invalid_field_value'` for validation failures.
+- **Make illegal states unrepresentable**: `sessionId?: string` on `WorkflowRunSuccess` makes path-parsing unnecessary.
+- **Explicit domain types**: typed sessionId instead of stringly-typed split.
+- **Validate at boundaries**: branchPrefix/baseBranch validation belongs at parse time, not at worktree creation.
+- **Document 'why'**: JSDoc on makeSpawnAgentTool must explain the architectural reason for branchStrategy:'none'.
+No philosophy conflicts detected.
+## Impact Surface
+- **WorkflowRunSuccess interface**: Adding optional `sessionId?: string` is additive. Immediate-complete path (line 3062) must also be updated to include sessionId when applicable.
+- **trigger-router.ts maybeRunDelivery()**: Line 321 changes from `.split('/').at(-1)` to `result.sessionId`. No interface contract changes for callers of TriggerRouter.
+- **trigger-store.ts**: New validation added before existing branchStrategy/baseBranch/branchPrefix are assembled into the trigger. No changes to the TriggerDefinition shape.
+- **spawn_agent tool**: JSDoc addition only -- no behavior change, no callers affected.
+- **persistTokens()**: No signature change. Guard removal makes the second call unconditional.
+## Candidates
+### Candidate A: Follow Review Verbatim (Recommended)
+**Summary**: Apply all 7 findings exactly as specified, accepting that non-autoCommit worktree sessions (a rare/unlikely combination) have worktrees cleaned up by runStartupRecovery after 24h.
+**Tensions resolved**:
+- CRITICAL: delivery no longer races with worktree removal
+- Minor 2: sessionId no longer requires path parsing
+- Minor 3: validation catches bad git chars at daemon startup
+**Tensions accepted**:
+- Non-autoCommit worktree sessions accumulate for up to 24h before startup recovery cleans them
+**Boundary**: runWorkflow() owns session execution; trigger-router owns delivery lifecycle including post-delivery cleanup.
+**Failure mode**: If a worktree session has autoCommit=false (unusual -- why use worktree isolation without autoCommit?), the worktree persists for 24h. Acceptable given startup recovery already handles this.
+**Repo-pattern relationship**: Follows. The `sessionWorkspacePath` threading pattern, `TriggerStoreError` validation, and startup recovery cleanup are all existing patterns.
+**Gains**: Minimal diff, matches review intent exactly, no new abstractions.
+**Losses**: Minor 24h worktree leak for non-autoCommit sessions.
+**Scope**: Best-fit.
+**Philosophy**: Honors architectural fixes over patches, errors-as-data, explicit domain types, validate at boundaries.
+### Candidate B: Move Cleanup to Queue Callback
+**Summary**: Move worktree cleanup out of `maybeRunDelivery()` to the queue callback that orchestrates `runWorkflow()` + `maybeRunDelivery()`, so cleanup always runs regardless of autoCommit.
+**Tensions resolved**: Worktree leak for non-autoCommit sessions eliminated.
+**Tensions accepted**: More invasive change, modifies both trigger-router internals and cleanup location.
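+**Failure mode**: Unless the existing cleanup in maybeRunDelivery() is deleted as part of the move, cleanup logic ends up in two places (maybeRunDelivery for autoCommit=true sessions, queue callback for all sessions), which is harder to reason about.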
+**Scope**: Too broad. Review doesn't ask for this, and it changes the cleanup location the review identifies as correct.
+**Philosophy conflict**: YAGNI with discipline -- adding complexity without evidence the non-autoCommit+worktree combination is a real use case.
+## Comparison and Recommendation
+**Recommendation: Candidate A**
+The review is the upstream spec. It explicitly says "The cleanup in `maybeRunDelivery()` (in trigger-router) is the architecturally correct location and should be the sole success-path removal." Candidate A follows this exactly. The 24h cleanup window for the edge case is handled by an existing mechanism (runStartupRecovery).
+## Self-Critique
+**Strongest counter-argument**: Moving cleanup out of runWorkflow() opens a window in which the process can crash after runWorkflow() returns but before maybeRunDelivery() cleans up -- leaving an orphaned worktree. However, startup recovery already handles this case, and the review explicitly accepts this tradeoff.
+**Pivot condition**: If evidence emerges that branchStrategy='worktree' without autoCommit is a common pattern, Candidate B becomes justified.
+**Invalidating assumption**: This recommendation would be invalidated if the review were wrong to identify the cleanup location in trigger-router as correct. However, the comment at lines 355-357 of trigger-router.ts is the author's own documentation of the invariant, so the review's position is self-consistent.
+## Open Questions for Main Agent
+1. When implementing Minor 1: should the second `persistTokens()` call use `startContinueToken ?? ''` (write empty string) or `currentContinueToken` (same value at that point)? Both work since startup recovery handles malformed sidecars. Prefer `startContinueToken ?? ''` to be explicit about the fallback.
+2. The immediate-complete path at line 3062 returns `{ _tag: 'success', workflowId: trigger.workflowId, stopReason: 'stop' }` without `sessionWorkspacePath`. Should it also include `sessionId` and `sessionWorkspacePath`? Yes -- if a single-step workflow with branchStrategy='worktree' completes immediately, delivery still needs to run from the worktree.

package/docs/design/worktree-review-findings-design-review.md ADDED Viewed

@@ -0,0 +1,65 @@
+# Worktree Review Findings - Design Review
+## Tradeoff Review
+| Tradeoff | Acceptance Criteria Impact | Hidden Assumptions | Verdict |
+|---|---|---|---|
+| 24h orphan window for non-autoCommit worktree sessions | None -- startup recovery handles this | Daemon restarts at least once per 24h | Acceptable |
+| Empty string token fallback in persistTokens() | None -- sidecar still tracks worktreePath for orphan recovery | startContinueToken is always set before worktree creation (verified in code flow) | Acceptable |
+| sessionId absent for spawn_agent child sessions | None -- children never use branchStrategy:'worktree' | No caller reads WorkflowRunSuccess.sessionId except the one being updated | Acceptable |
+## Failure Mode Review
+| Failure Mode | Handled By | Missing Mitigation | Risk |
+|---|---|---|---|
+| Crash after runWorkflow() returns, before maybeRunDelivery() cleans up | Startup recovery (24h) | None needed | Low |
+| maybeRunDelivery() fails partway | Cleanup runs regardless of deliveryResult._tag | None | Low |
+| startContinueToken genuinely undefined at worktree creation | persistTokens() still writes worktreePath; sidecar cleaned on next start | None | Low (theoretical only) |
+| Regex rejects valid but unusual git branch name | Fail-fast with clear config error | None -- review specifies this regex | Low |
+## Runner-Up / Simpler Alternative Review
+- Runner-up (cleanup in queue callback): not worth borrowing -- review explicitly identifies maybeRunDelivery() as the correct cleanup location.
+- Simpler variants (skip Minor 2 or Minor 3): not acceptable -- each finding has a specific correctness justification, not just cosmetic preference.
+- No hybrid opportunities identified.
+## Philosophy Alignment
+All 7 fixes align with CLAUDE.md principles:
+- Architectural fix: cleanup moved to correct layer
+- Errors-as-data: TriggerStoreError for validation
+- Make illegal states unrepresentable: sessionId as typed field
+- Validate at boundaries: branchPrefix/baseBranch at parse time
+- Document 'why': JSDoc on makeSpawnAgentTool
+- YAGNI: only the 7 specified fixes implemented
+No philosophy conflicts.
+## Findings
+### Yellow: Immediate-Complete Path Missing sessionWorkspacePath/sessionId
+The review asks to fix both the success path AND the immediate-complete path for the CRITICAL bug (remove worktree cleanup). But the current immediate-complete return at line 3062 also lacks `sessionWorkspacePath` and `sessionId` spreading. Without these, a single-step workflow with branchStrategy='worktree' would return success with no delivery context, and maybeRunDelivery() would use trigger.workspacePath (wrong directory) for delivery.
+**Severity**: Yellow. The review mentions fixing both paths for cleanup removal, but doesn't explicitly call out the missing return fields. However, omitting them would make the cleanup fix incomplete for the immediate-complete case.
+**Recommended fix**: Add the same spreading pattern used in the main success return to the immediate-complete return:
+```typescript
+return {
+  _tag: 'success',
+  workflowId: trigger.workflowId,
+  stopReason: 'stop',
+  ...(sessionWorktreePath !== undefined ? { sessionWorkspacePath: sessionWorktreePath } : {}),
+  ...(sessionWorktreePath !== undefined ? { sessionId } : {}),
+};
+```
+## Recommended Revisions
+1. **Apply Yellow finding**: Add sessionWorkspacePath and sessionId to the immediate-complete return at line 3062 when sessionWorktreePath is defined.
+2. The 7 original review findings: apply as specified.
+## Residual Concerns
+- The 24h orphan window for non-autoCommit worktree sessions is accepted. If this pattern becomes common in production, consider adding explicit cleanup in the queue callback.
+- The regex for branchPrefix/baseBranch is slightly narrower than git's full rules. This is intentional (clear config errors > cryptic git failures) and matches the review spec.

package/docs/design/worktree-review-findings-implementation-plan.md ADDED Viewed

@@ -0,0 +1,153 @@
+# Worktree Review Findings - Implementation Plan
+## Problem Statement
+PR #630 (`feat/worktree-auto-commit`) has 7 MR review findings (1 critical, 2 major, 4 minor) that must be resolved before merge. The critical bug causes delivery to fail with "not a git repository" because `runWorkflow()` deletes the worktree before `maybeRunDelivery()` runs.
+## Acceptance Criteria
+1. `runWorkflow()` does NOT remove the worktree on the success path or immediate-complete path.
+2. `makeSpawnAgentTool()` has a JSDoc comment documenting that child sessions always use `branchStrategy: 'none'`.
+3. `WorkflowRunSuccess` has a `readonly sessionId?: string` field.
+4. `runWorkflow()` sets `sessionId` in the success return when `branchStrategy === 'worktree'`.
+5. `trigger-router.ts` reads `result.sessionId` instead of `result.sessionWorkspacePath.split('/').at(-1)`.
+6. `trigger-store.ts` validates `branchPrefix` and `baseBranch` against `/^[a-zA-Z0-9._/-]+$/` and rejects values starting with `-`.
+7. `tests/unit/trigger-router.test.ts` has a test verifying delivery uses the worktree path.
+8. `npm run build` compiles clean.
+9. `npx vitest run` shows no regressions.
+10. `persistTokens()` is called unconditionally after worktree creation (not gated on `startContinueToken`).
+11. Immediate-complete path return includes `sessionWorkspacePath` and `sessionId` when `sessionWorktreePath !== undefined`.
+## Non-Goals
+- Do NOT touch `src/mcp/` in any way.
+- Do NOT change delivery logic in `delivery-action.ts`.
+- Do NOT change the cleanup location in `maybeRunDelivery()` (lines 365-377 in trigger-router.ts) -- this is correct.
+- Do NOT add new abstractions or dependencies.
+- Do NOT change workflow definitions or schema files.
+## Philosophy-Driven Constraints
+- Use `TriggerStoreError` with `kind: 'invalid_field_value'` for validation errors (errors-as-data).
+- `WorkflowRunSuccess.sessionId` must be `readonly` (immutability by default).
+- JSDoc must explain WHY, not just what (document 'why' principle).
+- Validation must happen at the boundary (trigger-store parse time), not at worktree creation time.
+- Architectural fix: cleanup moves to the correct layer, not patched at the symptom.
+## Invariants
+1. Worktree must exist until `maybeRunDelivery()` completes; `runWorkflow()` must NOT remove it on any success path.
+2. `persistTokens()` must always record `worktreePath` immediately after worktree creation (not conditional on token presence).
+3. The `sessionId` field on `WorkflowRunSuccess` must never require path parsing at the call site.
+4. `branchPrefix` and `baseBranch` must be validated before use (fail-fast at daemon startup).
+## Selected Approach
+Follow review verbatim, with one additional fix: the immediate-complete return path (line 3062) must also include `sessionWorkspacePath` and `sessionId` when a worktree was created (this was missing and discovered during design review).
+## Vertical Slices
+### Slice 1: CRITICAL -- Remove Premature Worktree Removal
+**File**: `src/daemon/workflow-runner.ts`
+**Changes**:
+- Remove the `if (sessionWorktreePath)` cleanup block at lines 3049-3058 (immediate-complete path).
+- Add `sessionWorkspacePath` and `sessionId` spread to the immediate-complete return at line 3062.
+- Remove the `// ---- Remove worktree on success ----` comment and `if (sessionWorktreePath)` block at lines 3502-3514 (success path).
+**Done when**: `runWorkflow()` returns without any `execFileAsync('git', ['-C', ..., 'worktree', 'remove', ...])` calls on the success path or the immediate-complete path. The worktree cleanup in `maybeRunDelivery()` (`trigger-router.ts` lines 365-377, documented by the comment at lines 355-357) remains the sole cleanup on the success path.
+### Slice 2: MAJOR -- JSDoc on makeSpawnAgentTool
+**File**: `src/daemon/workflow-runner.ts`
+**Changes**:
+- Add a JSDoc comment block immediately before `export function makeSpawnAgentTool(` (line 2009).
+- Content: "Child sessions spawned by this tool always have `branchStrategy: 'none'` -- they operate in the parent's workspace without their own worktree or feature branch. Coordinators that need isolated child sessions should dispatch them via `TriggerRouter.dispatch()` instead."
+**Done when**: JSDoc is present and describes the branchStrategy limitation.
+### Slice 3: Minor 1 -- Unconditional persistTokens After Worktree Creation
+**File**: `src/daemon/workflow-runner.ts`
+**Changes**:
+- Remove the `if (startContinueToken)` guard from the second `persistTokens()` call (lines 3020-3022).
+- Replace with an unconditional call: `await persistTokens(sessionId, startContinueToken ?? currentContinueToken, startCheckpointToken, sessionWorktreePath);`
+**Done when**: `persistTokens()` is called unconditionally after worktree creation, ensuring `worktreePath` is always written to the sidecar.
+### Slice 4: Minor 2 -- Thread sessionId Through WorkflowRunSuccess
+**Files**: `src/daemon/workflow-runner.ts`, `src/trigger/trigger-router.ts`
+**Changes in workflow-runner.ts**:
+- Add `readonly sessionId?: string` to `WorkflowRunSuccess` interface (after `sessionWorkspacePath`).
+- In the main success return (line 3526), add `...(sessionWorktreePath !== undefined ? { sessionId } : {})` (where `sessionId` is the process-local UUID already in scope).
+- In the immediate-complete return (line 3062), add `...(sessionWorktreePath !== undefined ? { sessionId } : {})` alongside `sessionWorkspacePath`.
+**Changes in trigger-router.ts**:
+- Line 321: Replace `result.sessionWorkspacePath.split('/').at(-1) ?? ''` with `result.sessionId ?? ''`.
+**Done when**: `WorkflowRunSuccess.sessionId` is set when `branchStrategy === 'worktree'` and trigger-router reads it directly without path manipulation.
+### Slice 5: Minor 3 -- Validate git-safe chars for branchPrefix/baseBranch
+**File**: `src/trigger/trigger-store.ts`
+**Changes**:
+- After lines 867-868 where `baseBranch` and `branchPrefix` are extracted, add regex validation.
+- For each non-undefined value, check `/^[a-zA-Z0-9._/-]+$/` and that it does not start with `-`.
+- Return `err({ kind: 'invalid_field_value', field: '...', triggerId: rawId })` on failure.
+**Done when**: A trigger with `branchPrefix: '--bad'` or `baseBranch: '-main'` fails at parse time with `kind: 'invalid_field_value'`.
+### Slice 6: Minor 4 -- Add End-to-End Delivery Test for branchStrategy:worktree
+**File**: `tests/unit/trigger-router.test.ts`
+**Changes**:
+- Add a test in the `describe('delivery wiring (autoCommit)')` block.
+- The test creates a `WorkflowRunSuccess` with `sessionWorkspacePath: '/worktrees/test-session-id'` and valid `lastStepNotes`.
+- Stubs `runWorkflowFn` to return this success result.
+- Verifies the first git call uses `/worktrees/test-session-id` as the working directory (not trigger.workspacePath).
+**Done when**: Test passes and verifies `execFn` is called with the worktree path.
+## Test Design
+### Existing Tests to Verify Unchanged
+- `tests/unit/trigger-router.test.ts` -- all existing tests must still pass.
+- `tests/unit/trigger-store.test.ts` -- all existing validation tests must still pass.
+### New Test (Slice 6)
+```
+describe('delivery wiring (autoCommit)')
+  it('uses sessionWorkspacePath as working directory when runWorkflow returns a worktree session')
+    - trigger: { autoCommit: true, branchStrategy: 'worktree', workspacePath: '/workspace' }
+    - runWorkflowFn returns: { _tag: 'success', sessionWorkspacePath: '/worktrees/test-session-id', lastStepNotes: VALID_HANDOFF_NOTES }
+    - fakeExec: vi.fn().mockResolvedValue(...)
+    - assertion: fakeExec called; first git add call uses cwd '/worktrees/test-session-id'
+```
+## Risk Register
+| Risk | Likelihood | Impact | Mitigation |
+|---|---|---|---|
+| `startContinueToken` is undefined in practice when branchStrategy='worktree' | Very Low | Low | persistTokens writes '' as fallback; startup recovery handles it |
+| Removing cleanup breaks non-autoCommit worktree sessions | Low | Low | Startup recovery reaps after 24h; combination is unusual |
+| `sessionId` field name collision with WorkRail server sessionId | Low | Low | Field is optional; no ambiguity since it's typed on the interface |
+## PR Packaging Strategy
+All changes on existing branch `feat/worktree-auto-commit`. Single PR #630.
+Commit message: `fix(daemon): address worktree review findings -- move success cleanup, document spawn_agent limitation, thread sessionId, validate git-safe chars`
+## Philosophy Alignment
+| Principle | Slice | Status |
+|---|---|---|
+| Architectural fixes over patches | Slice 1 | Satisfied -- cleanup moved to correct layer |
+| Errors are data | Slice 5 | Satisfied -- TriggerStoreError returned |
+| Make illegal states unrepresentable | Slice 4 | Satisfied -- typed sessionId, no path-parsing |
+| Validate at boundaries | Slice 5 | Satisfied -- parse-time validation |
+| Document 'why' | Slice 2 | Satisfied -- JSDoc explains architectural reason |
+| Immutability by default | Slice 4 | Satisfied -- readonly field added |
+| YAGNI | All | Satisfied -- no new abstractions |
+## Open Questions
+None. All questions resolved during design.
+## Unresolved Unknown Count: 0
+## Plan Confidence Band: High

package/docs/ideas/backlog.md CHANGED Viewed

@@ -6183,3 +6183,215 @@ The daemon tool approach is only better for ad-hoc mid-session queries the agent
 ### Anti-pattern to avoid
 Adding knowledge graph calls directly into `pr-review.ts` or any other coordinator script. That immediately creates the god class we're trying to avoid and couples the orchestration layer to a specific context source.
+---
+## Scheduled tasks (Apr 19, 2026)
+**The idea:** WorkTrain runs tasks on a schedule -- not triggered by an external event, but by time. "Every Monday morning, run the code health scan." "Every night at 2am, check for new GitHub issues and triage them." "First of the month, run the production readiness audit."
+### Why this matters for the autonomous pipeline vision
+The full autonomous pipeline (prioritize → discover → shape → implement → test → PR → review → fix → merge) needs a way to start without a human pushing a button. Scheduled tasks are the trigger layer for proactive, time-driven work. Without them, WorkTrain is purely reactive -- it only acts when a webhook fires or a human dispatches it.
+### What exists today
+The trigger system (`src/trigger/`) supports `generic` (webhook) and polling providers (`gitlab_poll`, `github_issues_poll`, `github_prs_poll`). There is no native cron/schedule provider. The workaround today is OS crontab calling `curl` to fire a webhook.
+### What to build
+A `schedule` provider in triggers.yml:
+```yaml
+triggers:
+  - id: weekly-code-health
+    provider: schedule
+    cron: "0 9 * * 1"          # every Monday at 9am
+    workflowId: architecture-scalability-audit
+    workspacePath: /path/to/repo
+    goal: "Run weekly code health scan -- identify coupling violations, complexity hotspots, and performance anti-patterns introduced this week"
+  - id: nightly-issue-triage
+    provider: schedule
+    cron: "0 2 * * *"          # every night at 2am
+    workflowId: wr.discovery
+    workspacePath: /path/to/repo
+    goal: "Review open GitHub issues created in the last 24 hours and triage them: classify severity, identify duplicates, suggest which to prioritize"
+  - id: backlog-next-task
+    provider: schedule
+    cron: "0 8 * * 1-5"        # weekday mornings at 8am
+    workflowId: coding-task-workflow-agentic
+    workspacePath: /path/to/repo
+    goal: "Pick the highest-priority unstarted task from docs/ideas/backlog.md and implement it"
+```
+### Key design decisions
+- **Cron syntax**: standard 5-field cron (`min hour dom month dow`). Parsed by `node-cron` or equivalent -- already a pattern in the codebase (backlog mentions cron).
+- **Timezone**: configurable per trigger, defaults to system timezone. Important for "weekday morning" schedules that need to fire in the user's timezone.
+- **Missed runs**: if the daemon was down when a scheduled run should have fired, it does NOT catch up on missed runs by default. "Run at 9am Monday" means "run the next time 9am Monday arrives." An optional `catchUp: true` flag covers cases where a missed run should be recovered.
+- **Overlap prevention**: if a scheduled run fires while the previous run is still active, it should be skipped (not queued). A `coding-task` that takes 2 hours should not spawn a second instance at the next cron tick.
+- **Manual trigger**: `worktrain run schedule <trigger-id>` to fire a scheduled trigger immediately without waiting for the cron time. Useful for testing.
+### Integration with the autonomous pipeline
+Scheduled tasks are the entry point for fully autonomous work:
+- "Every weekday morning, pick the next backlog item and run the full pipeline" -- this is how WorkTrain improves WorkTrain without any human input.
+- "Every time a PR is opened, run the MR review pipeline" -- this is github_prs_poll, already exists.
+- "Every Monday, run the architecture audit and file GitHub issues for findings" -- new scheduled capability.
+### Implementation notes
+- The `PollingScheduler` in `src/trigger/polling-scheduler.ts` already runs time-based loops for GitLab/GitHub polling. The schedule provider would be a similar loop, using cron expression matching instead of API polling.
+- `node-cron` or `croner` npm package for cron expression parsing and next-fire-time calculation. Lightweight, no daemon dependencies.
+- Scheduled triggers have no webhook payload -- `contextMapping` is empty, `goalTemplate` uses only static text or env vars.
+- The schedule state (last-fired-at per trigger) persists to `~/.workrail/schedule-state.json` so the daemon can detect missed runs on restart.
+---
+## Autonomous grooming loop + workOnAll mode (Apr 19, 2026)
+### The vision
+WorkTrain eventually finds and executes its own work without any human seeding the queue. This is the full autonomous loop: raw backlog idea → groomed issue → discovered/shaped spec → implemented PR → reviewed → merged. Zero human input required once configured.
+### Three autonomy levels
+**Level 0 -- Opt-in queue (current design)**
+Human adds `worktrain` label to specific issues. WorkTrain works those issues only. Safe, predictable, explicit.
+**Level 1 -- workOnAll mode**
+Config flag `workOnAll: true` in `~/.workrail/config.json`. WorkTrain looks at ALL open issues, infers which ones are actionable, picks the highest-priority one. Human escape hatch: `worktrain:skip` label blocks WorkTrain from touching a specific issue. Status labels (`worktrain:in-progress`, `worktrain:done`) are coordinator-managed for observability. No human-set maturity labels needed -- coordinator infers from content.
+**Level 2 -- Fully proactive**
+WorkTrain also surfaces work it found itself: failing CI, Dependabot alerts, backlog items with no issue, patterns in git history suggesting missing tests or docs. Creates its own work items, runs them, closes the loop.
+### The grooming loop (scheduled, e.g. nightly)
+Runs on a cron trigger. Responsibilities:
+1. Read `docs/ideas/backlog.md`, `docs/roadmap/now-next-later.md`, open GitHub issues
+2. Reconcile: close issues that are already done (PR merged), update priorities based on what shipped recently, flag duplicate or obsolete items
+3. For each ungroomed `worktrain` issue (or all issues in workOnAll mode): infer maturity -- does it have a linked spec? acceptance criteria? concrete implementation plan?
+4. For high-value `idea`-level items: autonomously run `wr.discovery` → `wr.shaping` → update or create issue with pitch attached, set `worktrain:specced`
+5. Backlog → issue promotion: when a backlog item crosses a readiness threshold (has enough context to act on), create a GitHub issue from it
+### Maturity inference (no human-set labels required in Level 1+)
+The coordinator reads issue content and infers:
+- Linked pitch/PRD/spec URL → `ready` or `specced`
+- Has acceptance criteria or concrete implementation plan → `specced` or `ready`
+- Vague/exploratory language → `idea`
+- Has open PR or recent branch activity → skip (already in flight)
+The `worktrain:idea/specced/ready` taxonomy is the coordinator's internal model, not something humans set. In Level 1+ the coordinator manages it automatically.
+### workOnAll config
+```jsonc
+// ~/.workrail/config.json
+{
+  "workOnAll": true,
+  "workOnAllExclusions": ["needs-design", "blocked-external", "wontfix"],
+  "maxConcurrentSelf": 2
+}
+```
+`maxConcurrentSelf` caps how many autonomous self-improvement sessions run simultaneously -- important so WorkTrain doesn't try to implement 10 things at once and create merge conflicts.
+### Design notes
+- The grooming loop and the work loop are **separate triggers** with separate schedules. Grooming runs more frequently (nightly or post-merge). Work loop runs on demand or weekly.
+- The grooming loop requires LLM judgment ("is this ready?") -- it's a `wr.discovery`-style session on the backlog, not a deterministic script. This is a feature, not a limitation.
+- `worktrain:skip` is the only label humans need to set in Level 1+ -- it's the explicit "not this one" override.
+- Auto-PR-from-backlog requires careful scope: WorkTrain should create draft PRs for its own discoveries, not automatically push to open issues on other people's repos.
+### Priority
+This is the long-term autonomous vision. Implement in order:
+1. Level 0 (current, task queue PR #4)
+2. workOnAll config flag (small addition to the coordinator, after #4 ships)
+3. Maturity inference (replace label-based routing with content inference)
+4. Grooming loop (scheduled cron trigger, wr.discovery session on backlog)
+5. Level 2 proactive work (post-grooming, after proving the loop works)
+---
+## Escalating review gates based on finding severity (Apr 19, 2026)
+**The idea:** when an MR review returns a Critical finding post-implementation, the review is not over -- it triggers a deeper audit chain before merge is allowed.
+### Current state
+`worktrain run pr-review` routes by severity: `clean` → merge, `minor` → fix-agent loop, `blocking` → escalate to human. But "blocking" is binary -- a single Critical finding and a trivially incorrect comment are treated identically (both block, neither gets more scrutiny).
+### The right behavior
+If the original review returns a Critical finding, or the re-review after a fix round still returns one:
+1. **Another full MR review** -- confirm the Critical is real, not a false positive from the reviewer
+2. **Production readiness audit** (`production-readiness-audit` workflow) -- a Critical finding often implies a runtime risk. Check for error handling gaps, security exposure, missing observability.
+3. **Architecture audit** (`architecture-scalability-audit`) -- if the Critical is architectural (wrong abstraction, tight coupling, violates invariants), run a targeted audit on the affected modules.
+Not all Criticals warrant all three. The coordinator should route based on the finding's `category` field (from `wr.review_verdict`):
+- `correctness` / `security` → always trigger prod audit
+- `architecture` / `design` → trigger arch audit
+- All → trigger re-review
+### Auto-merge policy interaction
+A PR that triggered the escalating audit chain should NEVER auto-merge, even if the final re-review comes back clean. The human should approve it explicitly after seeing the audit trail. This is a hard rule, not a setting.
+### Implementation notes
+- The escalation logic belongs in the `IMPLEMENT` and `REVIEW_ONLY` mode coordinators (part of the adaptive pipeline coordinator work).
+- `wr.review_verdict` `findings[].category` field needs to be defined if not already -- check `src/v2/durable-core/schemas/artifacts/review-verdict.ts`.
+- The audit chain runs sequentially (prod then arch), not in parallel -- each audit's output informs the next.
+- All audit session IDs should be linked to the same parent work unit so the console session tree shows the full chain.
+### Priority
+Design this alongside the adaptive pipeline coordinator (#3). The coordinator needs to know about this escalation policy before its routing logic is finalized -- the `IMPLEMENT` mode's post-review handling is incomplete without it.
+---
+## UX/UI impact detection and design workflow integration (Apr 19, 2026)
+**The idea:** When the adaptive pipeline coordinator classifies a task, it should detect whether the task touches user-facing surfaces (UI components, user flows, API contracts that clients consume) and automatically insert a `ui-ux-design-workflow` run before implementation.
+### Why this matters
+Today, coding tasks that touch UI are implemented without a design pass. The agent writes functional code, but the resulting interfaces are often technically correct yet experientially wrong -- wrong information hierarchy, wrong affordances, missing error states, missing loading states, wrong copy. A `ui-ux-design-workflow` run before coding forces the "multiple design directions before converging" discipline that prevents the single-solution trap.
+### Detection signals (what marks a task as UX-impactful)
+The coordinator should classify a task as `touchesUI: true` when any of:
+- Issue title or body mentions: component, screen, page, modal, dialog, button, form, flow, onboarding, dashboard, table, list, navigation, UX, UI, design, user-facing, frontend, console, web
+- Affected files (from git diff or knowledge graph) include: `console/src/`, `*.tsx`, `*.css`, `web/`, `views/`
+- The task has a `ui` or `frontend` label
+- The upstream spec (pitch/PRD) explicitly calls out visual or interaction design requirements
+False positives (running design workflow unnecessarily) are cheaper than false negatives (shipping bad UX). Default to `touchesUI: true` when signals are ambiguous and the task is `complexity: Medium` or larger.
+### Pipeline integration
+When `touchesUI: true`, the `IMPLEMENT` pipeline becomes:
+```
+coding-task-classify → ui-ux-design-workflow → coding-task-workflow-agentic → PR → review → merge
+```
+The `ui-ux-design-workflow` output (a design spec with chosen direction, information architecture, component breakdown, error states) feeds into Phase 0.5 of `coding-task-workflow-agentic` as the upstream spec. The coding agent then implements against a concrete design spec, not ad-hoc intuition.
+### Relationship to escalating review gates
+When a post-implementation MR review returns a UI/UX finding (wrong affordance, missing state, confusing flow), the escalation should include a targeted `ui-ux-design-workflow` audit pass, not just a code review. UX regressions need design eyes, not just code eyes.
+### Open design questions
+- **Who reviews the design spec before coding starts?** If the UX design workflow runs autonomously at 2am and coding starts immediately after, there is no human review of the design direction. This is fine for small UI tweaks; it's wrong for new user flows. The coordinator needs a complexity gate: `complexity: Large AND touchesUI: true` → require human ack on the design spec before coding.
+- **Design spec format:** `ui-ux-design-workflow` currently produces a markdown design document. Does the coding workflow reliably consume this as an upstream spec via Phase 0.5? Verify before relying on the automated handoff.
+- **Console-specific workflows:** WorkRail's console is a React/TypeScript SPA. Consider a `worktrain:console` label or file-path heuristic that routes to a console-specific design workflow variant.
+### Priority
+Design this as part of the adaptive coordinator (#3). The `touchesUI` flag belongs on the classification output alongside `taskComplexity` and `maturity`. The UI detection logic and the design workflow insertion are both coordinator-level concerns, not engine-level.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exaudeus/workrail",
-  "version": "3.41.0",
+  "version": "3.43.0",
   "description": "Step-by-step workflow enforcement for AI agents via MCP",
   "license": "MIT",
   "repository": {
@@ -54,8 +54,8 @@
     "preinstall": "node -e \"const v=parseInt(process.versions.node.split('.')[0],10); if(v<20){console.error('WorkRail requires Node.js >=20. Current: '+process.versions.node+'\\nPlease upgrade: https://nodejs.org/'); process.exit(1);}\"",
     "dev:mcp": "pkill -f \"$(pwd)/dist/mcp-server.js\" 2>/dev/null; sleep 0.5; WORKRAIL_TRANSPORT=http WORKRAIL_ENABLE_SESSION_TOOLS=true node dist/mcp-server.js",
     "dev:mcp:watch": "pkill -f \"$(pwd)/dist/mcp-server.js\" 2>/dev/null; sleep 0.5; WORKRAIL_TRANSPORT=http WORKRAIL_ENABLE_SESSION_TOOLS=true nodemon --watch dist --ext js --delay 2 --exec 'node dist/mcp-server.js'",
-    "web:dev": "npm run build && WORKRAIL_ENABLE_SESSION_TOOLS=true node dist/mcp-server.js",
-    "web:ci": "WORKRAIL_ENABLE_SESSION_TOOLS=true node dist/mcp-server.js",
+    "web:dev": "npm run build && node dist/cli-worktrain.js console",
+    "web:ci": "node dist/cli-worktrain.js console",
     "web:typecheck": "tsc -p tsconfig.web.json",
     "typecheck": "tsc --noEmit",
     "test": "vitest",