npm - @exaudeus/workrail - Versions diffs - 3.70.1 → 3.70.2 - Mend

@exaudeus/workrail 3.70.1 → 3.70.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/console-ui/assets/{index-BcZJOyVG.js → index-Cr14LfsQ.js} +1 -1
package/dist/console-ui/index.html +1 -1
package/dist/daemon/daemon-events.d.ts +1 -1
package/dist/daemon/workflow-runner.js +4 -2
package/dist/manifest.json +13 -13
package/dist/trigger/polling-scheduler.d.ts +2 -1
package/dist/trigger/polling-scheduler.js +3 -2
package/dist/v2/durable-core/domain/prompt-renderer.js +6 -6
package/docs/discovery/design-review-findings.md +62 -65
package/docs/ideas/backlog.md +222 -106
package/docs/plans/workflow-modernization-design.md +177 -59
package/docs/tickets/next-up.md +7 -15
package/package.json +1 -1
package/workflows/adaptive-ticket-creation.json +53 -18
package/workflows/mr-review-workflow.agentic.v2.json +10 -4

package/dist/manifest.json CHANGED Viewed

@@ -473,8 +473,8 @@
       "sha256": "5fe866e54f796975dec5d8ba9983aefd86074db212d3fccd64eed04bc9f0b3da",
       "bytes": 8011
     },
-    "console-ui/assets/index-BcZJOyVG.js": {
-      "sha256": "30e50a4bf35f3383569bf5c19e27ab255611f2524048ce306017e88ddffb2b69",
+    "console-ui/assets/index-Cr14LfsQ.js": {
+      "sha256": "d5ab351690b555194e079cdfe4bedcf22ac8644568d9a2b5a622d07f3451d291",
       "bytes": 767983
     },
     "console-ui/assets/index-DHrKiMCf.css": {
@@ -482,7 +482,7 @@
       "bytes": 60673
     },
     "console-ui/index.html": {
-      "sha256": "813832d8d8ed9de1597a4c972111a165117cfaa97674a0bbb66783f8de954cb8",
+      "sha256": "c79189d614e1dacfc75ab3a9c4e1e030677f8730e8c2423ac45f977abd010c1a",
       "bytes": 417
     },
     "console/standalone-console.d.ts": {
@@ -614,8 +614,8 @@
       "bytes": 1216
     },
     "daemon/daemon-events.d.ts": {
-      "sha256": "ba8fbd7be2cdd5f0f53c45b3ed4f27b51739ca3cb782e931629469597f98250d",
-      "bytes": 5359
+      "sha256": "469cc9b6954e19a5eb87c3fac42f96fd66cd31321b33ce741fb9c1aa64cb2b80",
+      "bytes": 5369
     },
     "daemon/daemon-events.js": {
       "sha256": "b6841eef4634bb266faf81961c1e387b535dd64a74d58582f3f2bad8c3469d95",
@@ -658,8 +658,8 @@
       "bytes": 8385
     },
     "daemon/workflow-runner.js": {
-      "sha256": "0575f958423e4f0a4abbc712e0901c5bee8c5ed5f2ffa4b3a68083f4251e1243",
-      "bytes": 102945
+      "sha256": "87e8dfbc87f2794f79b4c591eb65a204f12761e3b438174da0f81097fd1e5f4a",
+      "bytes": 103183
     },
     "di/container.d.ts": {
       "sha256": "003bb7fb7478d627524b9b1e76bd0a963a243794a687ff233b96dc0e33a06d9f",
@@ -1718,12 +1718,12 @@
       "bytes": 6968
     },
     "trigger/polling-scheduler.d.ts": {
-      "sha256": "60df456a31fa87ce71de76f5e31a6c460bfab588a24c8a2f06bf926fdcea550a",
-      "bytes": 1096
+      "sha256": "c8bcd28794a23906feabe276725e63fe7af79749e0517a46c6e0ed41da0cabb2",
+      "bytes": 1152
     },
     "trigger/polling-scheduler.js": {
-      "sha256": "a546506d1586b0020a64f1321c07d929f7b24920a0fbd18d6566ff73fb6d9185",
-      "bytes": 22409
+      "sha256": "0f8264ac6393d91e22cc48d6fb89095de2391420175009b68b5848c13be3f7f0",
+      "bytes": 22481
     },
     "trigger/trigger-listener.d.ts": {
       "sha256": "cbd89c24cdfe89cb555946b0dcaa6836bf81f37db56c5e80d408dc4a2fe42fb9",
@@ -2066,8 +2066,8 @@
       "bytes": 1664
     },
     "v2/durable-core/domain/prompt-renderer.js": {
-      "sha256": "fe7d177a79cf8cbf2ee814a0e20e24b4cb3ea1b332c4f6aaf57d0f601a95419b",
-      "bytes": 22976
+      "sha256": "caff32c39ccd59219b44a5063e00e73dd29e5ba3c86a0602c2e609d17e8a0105",
+      "bytes": 23600
     },
     "v2/durable-core/domain/reason-model.d.ts": {
       "sha256": "a944e7e0d9b3c73468488263cb0aa1e446c023f8084fd2af53cbda3f3bfcd37a",

package/dist/trigger/polling-scheduler.d.ts CHANGED Viewed

@@ -16,10 +16,11 @@ export declare class PollingScheduler {
     private readonly router;
     private readonly store;
     private readonly fetchFn?;
+    private readonly sessionsDir;
     private readonly intervals;
     private readonly polling;
     private readonly dispatchingIssues;
-    constructor(triggers: readonly TriggerDefinition[], router: TriggerRouter, store: PolledEventStore, fetchFn?: FetchFn | undefined);
+    constructor(triggers: readonly TriggerDefinition[], router: TriggerRouter, store: PolledEventStore, fetchFn?: FetchFn | undefined, sessionsDir?: string);
     start(): void;
     stop(): void;
     forcePoll(triggerId: string): Promise<ForcePollResult>;

package/dist/trigger/polling-scheduler.js CHANGED Viewed

@@ -46,11 +46,12 @@ function isPollingTrigger(trigger) {
     return trigger.pollingSource !== undefined;
 }
 class PollingScheduler {
-    constructor(triggers, router, store, fetchFn) {
+    constructor(triggers, router, store, fetchFn, sessionsDir = path.join(os.homedir(), '.workrail', 'daemon-sessions')) {
         this.triggers = triggers;
         this.router = router;
         this.store = store;
         this.fetchFn = fetchFn;
+        this.sessionsDir = sessionsDir;
         this.intervals = new Map();
         this.polling = new Map();
         this.dispatchingIssues = new Set();
@@ -222,7 +223,7 @@ class PollingScheduler {
             await appendQueuePollLog({ event: 'poll_cycle_complete', triggerId, reason: 'not_implemented', queueType: queueConfig.type, ts: new Date().toISOString() });
             return;
         }
-        const sessionsDir = path.join(os.homedir(), '.workrail', 'daemon-sessions');
+        const sessionsDir = this.sessionsDir;
         const activeSessions = await countActiveSessions(sessionsDir);
         if (activeSessions >= queueConfig.maxTotalConcurrentSessions) {
             console.log(`[QueuePoll] Skipping cycle: active sessions (${activeSessions}) >= maxTotalConcurrentSessions (${queueConfig.maxTotalConcurrentSessions}).`);

package/dist/v2/durable-core/domain/prompt-renderer.js CHANGED Viewed

@@ -216,16 +216,16 @@ function buildMetricsSection(profile, isLastStep, cleanFormat) {
             if (!isLastStep)
                 return shaFooter;
             const finalFooter = cleanFormat
-                ? '\n\nMetrics (final): also set metrics_outcome, metrics_pr_numbers, metrics_files_changed, metrics_lines_added, metrics_lines_removed in context.'
-                : '\n\n**METRICS (System):** This is the final step. Also report:\n- `metrics_outcome`: `"success"` | `"partial"` | `"abandoned"` | `"error"`\n- `metrics_pr_numbers`: array of integer PR numbers (not URLs)\n- `metrics_files_changed`: integer count\n- `metrics_lines_added`: integer count\n- `metrics_lines_removed`: integer count\n\nCall `continue_workflow` with all of the above in `context: { metrics_commit_shas: [...], metrics_outcome: "success", ... }`.';
+                ? '\n\nMetrics (final): also set metrics_outcome (exactly one of: "success", "partial", "abandoned", "error"), metrics_pr_numbers, metrics_files_changed, metrics_lines_added, metrics_lines_removed in context.'
+                : '\n\n**METRICS (System):** This is the final step. Also report:\n- `metrics_outcome`: set to exactly one of these four strings -- no other values are valid: `"success"`, `"partial"`, `"abandoned"`, `"error"`. Do not describe what you did -- classify the outcome using only these values.\n- `metrics_pr_numbers`: array of integer PR numbers (not URLs)\n- `metrics_files_changed`: integer count\n- `metrics_lines_added`: integer count\n- `metrics_lines_removed`: integer count\n\nCall `continue_workflow` with all of the above in `context: { metrics_commit_shas: [...], metrics_outcome: "success", ... }`.';
             return shaFooter + finalFooter;
         }
         case 'review': {
             if (!isLastStep)
                 return '';
             return cleanFormat
-                ? '\n\nMetrics (final): set metrics_pr_numbers (integer array) and metrics_outcome in context.'
-                : '\n\n**METRICS (System):** This is the final step of a review workflow. Report:\n- `metrics_pr_numbers`: array of integer PR numbers reviewed (not URLs)\n- `metrics_outcome`: `"success"` | `"partial"` | `"abandoned"` | `"error"`\n\nCall `continue_workflow` with `context: { metrics_pr_numbers: [123], metrics_outcome: "success" }`.';
+                ? '\n\nMetrics (final): set metrics_pr_numbers (integer array) and metrics_outcome (exactly one of: "success", "partial", "abandoned", "error") in context.'
+                : '\n\n**METRICS (System):** This is the final step of a review workflow. Report:\n- `metrics_pr_numbers`: array of integer PR numbers reviewed (not URLs)\n- `metrics_outcome`: set to exactly one of these four strings -- no other values are valid: `"success"`, `"partial"`, `"abandoned"`, `"error"`. Do not describe what you did -- classify the outcome using only these values.\n\nCall `continue_workflow` with `context: { metrics_pr_numbers: [123], metrics_outcome: "success" }`.';
         }
         case 'research':
         case 'design':
@@ -233,8 +233,8 @@ function buildMetricsSection(profile, isLastStep, cleanFormat) {
             if (!isLastStep)
                 return '';
             return cleanFormat
-                ? '\n\nMetrics (final): set metrics_outcome in context.'
-                : '\n\n**METRICS (System):** This is the final step. Report:\n- `metrics_outcome`: `"success"` | `"partial"` | `"abandoned"` | `"error"`\n\nCall `continue_workflow` with `context: { metrics_outcome: "success" }`.';
+                ? '\n\nMetrics (final): set metrics_outcome (exactly one of: "success", "partial", "abandoned", "error") in context.'
+                : '\n\n**METRICS (System):** This is the final step. Report:\n- `metrics_outcome`: set to exactly one of these four strings -- no other values are valid: `"success"`, `"partial"`, `"abandoned"`, `"error"`. Do not describe what you did -- classify the outcome using only these values.\n\nCall `continue_workflow` with `context: { metrics_outcome: "success" }`.';
         }
     }
 }

package/docs/discovery/design-review-findings.md CHANGED Viewed

@@ -1,110 +1,107 @@
-# Design Review Findings: wr.discovery Goal Reframing
+# Design Review Findings: Issue #393 Stale Tracker Close
-*Concise, actionable findings for main-agent synthesis. Not a final decision.*
-**Date:** 2026-04-18
+**Reviewed design:** Candidate A -- Close Issue #393 with evidence comment
+**Review date:** 2026-04-23
+**Reviewer:** wr.discovery session (design_first / QUICK path)
 ---
 ## Tradeoff Review
-**Tradeoff 1: goalType classification remains agent judgment**
+**Tradeoff 1: Close comment mentions why auto-close did not fire**
+- Does not violate any decision criterion -- in fact satisfies Criterion 5 (captures the learning)
+- Tone risk (sounds like blame): mitigated by using technical/mechanical language
+- Hidden assumption: maintainer reads close comments -- reasonable for a 1-person repo
+- **Verdict: Acceptable**
-- Acceptable: this pattern is established in the workflow (rigorMode, pathRecommendation are all agent-derived)
-- Classification examples in the procedure reduce misclassification probability
-- **Finding: YELLOW.** Add goalType classification examples to procedure text to reduce ambiguity at the problem_framed / opportunity_framed boundary.
-**Tradeoff 2: overhead for well-framed goals is nonzero**
-- A few additional context variable captures and procedure lines in Phase 0
-- Well-framed goals produce minimal output ('goalType = problem_framed, no impliedProblem needed')
-- **Finding: NON-ISSUE.** Overhead is trivial.
-**Tradeoff 3: Phase 1g always-on for design_first/full_spectrum**
-- One additional advance per session for these paths
-- Produces trivially short output for well-framed sessions ('pathChangedAfterContext = false')
-- **CRITICAL CORRECTION NEEDED:** Phase 1g runCondition must be an OR (`retriageNeeded = true OR pathRecommendation in [design_first, full_spectrum]`) not a replacement. Otherwise landscape_first sessions that explicitly need retriage will not trigger it.
-- **Finding: YELLOW.** Runnable as-designed if the OR condition is used correctly.
+**Tradeoff 2: No AGENTS.md update -- pattern prevention sacrificed**
+- Actively satisfies Criterion 3 (no new problems) by not touching a protected human-maintained file
+- Only one observed instance of the failure mode (PR #790) -- insufficient evidence of a recurring pattern
+- The learning is captured in the close comment itself, which is more contextually located than AGENTS.md
+- **Verdict: Acceptable**
 ---
 ## Failure Mode Review
-**Failure mode 1: Agent misclassifies solution-framed goal as opportunity_framed**
-- Status: **Partially mitigated.** C3's Phase 1e/1f required 'what would make this framing wrong' output provides a downstream catch. Classification examples in Phase 0 reduce probability.
-- Missing mitigation: examples in procedure (address in revisions)
-- **Finding: MEDIUM risk, mitigated.**
+**FM1: Maintainer re-opens because they wanted to close personally**
+- Design handles it: close comment provides full evidence chain; re-open is 5-second CLI command; no data loss
+- Likelihood: Low (0 comments, 2-day stale, daemon assignee, no checkbox activity)
+- Severity if occurs: Minimal (issue re-opens, maintainer closes manually, traceability comment persists)
+- Missing mitigations: None needed
-**Failure mode 2: 'What would make this framing wrong' output is formulaic**
-- Status: **Partially mitigated.** Making it required non-empty enforces form but not quality.
-- Missing mitigation: specificity instruction ('name ONE concrete condition, not a general caveat')
-- **Finding: LOW-MEDIUM risk.**
-**Failure mode 3: Phase 1g doesn't surface new insights for well-framed sessions**
-- Status: **Non-issue by design.** For well-framed sessions, Phase 1g is a graceful no-op that confirms the path is still correct. One advance wasted, nothing more.
-- **Finding: LOW risk, acceptable.**
+**FM2: CI secretly failing for the test file**
+- Design handles it: 14/14 passing verified locally immediately before action; test is isolated unit coverage
+- No related CI failure issues exist in the open issues list for this test file
+- Severity if occurs: Low (tracker close is orthogonal to CI state; no test regression introduced)
+- Missing mitigations: None needed
 ---
 ## Runner-Up / Simpler Alternative Review
-**C2 (mandatory Phase 0a):** The structural enforcement advantage is real but comes at the cost of a mandatory overhead step for all sessions. The C1+C3 hybrid achieves most of C2's value via procedure-level enforcement plus structural runCondition changes. C2 is the right escalation if the hybrid proves insufficient.
-**Simpler variant:** Just one sentence added to Phase 0: 'If the goal is solution-framed, derive the underlying problem.' Too narrow -- no context variables means no downstream reference to the reframing.
-**`alternativeFraming` addition from C2:** Borrowing C2's `alternativeFraming` requirement (one reframe even when the original goal seems correct) is high-value, low-cost. Add to Phase 0 design doc entry, not as a context variable.
+**Runner-up (Candidate B -- close + AGENTS.md note):**
+- Candidate B's only distinct value (keyword failure mode documentation) is fully absorbed into Candidate A's close comment
+- No element of B is orphaned; no hybrid needed
-**Finding: C1+C3 hybrid with two refinements (examples, alternativeFraming) stands. No direction change needed.**
+**Simpler variant (close without comment):**
+- Fails Criterion 2 (no traceability) and Criterion 5 (no learning capture)
+- Not viable -- the comment is load-bearing, not decorative
 ---
 ## Philosophy Alignment
-| Principle | Status |
-|---|---|
-| Validate at boundaries, trust inside | SATISFIED -- Phase 0 becomes an active validator |
-| Make illegal states unrepresentable | PARTIALLY SATISFIED -- C3 structural changes help; C2 would fully satisfy |
-| YAGNI with discipline | SATISFIED -- no new steps, minimal additions |
-| Architectural fixes over patches | SATISFIED -- runCondition changes and required output contracts are structural |
-| Determinism over cleverness | SATISFIED -- same goalType input produces same path behavior |
+**Clearly satisfied:**
+- Validate at boundaries, trust inside -- all validation done before action
+- Observability -- close comment makes the state transition and rationale fully visible
+- Document "why" not "what" -- comment explains rationale, not just action
+- Atomicity -- single CLI call, no partial state possible
+- Architectural fixes over patches -- reframe correctly identified the real problem (stale tracker) before acting
-**One explicit philosophy tension:** 'Make illegal states unrepresentable' vs 'YAGNI with discipline' -- deliberately accepted, C2 is escalation path.
+**Under tension:**
+- Agent authority over human-filed issues -- mild tension; resolved by reversibility + transparent comment
+- **Verdict: Acceptable tension, not risky**
 ---
 ## Findings
-### Yellow findings
+No RED or ORANGE findings. All challenges and reviews converge.
-**Y1: goalType classification boundary ambiguity**
-The boundary between `problem_framed` and `opportunity_framed` is unclear without examples. Add classification examples to Phase 0 procedure to reduce misclassification at this boundary.
-**Y2: Phase 1g runCondition must be OR, not replacement**
-The retriage step runCondition must be: `retriageNeeded = true OR pathRecommendation == design_first OR pathRecommendation == full_spectrum`. A straight replacement would break landscape_first sessions that legitimately need retriage.
-**Y3: 'What would make this framing wrong' needs specificity instruction**
-The required output field should specify 'name ONE concrete falsification condition, not a general caveat.' Without this, the field can be satisfied by formulaic responses.
-### No Red or Orange findings
-The selected C1+C3 direction has no material structural weaknesses.
+**YELLOW -- Tone of close comment (INFO)**
+- Risk: the explanation of why auto-close did not fire could read as attributing a mistake to the PR author
+- Mitigation: use mechanical/technical language ("GitHub requires `Closes #NNN` syntax; PR #790 used different phrasing") rather than evaluative language
+- Action: word the comment accordingly -- no structural change to the design needed
 ---
 ## Recommended Revisions
-1. **Add goalType classification examples** to Phase 0 procedure (solution_framed: 'add X', 'implement Y', 'build X'; problem_framed: 'reduce X', 'fix Y'; opportunity_framed: 'explore X', 'decide whether Y'; decision_framed: 'choose between A and B')
+**Revision 1 (from YELLOW finding): Prescribe exact comment wording**
-2. **Add `alternativeFraming`** as a required design doc entry in Phase 0: 'Before selecting a path, generate one alternative framing -- if the stated goal is wrong, what would a better goal be?'
+Use this comment text:
-3. **Use OR condition for Phase 1g runCondition:** `retriageNeeded = true OR pathRecommendation in [design_first, full_spectrum]`
+> All acceptance criteria for this issue are satisfied on `main`.
+>
+> - `loadSessionNotes` is exported at `src/daemon/workflow-runner.ts` (added in PR #790)
+> - All 4 failure paths (token decode, store load, projection, unexpected exception) and the happy path are covered by `tests/unit/workflow-runner-load-session-notes.test.ts` (added in PR #782)
+> - 14 tests pass: `npx vitest run tests/unit/workflow-runner-load-session-notes.test.ts`
+>
+> Note: PR #790 referenced this issue as "Closes issue #393 pre-existing test failures" but GitHub's auto-close requires the exact syntax `Closes #393` -- the non-standard phrasing is why the issue was not automatically closed on merge.
-4. **Add specificity instruction** to Phase 1e/1f 'what would make this framing wrong' field: require naming one concrete falsification condition.
+This wording is factual, neutral, and gives any future reader everything they need to verify or re-open.
 ---
 ## Residual Concerns
-1. The goalType classification is LLM-dependent. Without empirical testing on real sessions, we cannot confirm the classification is reliable. This is an inherent limitation of the approach.
+**RC1 (low): Pattern recurrence unaddressed**
+If the PR keyword failure mode recurs on a second PR, the case for Candidate B (AGENTS.md note) strengthens. This is not actionable now but should be noted for future monitoring.
+**RC2 (very low): Open CI failures on main**
+There are 10+ open "CI failure on main blocking release" issues. These are unrelated to the test file in question (verified locally). If main CI is broken in a way that affects this test file, the close would be slightly premature. Probability is very low given local verification.
+---
-2. The C1+C3 hybrid does not prevent path-selection bias for the window between Phase 0 path selection and Phase 1g retriage. A session that selects the wrong path in Phase 0 runs several steps in the wrong direction before Phase 1g can correct it. Acceptable for STANDARD rigor; C2 is the correct escalation if this proves problematic.
+**Overall verdict: PROCEED with Candidate A as designed, using the prescribed comment wording from Revision 1.**