@exaudeus/workrail 3.77.1 → 3.78.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/services/compiler/ref-registry.js +2 -1
- package/dist/console-ui/assets/{index-BooFww1c.js → index-CcgqczfJ.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/coordinators/modes/full-pipeline.js +5 -5
- package/dist/coordinators/modes/implement-shared.js +8 -9
- package/dist/coordinators/pipeline-run-context.d.ts +18 -0
- package/dist/coordinators/pr-review.d.ts +5 -1
- package/dist/coordinators/pr-review.js +1 -1
- package/dist/manifest.json +24 -24
- package/dist/v2/durable-core/schemas/artifacts/discovery-handoff.d.ts +3 -0
- package/dist/v2/durable-core/schemas/artifacts/discovery-handoff.js +1 -0
- package/dist/v2/usecases/console-service.js +15 -4
- package/dist/v2/usecases/console-types.d.ts +3 -0
- package/docs/ideas/backlog.md +69 -39
- package/package.json +1 -1
- package/workflows/routines/hypothesis-challenge.json +2 -2
- package/workflows/wr.discovery.json +219 -88
|
@@ -17,10 +17,10 @@ async function runReviewAndVerdictCycle(deps, opts, prUrl, coordinatorStartMs, i
|
|
|
17
17
|
: `Re-review PR after fixes (iteration ${iteration}): ${prUrl}`;
|
|
18
18
|
deps.stderr(`[review-cycle] Spawning review session (iteration=${iteration}): ${reviewGoal.slice(0, 80)}`);
|
|
19
19
|
const reviewContextSummary = (0, context_assembly_js_1.buildContextSummary)(priorArtifacts, 'review');
|
|
20
|
-
const
|
|
21
|
-
prUrl,
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
const reviewContext = reviewContextSummary
|
|
21
|
+
? { prUrl, assembledContextSummary: reviewContextSummary }
|
|
22
|
+
: undefined;
|
|
23
|
+
const reviewSpawnResult = await deps.spawnSession('wr.mr-review', reviewGoal, opts.workspace, reviewContext);
|
|
24
24
|
if (reviewSpawnResult.kind === 'err') {
|
|
25
25
|
return {
|
|
26
26
|
kind: 'escalated',
|
|
@@ -92,11 +92,10 @@ async function runReviewAndVerdictCycle(deps, opts, prUrl, coordinatorStartMs, i
|
|
|
92
92
|
return fixCutoff;
|
|
93
93
|
const fixGoal = `Fix review findings: ${findings.findingSummaries.slice(0, 3).join('; ')}`;
|
|
94
94
|
const fixContextSummary = (0, context_assembly_js_1.buildContextSummary)(priorArtifacts, 'fix');
|
|
95
|
-
const
|
|
96
|
-
prUrl,
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
});
|
|
95
|
+
const fixContext = fixContextSummary
|
|
96
|
+
? { prUrl, findings: findings.findingSummaries, assembledContextSummary: fixContextSummary }
|
|
97
|
+
: undefined;
|
|
98
|
+
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace, fixContext);
|
|
100
99
|
if (fixSpawnResult.kind === 'err') {
|
|
101
100
|
return {
|
|
102
101
|
kind: 'escalated',
|
|
@@ -95,6 +95,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
95
95
|
path: string;
|
|
96
96
|
relevance: string;
|
|
97
97
|
}>, "many">>;
|
|
98
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
98
99
|
}, "strict", z.ZodTypeAny, {
|
|
99
100
|
kind: "wr.discovery_handoff";
|
|
100
101
|
version: 1;
|
|
@@ -111,6 +112,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
111
112
|
path: string;
|
|
112
113
|
relevance: string;
|
|
113
114
|
}[] | undefined;
|
|
115
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
114
116
|
}, {
|
|
115
117
|
kind: "wr.discovery_handoff";
|
|
116
118
|
version: 1;
|
|
@@ -127,6 +129,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
127
129
|
path: string;
|
|
128
130
|
relevance: string;
|
|
129
131
|
}[] | undefined;
|
|
132
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
130
133
|
}>;
|
|
131
134
|
confidenceBand: z.ZodNullable<z.ZodEnum<["high", "medium", "low"]>>;
|
|
132
135
|
recapMarkdown: z.ZodNullable<z.ZodString>;
|
|
@@ -148,6 +151,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
148
151
|
path: string;
|
|
149
152
|
relevance: string;
|
|
150
153
|
}[] | undefined;
|
|
154
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
151
155
|
};
|
|
152
156
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
153
157
|
recapMarkdown: string | null;
|
|
@@ -169,6 +173,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
169
173
|
path: string;
|
|
170
174
|
relevance: string;
|
|
171
175
|
}[] | undefined;
|
|
176
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
172
177
|
};
|
|
173
178
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
174
179
|
recapMarkdown: string | null;
|
|
@@ -210,6 +215,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
210
215
|
path: string;
|
|
211
216
|
relevance: string;
|
|
212
217
|
}[] | undefined;
|
|
218
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
213
219
|
};
|
|
214
220
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
215
221
|
recapMarkdown: string | null;
|
|
@@ -241,6 +247,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
241
247
|
path: string;
|
|
242
248
|
relevance: string;
|
|
243
249
|
}[] | undefined;
|
|
250
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
244
251
|
};
|
|
245
252
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
246
253
|
recapMarkdown: string | null;
|
|
@@ -737,6 +744,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
737
744
|
path: string;
|
|
738
745
|
relevance: string;
|
|
739
746
|
}>, "many">>;
|
|
747
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
740
748
|
}, "strict", z.ZodTypeAny, {
|
|
741
749
|
kind: "wr.discovery_handoff";
|
|
742
750
|
version: 1;
|
|
@@ -753,6 +761,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
753
761
|
path: string;
|
|
754
762
|
relevance: string;
|
|
755
763
|
}[] | undefined;
|
|
764
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
756
765
|
}, {
|
|
757
766
|
kind: "wr.discovery_handoff";
|
|
758
767
|
version: 1;
|
|
@@ -769,6 +778,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
769
778
|
path: string;
|
|
770
779
|
relevance: string;
|
|
771
780
|
}[] | undefined;
|
|
781
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
772
782
|
}>;
|
|
773
783
|
confidenceBand: z.ZodNullable<z.ZodEnum<["high", "medium", "low"]>>;
|
|
774
784
|
recapMarkdown: z.ZodNullable<z.ZodString>;
|
|
@@ -790,6 +800,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
790
800
|
path: string;
|
|
791
801
|
relevance: string;
|
|
792
802
|
}[] | undefined;
|
|
803
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
793
804
|
};
|
|
794
805
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
795
806
|
recapMarkdown: string | null;
|
|
@@ -811,6 +822,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
811
822
|
path: string;
|
|
812
823
|
relevance: string;
|
|
813
824
|
}[] | undefined;
|
|
825
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
814
826
|
};
|
|
815
827
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
816
828
|
recapMarkdown: string | null;
|
|
@@ -852,6 +864,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
852
864
|
path: string;
|
|
853
865
|
relevance: string;
|
|
854
866
|
}[] | undefined;
|
|
867
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
855
868
|
};
|
|
856
869
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
857
870
|
recapMarkdown: string | null;
|
|
@@ -883,6 +896,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
883
896
|
path: string;
|
|
884
897
|
relevance: string;
|
|
885
898
|
}[] | undefined;
|
|
899
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
886
900
|
};
|
|
887
901
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
888
902
|
recapMarkdown: string | null;
|
|
@@ -1412,6 +1426,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1412
1426
|
path: string;
|
|
1413
1427
|
relevance: string;
|
|
1414
1428
|
}[] | undefined;
|
|
1429
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1415
1430
|
};
|
|
1416
1431
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1417
1432
|
recapMarkdown: string | null;
|
|
@@ -1525,6 +1540,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1525
1540
|
path: string;
|
|
1526
1541
|
relevance: string;
|
|
1527
1542
|
}[] | undefined;
|
|
1543
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1528
1544
|
};
|
|
1529
1545
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1530
1546
|
recapMarkdown: string | null;
|
|
@@ -1645,6 +1661,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1645
1661
|
path: string;
|
|
1646
1662
|
relevance: string;
|
|
1647
1663
|
}[] | undefined;
|
|
1664
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1648
1665
|
};
|
|
1649
1666
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1650
1667
|
recapMarkdown: string | null;
|
|
@@ -1766,6 +1783,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1766
1783
|
path: string;
|
|
1767
1784
|
relevance: string;
|
|
1768
1785
|
}[] | undefined;
|
|
1786
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1769
1787
|
};
|
|
1770
1788
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1771
1789
|
recapMarkdown: string | null;
|
|
@@ -2,6 +2,10 @@ import type { Result } from '../runtime/result.js';
|
|
|
2
2
|
import type { AwaitResult } from '../cli/commands/worktrain-await.js';
|
|
3
3
|
import type { ChildSessionResult } from './types.js';
|
|
4
4
|
import type { ContextAssembler } from '../context-assembly/types.js';
|
|
5
|
+
export interface CoordinatorSpawnContext {
|
|
6
|
+
readonly assembledContextSummary?: string;
|
|
7
|
+
readonly [key: string]: unknown;
|
|
8
|
+
}
|
|
5
9
|
export type ReviewSeverity = 'clean' | 'minor' | 'blocking' | 'unknown';
|
|
6
10
|
export interface ReviewFindings {
|
|
7
11
|
readonly severity: ReviewSeverity;
|
|
@@ -38,7 +42,7 @@ export interface PrReviewOpts {
|
|
|
38
42
|
readonly port?: number;
|
|
39
43
|
}
|
|
40
44
|
export interface CoordinatorDeps {
|
|
41
|
-
readonly spawnSession: (workflowId: string, goal: string, workspace: string, context?:
|
|
45
|
+
readonly spawnSession: (workflowId: string, goal: string, workspace: string, context?: CoordinatorSpawnContext, agentConfig?: Readonly<{
|
|
42
46
|
readonly maxSessionMinutes?: number;
|
|
43
47
|
readonly maxTurns?: number;
|
|
44
48
|
}>, parentSessionId?: string) => Promise<Result<string, string>>;
|
|
@@ -589,7 +589,7 @@ async function runFixAgentLoop(deps, opts, pr, initialFindings, initialOutcome,
|
|
|
589
589
|
};
|
|
590
590
|
}
|
|
591
591
|
log(` PR #${pr.number} -> spawning fix agent (pass ${passCount})...`);
|
|
592
|
-
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace);
|
|
592
|
+
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace, reviewSpawnContext);
|
|
593
593
|
if (fixSpawnResult.kind === 'err') {
|
|
594
594
|
log(` PR #${pr.number} -> fix agent spawn failed: ${fixSpawnResult.error}`);
|
|
595
595
|
return {
|
package/dist/manifest.json
CHANGED
|
@@ -30,8 +30,8 @@
|
|
|
30
30
|
"bytes": 428
|
|
31
31
|
},
|
|
32
32
|
"application/services/compiler/ref-registry.js": {
|
|
33
|
-
"sha256": "
|
|
34
|
-
"bytes":
|
|
33
|
+
"sha256": "683b5b08f827d8d695d8f1417798a260e03554b6b2723889320d645174fc2353",
|
|
34
|
+
"bytes": 5456
|
|
35
35
|
},
|
|
36
36
|
"application/services/compiler/resolve-bindings.d.ts": {
|
|
37
37
|
"sha256": "ef190f200228aedf5d27b3fd38c890c5a0613d3346bf0942ec0a7b51cc167d2e",
|
|
@@ -473,16 +473,16 @@
|
|
|
473
473
|
"sha256": "5fe866e54f796975dec5d8ba9983aefd86074db212d3fccd64eed04bc9f0b3da",
|
|
474
474
|
"bytes": 8011
|
|
475
475
|
},
|
|
476
|
-
"console-ui/assets/index-
|
|
477
|
-
"sha256": "
|
|
478
|
-
"bytes":
|
|
476
|
+
"console-ui/assets/index-CcgqczfJ.js": {
|
|
477
|
+
"sha256": "6498c94c7c61c70c6d70dd87c0ae2b2b40dc8065c951b46dc76c14d938975864",
|
|
478
|
+
"bytes": 768377
|
|
479
479
|
},
|
|
480
480
|
"console-ui/assets/index-DHrKiMCf.css": {
|
|
481
481
|
"sha256": "40290b50e21ee7e82433efe13b1aa31c1ea608bd057a5c4e324982f284bc928b",
|
|
482
482
|
"bytes": 60673
|
|
483
483
|
},
|
|
484
484
|
"console-ui/index.html": {
|
|
485
|
-
"sha256": "
|
|
485
|
+
"sha256": "b761f8f350b466ed5ecf4a34fdcfb766493d184eb426a8e9fb7183961cf7e488",
|
|
486
486
|
"bytes": 417
|
|
487
487
|
},
|
|
488
488
|
"console/standalone-console.d.ts": {
|
|
@@ -546,16 +546,16 @@
|
|
|
546
546
|
"bytes": 450
|
|
547
547
|
},
|
|
548
548
|
"coordinators/modes/full-pipeline.js": {
|
|
549
|
-
"sha256": "
|
|
550
|
-
"bytes":
|
|
549
|
+
"sha256": "b5dbd27a48558c4d4df17ec60edaa2f81db06045a867b3cc3f46f4b503833ba7",
|
|
550
|
+
"bytes": 20406
|
|
551
551
|
},
|
|
552
552
|
"coordinators/modes/implement-shared.d.ts": {
|
|
553
553
|
"sha256": "3203d8cb8a51dfe0cf88f3ab29d2dd5e0e60ae3b9c9dcc9f426a8581f55e71ff",
|
|
554
554
|
"bytes": 918
|
|
555
555
|
},
|
|
556
556
|
"coordinators/modes/implement-shared.js": {
|
|
557
|
-
"sha256": "
|
|
558
|
-
"bytes":
|
|
557
|
+
"sha256": "90a7f82afa4cd60507f008497cd1ded147381f5ba41bd012d3d84dd43e91f284",
|
|
558
|
+
"bytes": 14300
|
|
559
559
|
},
|
|
560
560
|
"coordinators/modes/implement.d.ts": {
|
|
561
561
|
"sha256": "23919c24d62a0bf15296a52fbc594cca8b1b34e6f8d98dcf7dede8d97ad4cabb",
|
|
@@ -582,20 +582,20 @@
|
|
|
582
582
|
"bytes": 1198
|
|
583
583
|
},
|
|
584
584
|
"coordinators/pipeline-run-context.d.ts": {
|
|
585
|
-
"sha256": "
|
|
586
|
-
"bytes":
|
|
585
|
+
"sha256": "cb3cb3fde2851472e6769e3934808675175e003e7f8a56579b7807b4e780d9a5",
|
|
586
|
+
"bytes": 70743
|
|
587
587
|
},
|
|
588
588
|
"coordinators/pipeline-run-context.js": {
|
|
589
589
|
"sha256": "5489319764a0dbd1b037521e014784fab518c4ff4f9137e045129e6845793e55",
|
|
590
590
|
"bytes": 4790
|
|
591
591
|
},
|
|
592
592
|
"coordinators/pr-review.d.ts": {
|
|
593
|
-
"sha256": "
|
|
594
|
-
"bytes":
|
|
593
|
+
"sha256": "3ec27f65d802ee25366ea8f053a0d55cd4cb23fa3ffc7fdb5f2c0d743baac64e",
|
|
594
|
+
"bytes": 4620
|
|
595
595
|
},
|
|
596
596
|
"coordinators/pr-review.js": {
|
|
597
|
-
"sha256": "
|
|
598
|
-
"bytes":
|
|
597
|
+
"sha256": "5abceda849923e37e19c447dfd9b88e7d5fb81586f7c2217effde6d89b7ba6b9",
|
|
598
|
+
"bytes": 32439
|
|
599
599
|
},
|
|
600
600
|
"coordinators/routing/route-task.d.ts": {
|
|
601
601
|
"sha256": "6661d21e5cfbc9dffbfd8c2f9aaaf0e30a3251997a2c69c6a1b09929343e30e3",
|
|
@@ -2630,12 +2630,12 @@
|
|
|
2630
2630
|
"bytes": 1242
|
|
2631
2631
|
},
|
|
2632
2632
|
"v2/durable-core/schemas/artifacts/discovery-handoff.d.ts": {
|
|
2633
|
-
"sha256": "
|
|
2634
|
-
"bytes":
|
|
2633
|
+
"sha256": "425c0b1fcb69311e0e42f06359fecb8d935128ea35a53e081d93e43f5ff9d8fb",
|
|
2634
|
+
"bytes": 2672
|
|
2635
2635
|
},
|
|
2636
2636
|
"v2/durable-core/schemas/artifacts/discovery-handoff.js": {
|
|
2637
|
-
"sha256": "
|
|
2638
|
-
"bytes":
|
|
2637
|
+
"sha256": "e76c73a9028a4106aec647e7d9862cae66b9eb36066a46cf4f4634e9d7f7b7d6",
|
|
2638
|
+
"bytes": 1748
|
|
2639
2639
|
},
|
|
2640
2640
|
"v2/durable-core/schemas/artifacts/index.d.ts": {
|
|
2641
2641
|
"sha256": "016e3d46d2eac61e12caf851f8b9d46512b2a3a186bbab7d672127f7f48eb168",
|
|
@@ -3474,12 +3474,12 @@
|
|
|
3474
3474
|
"bytes": 1701
|
|
3475
3475
|
},
|
|
3476
3476
|
"v2/usecases/console-service.js": {
|
|
3477
|
-
"sha256": "
|
|
3478
|
-
"bytes":
|
|
3477
|
+
"sha256": "be260ca0de913b08f1693a61ac8106d61449f999ce7f9e2c1127d7351bc5238e",
|
|
3478
|
+
"bytes": 40956
|
|
3479
3479
|
},
|
|
3480
3480
|
"v2/usecases/console-types.d.ts": {
|
|
3481
|
-
"sha256": "
|
|
3482
|
-
"bytes":
|
|
3481
|
+
"sha256": "385e4c316de5b3884532d14ba77a0a06e88b91873cb346bf339723aaf01c5d93",
|
|
3482
|
+
"bytes": 8184
|
|
3483
3483
|
},
|
|
3484
3484
|
"v2/usecases/console-types.js": {
|
|
3485
3485
|
"sha256": "d43aa81f5bc89faa359e0f97c814ba25155591ff078fbb9bfd40f8c7c9683230",
|
|
@@ -28,6 +28,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
28
28
|
path: string;
|
|
29
29
|
relevance: string;
|
|
30
30
|
}>, "many">>;
|
|
31
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
31
32
|
}, "strict", z.ZodTypeAny, {
|
|
32
33
|
kind: "wr.discovery_handoff";
|
|
33
34
|
version: 1;
|
|
@@ -44,6 +45,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
44
45
|
path: string;
|
|
45
46
|
relevance: string;
|
|
46
47
|
}[] | undefined;
|
|
48
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
47
49
|
}, {
|
|
48
50
|
kind: "wr.discovery_handoff";
|
|
49
51
|
version: 1;
|
|
@@ -60,6 +62,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
60
62
|
path: string;
|
|
61
63
|
relevance: string;
|
|
62
64
|
}[] | undefined;
|
|
65
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
63
66
|
}>;
|
|
64
67
|
export type DiscoveryHandoffArtifactV1 = z.infer<typeof DiscoveryHandoffArtifactV1Schema>;
|
|
65
68
|
export declare function isDiscoveryHandoffArtifact(artifact: unknown): artifact is {
|
|
@@ -22,6 +22,7 @@ exports.DiscoveryHandoffArtifactV1Schema = zod_1.z
|
|
|
22
22
|
path: zod_1.z.string().min(1).max(300),
|
|
23
23
|
relevance: zod_1.z.string().min(1).max(150),
|
|
24
24
|
})).max(10).optional(),
|
|
25
|
+
selectionTier: zod_1.z.enum(['strong_recommendation', 'provisional_recommendation', 'insufficient_signal']).optional(),
|
|
25
26
|
})
|
|
26
27
|
.strict();
|
|
27
28
|
function isDiscoveryHandoffArtifact(artifact) {
|
|
@@ -736,6 +736,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
736
736
|
const sessionHealth = health.isOk() && health.value.kind === 'healthy' ? 'healthy' : 'corrupt';
|
|
737
737
|
const sortedEventsRes = (0, sorted_event_log_js_1.asSortedEventLog)(events);
|
|
738
738
|
const sessionTitle = sortedEventsRes.isOk() ? deriveSessionTitle(sortedEventsRes.value) : null;
|
|
739
|
+
const runContextRes = sortedEventsRes.isOk() ? (0, run_context_js_1.projectRunContextV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
739
740
|
const detailTriggerSource = (() => {
|
|
740
741
|
if (sortedEventsRes.isOk()) {
|
|
741
742
|
for (const e of sortedEventsRes.value) {
|
|
@@ -744,13 +745,23 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
744
745
|
}
|
|
745
746
|
}
|
|
746
747
|
}
|
|
747
|
-
const
|
|
748
|
-
const isAutonomous = contextRes.isOk() && Object.values(contextRes.value.byRunId).some((runCtx) => runCtx.context['is_autonomous'] === 'true');
|
|
748
|
+
const isAutonomous = runContextRes.isOk() && Object.values(runContextRes.value.byRunId).some((runCtx) => runCtx.context['is_autonomous'] === 'true');
|
|
749
749
|
return isAutonomous ? 'daemon' : 'mcp';
|
|
750
750
|
})();
|
|
751
|
+
const injectedContext = (() => {
|
|
752
|
+
if (!runContextRes.isOk())
|
|
753
|
+
return undefined;
|
|
754
|
+
for (const runCtx of Object.values(runContextRes.value.byRunId)) {
|
|
755
|
+
const summary = runCtx.context['assembledContextSummary'];
|
|
756
|
+
if (typeof summary === 'string' && summary.trim().length > 0) {
|
|
757
|
+
return { assembledContextSummary: summary };
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
return undefined;
|
|
761
|
+
})();
|
|
751
762
|
const dagRes = (0, run_dag_js_1.projectRunDagV2)(events);
|
|
752
763
|
if (dagRes.isErr()) {
|
|
753
|
-
return { sessionId, sessionTitle, health: sessionHealth, runs: [], metrics: null, repoRoot: null, triggerSource: detailTriggerSource };
|
|
764
|
+
return { sessionId, sessionTitle, health: sessionHealth, runs: [], metrics: null, repoRoot: null, triggerSource: detailTriggerSource, ...(injectedContext !== undefined ? { injectedContext } : {}) };
|
|
754
765
|
}
|
|
755
766
|
const statusRes = sortedEventsRes.isOk() ? (0, run_status_signals_js_1.projectRunStatusSignalsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
756
767
|
const gapsRes = sortedEventsRes.isOk() ? (0, gaps_js_1.projectGapsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
@@ -819,7 +830,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
819
830
|
skippedSteps: skippedStepsMap[run.runId] ?? [],
|
|
820
831
|
};
|
|
821
832
|
});
|
|
822
|
-
return { sessionId, sessionTitle, health: sessionHealth, runs, metrics: null, repoRoot: null, triggerSource: detailTriggerSource };
|
|
833
|
+
return { sessionId, sessionTitle, health: sessionHealth, runs, metrics: null, repoRoot: null, triggerSource: detailTriggerSource, ...(injectedContext !== undefined ? { injectedContext } : {}) };
|
|
823
834
|
}
|
|
824
835
|
function deriveRunStatus(isBlocked, hasUnresolvedCriticalGaps, isComplete) {
|
|
825
836
|
if (isBlocked)
|
|
@@ -97,6 +97,9 @@ export interface ConsoleSessionDetail {
|
|
|
97
97
|
readonly metrics: SessionMetricsV2 | null;
|
|
98
98
|
readonly repoRoot: string | null;
|
|
99
99
|
readonly triggerSource: 'daemon' | 'mcp';
|
|
100
|
+
readonly injectedContext?: {
|
|
101
|
+
readonly assembledContextSummary: string;
|
|
102
|
+
};
|
|
100
103
|
}
|
|
101
104
|
export type ConsoleValidationOutcome = 'pass' | 'fail';
|
|
102
105
|
export interface ConsoleValidationResult {
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -194,46 +194,27 @@ The delivery pipeline was extracted into `delivery-pipeline.ts` with explicit st
|
|
|
194
194
|
|
|
195
195
|
### Context injection bugs: double-injection, byte-slice truncation, workspaceRules[0] drop (Apr 30, 2026)
|
|
196
196
|
|
|
197
|
-
**Status:
|
|
197
|
+
**Status: done** | Shipped in PR #946 (fix/etienneb/context-injection-bugs, auto-merge enabled)
|
|
198
198
|
|
|
199
199
|
**Score: 13** | Cor:3 Cap:1 Eff:3 Lev:3 Con:3 | Blocked: no
|
|
200
200
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
1. **Double-injection (`session-context.ts:117-119`):** `trigger.context` is JSON-serialized in full into the initial user message. Since coordinators write `assembledContextSummary` *into* `trigger.context`, the assembled context appears twice -- once in the system prompt (8KB cap applied) and once in the initial user message (uncapped). These diverge when the content exceeds 8KB.
|
|
204
|
-
|
|
205
|
-
2. **Byte-slice truncation (`system-prompt.ts:200-202`):** `assembledContextSummary` is truncated by raw byte index (`ctxStr.slice(0, 8192)`), which splits mid-sentence, mid-section, and can produce malformed UTF-8. The section-aware `buildBudgetedOutput()` pattern already exists in `src/coordinators/context-assembly.ts` and handles this correctly.
|
|
206
|
-
|
|
207
|
-
3. **`workspaceRules[0]` silent drop (`session-context.ts:106`):** `ContextBundle.workspaceRules` is typed as `ContextRule[]` but only `[0]` is consumed. All additional workspace context rules are silently dropped. The type implies per-file rules are supported; the consumer silently ignores them.
|
|
208
|
-
|
|
209
|
-
**Also in scope:** introduce `WorkflowContextSlots` typed fields on `WorkflowTrigger` (or a companion type) for system-managed context fields (`assembledContextSummary`, `priorSessionNotes`, `gitDiffStat`). This eliminates the stringly-typed `trigger.context['assembledContextSummary']` access pattern and is a prerequisite for the universal enricher (see next item). Scope Phase 0 changes to consumption sites only (`buildSystemPrompt`, `buildSessionContext`); coordinator write sites migrate in Phase 1.
|
|
210
|
-
|
|
211
|
-
**Done looks like:** no `trigger.context` JSON dump in `initialPrompt`; `assembledContextSummary` truncated at section boundaries; all `workspaceRules` entries injected; `WorkflowContextSlots` typed fields replace stringly-typed access in consumption sites.
|
|
201
|
+
All three bugs fixed. `WorkflowContextSlots` typed interface + `extractContextSlots()` introduced in `src/daemon/types.ts`. `buildSystemPrompt` refactored to pipeline of pure section functions. `truncateToByteLimit` uses Buffer/surrogate-safe walk-back.
|
|
212
202
|
|
|
213
203
|
---
|
|
214
204
|
|
|
215
205
|
### Universal context enricher for all session entry points (Apr 30, 2026)
|
|
216
206
|
|
|
217
|
-
**Status:
|
|
218
|
-
|
|
219
|
-
**Score: 11** | Cor:1 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: yes (needs context injection bugs fixed first)
|
|
220
|
-
|
|
221
|
-
Today 4 of 6 session entry points receive zero assembled context: raw webhook triggers, direct dispatch, `spawn_agent` children, and crash-recovered sessions never get cross-session notes or git diff state. Only coordinator-spawned sessions (via `pr-review.ts` or the adaptive pipeline) get assembled context -- and even then only through opt-in coordinator logic, not structural injection.
|
|
222
|
-
|
|
223
|
-
There is no single layer that all dispatch paths share where assembly can run universally. Coordinators that care must call assembly explicitly; everything else gets nothing. This means every new entry point or coordinator is another opportunity to forget assembly.
|
|
207
|
+
**Status: done** | Shipped in PR #947 (feat/etienneb/workflow-enricher, auto-merge enabled, depends on #946)
|
|
224
208
|
|
|
225
|
-
**
|
|
209
|
+
**Score: 11** | Cor:1 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: no
|
|
226
210
|
|
|
227
|
-
|
|
211
|
+
`WorkflowEnricher` service in `src/daemon/workflow-enricher.ts`. Fires for root sessions (`spawnDepth === 0`) inside `runWorkflow()` before `buildPreAgentSession()`. `PriorNotesPolicy` discriminated type controls notes injection. 1s timeout with partial fallback on `listRecentSessions`. `EnricherResult` threaded as typed value through call chain -- trigger never mutated. All 6 entry points covered.
|
|
228
212
|
|
|
229
|
-
**
|
|
230
|
-
- Where exactly does the enricher inject: inside `runWorkflow()` before `buildPreAgentSession()`, or inside `buildPreAgentSession()` itself? The latter is cleaner but changes the pre-agent phase boundary.
|
|
231
|
-
- `listRecentSessions` must have a 1s wall-clock timeout with partial-result fallback. Without it, large session stores silently slow all session startups. This is a spec requirement, not optional.
|
|
232
|
-
- `spawn_agent` children don't get enriched (they'd trigger redundant assembly for deeply nested trees). Is there a case where children should optionally enrich? Candidate: an `inheritParentContext: boolean` flag in the `spawn_agent` tool schema.
|
|
213
|
+
**Pilot test gate still pending:** before declaring full success, verify agents reference prior notes in turn-1 reasoning in at least one real session.
|
|
233
214
|
|
|
234
215
|
---
|
|
235
216
|
|
|
236
|
-
### MemoryStore: indexed session history and
|
|
217
|
+
### MemoryStore: indexed session history as a coordinator and enricher dependency (Apr 30, 2026)
|
|
237
218
|
|
|
238
219
|
**Status: idea** | Priority: medium
|
|
239
220
|
|
|
@@ -241,16 +222,17 @@ There is no single layer that all dispatch paths share where assembly can run un
|
|
|
241
222
|
|
|
242
223
|
The session event log is rich -- it records goals, step notes, artifacts, delivered commits, git state, and phase handoffs. But querying it requires a full directory scan and per-session event projection on every call. `LocalSessionSummaryProviderV2` does this today and is used in exactly one place (the PR-review coordinator). Every other consumer either skips it or re-implements a slower version.
|
|
243
224
|
|
|
244
|
-
|
|
225
|
+
**Design:** A `MemoryStore` port backed by `~/.workrail/memory.db` (SQLite, WAL mode), indexed by `finalizeSession()` as fire-and-forget after each session completes. Replaces the current full directory scan with an indexed query -- O(log n + k) for "recent sessions for this workspace" instead of O(n) full scan. Query kinds v1: `recent_sessions` (workspace-scoped, indexed on `(workspace_hash, completed_at DESC)`), `sessions_by_goal_keywords` (requires full-text index or O(n) scan). Consumed by the WorkflowEnricher and coordinator pre-dispatch paths, not by agents directly.
|
|
245
226
|
|
|
246
|
-
**
|
|
227
|
+
**Why not a mid-session agent tool:** context assembly belongs in the layer that dispatches the session -- the coordinator and enricher know what workspace they're spawning into and can assemble context deterministically before the first turn. Leaving retrieval to the agent requires the LLM to make a judgment call about its own context needs mid-session, burns turns, and produces inconsistent results. If an agent needs something that wasn't pre-loaded, that's a gap in the assembly step, not a signal to give agents a retrieval tool.
|
|
247
228
|
|
|
248
|
-
Phase 2b (separate): index phase artifacts via a new `phase_artifact_appended` session event kind -- bridges the
|
|
229
|
+
Phase 2b (separate): index phase artifacts via a new `phase_artifact_appended` session event kind -- bridges the PipelineRunContext silo into the session event log. Requires engine schema review.
|
|
249
230
|
|
|
250
231
|
**Things to hash out:**
|
|
251
|
-
- SQLite native compilation may fail in some
|
|
252
|
-
- `
|
|
253
|
-
-
|
|
232
|
+
- SQLite native compilation may fail in some environments (Docker, Alpine). Mitigation: `@sqlite.org/sqlite-wasm` (pure WASM) or make MemoryStore fully optional -- daemon works without it, enricher falls back to the slow scan.
|
|
233
|
+
- `sessions_by_goal_keywords` without a full-text index is still O(n). Is keyword search needed in v1, or is recency-scoped `recent_sessions` sufficient to start?
|
|
234
|
+
- `phase_artifact_appended` schema change: new event kind vs reuse existing artifact channel with new content type. Different backward-compatibility implications -- needs engine team input before Phase 2b starts.
|
|
235
|
+
- **The ideal vs achievable tension:** ideally all context is assembled before the first turn and the agent never has to fetch more. Whether that's achievable depends on whether the relevant context is predictable from the trigger payload. For structured tasks (PR review, known issue) it usually is. For open-ended discovery or tasks with ambiguous scope, the needed context only becomes clear as the agent reads code -- you can't fully front-load it. One candidate: a context-gathering sub-agent spawned before the main session that reads the workspace and returns a structured context bundle to the coordinator, which then assembles it into the main session's pre-load. This has its own issues: it adds latency (a full extra session before the real work starts), risks gathering the wrong things (the sub-agent doesn't know what the main agent will need), and may just push the "what context do I need?" judgment to an earlier LLM call rather than eliminating it. Worth tracking as a design direction before deciding whether to invest in mid-session retrieval infrastructure at all.
|
|
254
236
|
|
|
255
237
|
---
|
|
256
238
|
|
|
@@ -273,6 +255,35 @@ Today, validating this requires manually reading raw session transcripts, which
|
|
|
273
255
|
|
|
274
256
|
---
|
|
275
257
|
|
|
258
|
+
### Operator preference memory: WorkTrain learns and retains operator-specific preferences (Apr 30, 2026)
|
|
259
|
+
|
|
260
|
+
**Status: idea** | Priority: medium
|
|
261
|
+
|
|
262
|
+
**Score: 9** | Cor:1 Cap:2 Eff:2 Lev:2 Con:2 | Blocked: no
|
|
263
|
+
|
|
264
|
+
WorkTrain runs fully autonomously but has no persistent memory of operator preferences -- things like "always squash before merging", "don't open PRs without a linked issue", "prefer functional patterns in new files", or "this workspace uses tabs not spaces." Every session starts from the same generic `daemon-soul.md` baseline. Preferences discovered or stated in one session don't carry forward.
|
|
265
|
+
|
|
266
|
+
Claude Code solves this for human-in-the-loop sessions via its memory system (feedback, user, project entries written by the AI mid-conversation). WorkTrain needs an equivalent, but the mechanism is fundamentally different because (a) there is no human watching the session to correct or confirm, and (b) opening up an interactive channel into an autonomous pipeline introduces risk that has to be carefully scoped.
|
|
267
|
+
|
|
268
|
+
Candidate input mechanisms (not mutually exclusive):
|
|
269
|
+
|
|
270
|
+
1. **MR/PR review comments** -- when a human reviewer requests changes or comments on a WorkTrain PR, that signal is authoritative feedback. WorkTrain already monitors PRs post-review (see backlog entry on root cause analysis). Extracting preference-relevant comments ("always add a test for this pattern", "don't use this API directly") and persisting them is a natural extension.
|
|
271
|
+
|
|
272
|
+
2. **`worktrain tell`** -- the existing CLI command queues a message to the daemon. Could be extended to a `worktrain remember "..."` variant that writes directly to a workspace-scoped preferences store, bypassing the session queue entirely.
|
|
273
|
+
|
|
274
|
+
3. **Explicit preference file** -- a `~/.workrail/operator-preferences.md` (or per-workspace variant) that the operator edits directly, injected into every session alongside `daemon-soul.md`. Lower friction than building a learning mechanism; higher friction than automatic inference.
|
|
275
|
+
|
|
276
|
+
4. **Inferred from repeated corrections** -- if WorkTrain makes the same kind of mistake N times across sessions (same type of review finding, same escalation reason), it automatically surfaces a draft preference for operator approval before persisting.
|
|
277
|
+
|
|
278
|
+
**Things to hash out:**
|
|
279
|
+
- What is the storage format -- append-only structured log, a single evolving markdown file, or a SQLite table? The answer affects how preferences are queried and how conflicts between preferences are resolved.
|
|
280
|
+
- How does a persisted preference get *removed or updated*? Stale preferences can be worse than none -- "always use library X" becomes harmful when X is deprecated.
|
|
281
|
+
- What is the trust model for inferred preferences vs explicitly stated ones? A preference extracted from a PR comment should carry different weight than one inferred from repeated behavior.
|
|
282
|
+
- Does this interact with `daemon-soul.md`? Soul covers behavioral philosophy; preferences cover workspace/operator-specific constraints. They're different concerns but both end up in the system prompt -- precedence and load order matter.
|
|
283
|
+
- The fully-closed-pipeline concern is real: mechanisms 1 and 4 operate without human intervention during sessions, which is the correct design. Mechanism 2 requires the operator to pull a lever (acceptable). Mechanism 3 is fully manual (always safe). Any mechanism that *pauses a session mid-run to ask a question* would break the autonomous contract and should not be explored here.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
276
287
|
### Per-run retrospective: structured learning from pipeline outcomes (Apr 30, 2026)
|
|
277
288
|
|
|
278
289
|
**Status: idea** | Priority: medium
|
|
@@ -687,19 +698,17 @@ The autonomous workflow runner (`worktrain daemon`). Completely separate from th
|
|
|
687
698
|
|
|
688
699
|
### Living work context: shared knowledge document that accumulates across the full pipeline (Apr 30, 2026)
|
|
689
700
|
|
|
690
|
-
**Status: partial** | Core infra shipped May 5, 2026 (PR #939).
|
|
701
|
+
**Status: partial** | Core infra shipped May 5, 2026 (PR #939). All three original gaps now addressed; one residual gap deferred to Phase 2.
|
|
691
702
|
|
|
692
703
|
**Score: 13** | Cor:3 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: no
|
|
693
704
|
|
|
694
|
-
**Shipped (PR #939):** `ShapingHandoffArtifactV1` + `CodingHandoffArtifactV1` + enriched `DiscoveryHandoffArtifactV1`, `PhaseHandoffArtifact` union, `buildContextSummary()` pure function with per-phase selection, `PipelineRunContext` per-run JSON with `PhaseResult<T>`, crash recovery via `active-run.json` pointer, phase quality gates (fallback escalates, partial warns), persistence failure escalation, 4 workflow authoring changes, adversarial behavioral test (AC 21), `contractRef` validation test.
|
|
705
|
+
**Shipped (PR #939):** `ShapingHandoffArtifactV1` + `CodingHandoffArtifactV1` + enriched `DiscoveryHandoffArtifactV1`, `PhaseHandoffArtifact` union, `buildContextSummary()` pure function with per-phase selection, `PipelineRunContext` per-run JSON with `PhaseResult<T>`, crash recovery via `active-run.json` pointer, phase quality gates (fallback escalates, partial warns), persistence failure escalation, 4 workflow authoring changes, adversarial behavioral test (AC 21), `contractRef` validation test.
|
|
695
706
|
|
|
696
|
-
**
|
|
707
|
+
**Gap #1 -- fixed (PR #948):** Contract test added: `tests/unit/context-chain-contract.test.ts` pins the seam between `buildContextSummary()` coordinator output and `buildSessionContext()` daemon input across all 4 phase transitions.
|
|
697
708
|
|
|
698
|
-
|
|
709
|
+
**Gap #2 -- fixed (PR #952):** The actual gap was narrower than originally described: QUICK_REVIEW/REVIEW_ONLY do invoke `runPrReviewCoordinator` with a `contextAssembler` wired. The real issue was the **fix agent spawn** in `runFixAgentLoop()` was not forwarding `reviewSpawnContext` -- fixed with one line. Residual: the `github_prs_poll` direct dispatch path bypasses the coordinator entirely; fix agents from that path still start cold. Deferred to Phase 2 (MemoryStore pre-assembly).
|
|
699
710
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
3. **No operator visibility into injected context.** The "Prior Context" section in an agent's system prompt is invisible from the console. An operator has no way to see what context was injected into a session without reading raw conversation logs. The console should surface this -- at minimum, whether the session had prior context and how many bytes.
|
|
711
|
+
**Gap #3 -- fixed (PR #948):** Console session detail view now surfaces an **Injected Context** card when `assembledContextSummary` is present in the session's `context_set` event.
|
|
703
712
|
|
|
704
713
|
When a multi-agent pipeline runs -- discovery → shaping → coding → review → fix → re-review -- no agent has a complete picture of what came before it. The coding agent has the goal. The review agent has the code. The fix agent has the findings. None of them have the accumulated context from the full pipeline: why this approach was chosen over alternatives, what was ruled out, what constraints were discovered, what architectural decisions were made, what edge cases were handled, what the review found and why.
|
|
705
714
|
|
|
@@ -1946,6 +1955,27 @@ Surface in: `worktrain status`, `worktrain health <sessionId>`, console session
|
|
|
1946
1955
|
Coordinator design patterns for WorkTrain's autonomous pipeline.
|
|
1947
1956
|
|
|
1948
1957
|
|
|
1958
|
+
### Agents must not perform delivery actions -- only the coordinator's delivery layer can (Apr 30, 2026)
|
|
1959
|
+
|
|
1960
|
+
**Status: idea** | Priority: high
|
|
1961
|
+
|
|
1962
|
+
**Score: 13** | Cor:3 Cap:2 Eff:2 Lev:3 Con:3 | Blocked: no
|
|
1963
|
+
|
|
1964
|
+
Daemon agents currently have unrestricted access to `gh` and `git` via the `Bash` tool. There is nothing preventing an agent from running `gh pr create`, `gh pr merge --squash --auto`, `git push --force`, or any other delivery action inside its session. These actions should be exclusively the coordinator delivery layer's responsibility -- they happen after the session completes, after all quality gates pass, through explicit coordinator scripts. Agents that perform them autonomously bypass every gate that was designed to protect the pipeline.
|
|
1965
|
+
|
|
1966
|
+
The problem is architectural: delivery actions are not separated from agent capabilities. An agent that calls `gh pr merge` mid-session has merged before the coordinator's review routing, before CI has a chance to run, before any post-session quality check fires. This is not a hypothetical -- a sufficiently "helpful" agent will try to complete the job it was given, which includes delivery.
|
|
1967
|
+
|
|
1968
|
+
The correct invariant: delivery actions (open PR, merge PR, enable auto-merge, push to main, post to external systems) are only reachable through the coordinator's `autoCommit`, `autoOpenPR`, and delivery pipeline scripts -- not through the agent's Bash tool. The agent's job ends when it calls `complete_step` on the final step. Everything after that is coordinator-owned.
|
|
1969
|
+
|
|
1970
|
+
**Things to hash out:**
|
|
1971
|
+
- How is "delivery action" defined precisely enough to enforce? `gh pr create` is delivery; `gh pr view` is read-only. `git push origin feature-branch` is delivery; `git status` is not. The boundary is write-to-external-system.
|
|
1972
|
+
- Can this be enforced at the tool level (block specific shell commands in the Bash tool) or does it require a capability-based architecture (agents get a restricted Bash that can't reach delivery commands)?
|
|
1973
|
+
- `daemon-soul.md` could document this as a rule, but that relies on LLM compliance -- not enforcement. What is the structural mechanism?
|
|
1974
|
+
- How does this interact with workflows that intentionally ask the agent to run delivery scripts (e.g. a workflow step that says "commit your changes")? Those may be legitimate. The distinction is agent-initiated delivery vs coordinator-authorized delivery.
|
|
1975
|
+
- Should the coordinator pass a `deliveryAllowed: false` flag that the daemon enforces in the Bash tool wrapper? Or is this a workflow authoring constraint?
|
|
1976
|
+
|
|
1977
|
+
---
|
|
1978
|
+
|
|
1949
1979
|
### Event-driven agent coordination (coordinator as event bus)
|
|
1950
1980
|
|
|
1951
1981
|
**Status: idea** | Priority: high
|
package/package.json
CHANGED
|
@@ -34,8 +34,8 @@
|
|
|
34
34
|
{
|
|
35
35
|
"id": "step-break-claim",
|
|
36
36
|
"title": "Step 2: Find the Strongest Counter-Argument",
|
|
37
|
-
"prompt": "Find the strongest case against the current claim.\n\nChallenge it by asking:\n- What is the strongest counter-argument or competing explanation?\n- What evidence could be interpreted differently?\n- What hidden assumption is carrying too much weight?\n- What would a sharp skeptic say first?\n\nOptimize for the single strongest attack, not a long list of weak objections.",
|
|
38
|
-
"agentRole": "You are a sharp skeptic trying to overturn the current favorite with the strongest available attack.",
|
|
37
|
+
"prompt": "Find the strongest case against the current claim.\n\nChallenge it by asking:\n- What is the strongest counter-argument or competing explanation?\n- What evidence could be interpreted differently?\n- What hidden assumption is carrying too much weight?\n- What would a sharp skeptic say first?\n- Construct the strongest possible case for a different answer: what would it look like if the current claim is wrong and a competing explanation is right?\n\nOptimize for the single strongest attack, not a long list of weak objections.",
|
|
38
|
+
"agentRole": "You are a sharp skeptic trying to overturn the current favorite with the strongest available attack -- including constructing the strongest case for an alternative answer, not just finding flaws in the current one.",
|
|
39
39
|
"requireConfirmation": false
|
|
40
40
|
},
|
|
41
41
|
{
|