@exaudeus/workrail 3.77.1 → 3.78.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/services/compiler/ref-registry.js +2 -1
- package/dist/console-ui/assets/{index-BooFww1c.js → index-CcgqczfJ.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/coordinators/modes/full-pipeline.js +5 -5
- package/dist/coordinators/modes/implement-shared.js +8 -9
- package/dist/coordinators/pipeline-run-context.d.ts +18 -0
- package/dist/coordinators/pr-review.d.ts +5 -1
- package/dist/coordinators/pr-review.js +1 -1
- package/dist/manifest.json +24 -24
- package/dist/v2/durable-core/schemas/artifacts/discovery-handoff.d.ts +3 -0
- package/dist/v2/durable-core/schemas/artifacts/discovery-handoff.js +1 -0
- package/dist/v2/usecases/console-service.js +15 -4
- package/dist/v2/usecases/console-types.d.ts +3 -0
- package/docs/ideas/backlog.md +69 -39
- package/package.json +1 -1
- package/workflows/routines/hypothesis-challenge.json +2 -2
- package/workflows/wr.discovery.json +219 -88
|
@@ -17,10 +17,10 @@ async function runReviewAndVerdictCycle(deps, opts, prUrl, coordinatorStartMs, i
|
|
|
17
17
|
: `Re-review PR after fixes (iteration ${iteration}): ${prUrl}`;
|
|
18
18
|
deps.stderr(`[review-cycle] Spawning review session (iteration=${iteration}): ${reviewGoal.slice(0, 80)}`);
|
|
19
19
|
const reviewContextSummary = (0, context_assembly_js_1.buildContextSummary)(priorArtifacts, 'review');
|
|
20
|
-
const
|
|
21
|
-
prUrl,
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
const reviewContext = reviewContextSummary
|
|
21
|
+
? { prUrl, assembledContextSummary: reviewContextSummary }
|
|
22
|
+
: undefined;
|
|
23
|
+
const reviewSpawnResult = await deps.spawnSession('wr.mr-review', reviewGoal, opts.workspace, reviewContext);
|
|
24
24
|
if (reviewSpawnResult.kind === 'err') {
|
|
25
25
|
return {
|
|
26
26
|
kind: 'escalated',
|
|
@@ -92,11 +92,10 @@ async function runReviewAndVerdictCycle(deps, opts, prUrl, coordinatorStartMs, i
|
|
|
92
92
|
return fixCutoff;
|
|
93
93
|
const fixGoal = `Fix review findings: ${findings.findingSummaries.slice(0, 3).join('; ')}`;
|
|
94
94
|
const fixContextSummary = (0, context_assembly_js_1.buildContextSummary)(priorArtifacts, 'fix');
|
|
95
|
-
const
|
|
96
|
-
prUrl,
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
});
|
|
95
|
+
const fixContext = fixContextSummary
|
|
96
|
+
? { prUrl, findings: findings.findingSummaries, assembledContextSummary: fixContextSummary }
|
|
97
|
+
: undefined;
|
|
98
|
+
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace, fixContext);
|
|
100
99
|
if (fixSpawnResult.kind === 'err') {
|
|
101
100
|
return {
|
|
102
101
|
kind: 'escalated',
|
|
@@ -95,6 +95,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
95
95
|
path: string;
|
|
96
96
|
relevance: string;
|
|
97
97
|
}>, "many">>;
|
|
98
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
98
99
|
}, "strict", z.ZodTypeAny, {
|
|
99
100
|
kind: "wr.discovery_handoff";
|
|
100
101
|
version: 1;
|
|
@@ -111,6 +112,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
111
112
|
path: string;
|
|
112
113
|
relevance: string;
|
|
113
114
|
}[] | undefined;
|
|
115
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
114
116
|
}, {
|
|
115
117
|
kind: "wr.discovery_handoff";
|
|
116
118
|
version: 1;
|
|
@@ -127,6 +129,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
127
129
|
path: string;
|
|
128
130
|
relevance: string;
|
|
129
131
|
}[] | undefined;
|
|
132
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
130
133
|
}>;
|
|
131
134
|
confidenceBand: z.ZodNullable<z.ZodEnum<["high", "medium", "low"]>>;
|
|
132
135
|
recapMarkdown: z.ZodNullable<z.ZodString>;
|
|
@@ -148,6 +151,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
148
151
|
path: string;
|
|
149
152
|
relevance: string;
|
|
150
153
|
}[] | undefined;
|
|
154
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
151
155
|
};
|
|
152
156
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
153
157
|
recapMarkdown: string | null;
|
|
@@ -169,6 +173,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
169
173
|
path: string;
|
|
170
174
|
relevance: string;
|
|
171
175
|
}[] | undefined;
|
|
176
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
172
177
|
};
|
|
173
178
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
174
179
|
recapMarkdown: string | null;
|
|
@@ -210,6 +215,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
210
215
|
path: string;
|
|
211
216
|
relevance: string;
|
|
212
217
|
}[] | undefined;
|
|
218
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
213
219
|
};
|
|
214
220
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
215
221
|
recapMarkdown: string | null;
|
|
@@ -241,6 +247,7 @@ export declare const DiscoveryPhaseRecordSchema: z.ZodObject<{
|
|
|
241
247
|
path: string;
|
|
242
248
|
relevance: string;
|
|
243
249
|
}[] | undefined;
|
|
250
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
244
251
|
};
|
|
245
252
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
246
253
|
recapMarkdown: string | null;
|
|
@@ -737,6 +744,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
737
744
|
path: string;
|
|
738
745
|
relevance: string;
|
|
739
746
|
}>, "many">>;
|
|
747
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
740
748
|
}, "strict", z.ZodTypeAny, {
|
|
741
749
|
kind: "wr.discovery_handoff";
|
|
742
750
|
version: 1;
|
|
@@ -753,6 +761,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
753
761
|
path: string;
|
|
754
762
|
relevance: string;
|
|
755
763
|
}[] | undefined;
|
|
764
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
756
765
|
}, {
|
|
757
766
|
kind: "wr.discovery_handoff";
|
|
758
767
|
version: 1;
|
|
@@ -769,6 +778,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
769
778
|
path: string;
|
|
770
779
|
relevance: string;
|
|
771
780
|
}[] | undefined;
|
|
781
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
772
782
|
}>;
|
|
773
783
|
confidenceBand: z.ZodNullable<z.ZodEnum<["high", "medium", "low"]>>;
|
|
774
784
|
recapMarkdown: z.ZodNullable<z.ZodString>;
|
|
@@ -790,6 +800,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
790
800
|
path: string;
|
|
791
801
|
relevance: string;
|
|
792
802
|
}[] | undefined;
|
|
803
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
793
804
|
};
|
|
794
805
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
795
806
|
recapMarkdown: string | null;
|
|
@@ -811,6 +822,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
811
822
|
path: string;
|
|
812
823
|
relevance: string;
|
|
813
824
|
}[] | undefined;
|
|
825
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
814
826
|
};
|
|
815
827
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
816
828
|
recapMarkdown: string | null;
|
|
@@ -852,6 +864,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
852
864
|
path: string;
|
|
853
865
|
relevance: string;
|
|
854
866
|
}[] | undefined;
|
|
867
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
855
868
|
};
|
|
856
869
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
857
870
|
recapMarkdown: string | null;
|
|
@@ -883,6 +896,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
883
896
|
path: string;
|
|
884
897
|
relevance: string;
|
|
885
898
|
}[] | undefined;
|
|
899
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
886
900
|
};
|
|
887
901
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
888
902
|
recapMarkdown: string | null;
|
|
@@ -1412,6 +1426,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1412
1426
|
path: string;
|
|
1413
1427
|
relevance: string;
|
|
1414
1428
|
}[] | undefined;
|
|
1429
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1415
1430
|
};
|
|
1416
1431
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1417
1432
|
recapMarkdown: string | null;
|
|
@@ -1525,6 +1540,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1525
1540
|
path: string;
|
|
1526
1541
|
relevance: string;
|
|
1527
1542
|
}[] | undefined;
|
|
1543
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1528
1544
|
};
|
|
1529
1545
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1530
1546
|
recapMarkdown: string | null;
|
|
@@ -1645,6 +1661,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1645
1661
|
path: string;
|
|
1646
1662
|
relevance: string;
|
|
1647
1663
|
}[] | undefined;
|
|
1664
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1648
1665
|
};
|
|
1649
1666
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1650
1667
|
recapMarkdown: string | null;
|
|
@@ -1766,6 +1783,7 @@ export declare const PipelineRunContextSchema: z.ZodObject<{
|
|
|
1766
1783
|
path: string;
|
|
1767
1784
|
relevance: string;
|
|
1768
1785
|
}[] | undefined;
|
|
1786
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
1769
1787
|
};
|
|
1770
1788
|
confidenceBand: "low" | "high" | "medium" | null;
|
|
1771
1789
|
recapMarkdown: string | null;
|
|
@@ -2,6 +2,10 @@ import type { Result } from '../runtime/result.js';
|
|
|
2
2
|
import type { AwaitResult } from '../cli/commands/worktrain-await.js';
|
|
3
3
|
import type { ChildSessionResult } from './types.js';
|
|
4
4
|
import type { ContextAssembler } from '../context-assembly/types.js';
|
|
5
|
+
export interface CoordinatorSpawnContext {
|
|
6
|
+
readonly assembledContextSummary?: string;
|
|
7
|
+
readonly [key: string]: unknown;
|
|
8
|
+
}
|
|
5
9
|
export type ReviewSeverity = 'clean' | 'minor' | 'blocking' | 'unknown';
|
|
6
10
|
export interface ReviewFindings {
|
|
7
11
|
readonly severity: ReviewSeverity;
|
|
@@ -38,7 +42,7 @@ export interface PrReviewOpts {
|
|
|
38
42
|
readonly port?: number;
|
|
39
43
|
}
|
|
40
44
|
export interface CoordinatorDeps {
|
|
41
|
-
readonly spawnSession: (workflowId: string, goal: string, workspace: string, context?:
|
|
45
|
+
readonly spawnSession: (workflowId: string, goal: string, workspace: string, context?: CoordinatorSpawnContext, agentConfig?: Readonly<{
|
|
42
46
|
readonly maxSessionMinutes?: number;
|
|
43
47
|
readonly maxTurns?: number;
|
|
44
48
|
}>, parentSessionId?: string) => Promise<Result<string, string>>;
|
|
@@ -589,7 +589,7 @@ async function runFixAgentLoop(deps, opts, pr, initialFindings, initialOutcome,
|
|
|
589
589
|
};
|
|
590
590
|
}
|
|
591
591
|
log(` PR #${pr.number} -> spawning fix agent (pass ${passCount})...`);
|
|
592
|
-
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace);
|
|
592
|
+
const fixSpawnResult = await deps.spawnSession('wr.coding-task', fixGoal, opts.workspace, reviewSpawnContext);
|
|
593
593
|
if (fixSpawnResult.kind === 'err') {
|
|
594
594
|
log(` PR #${pr.number} -> fix agent spawn failed: ${fixSpawnResult.error}`);
|
|
595
595
|
return {
|
package/dist/manifest.json
CHANGED
|
@@ -30,8 +30,8 @@
|
|
|
30
30
|
"bytes": 428
|
|
31
31
|
},
|
|
32
32
|
"application/services/compiler/ref-registry.js": {
|
|
33
|
-
"sha256": "
|
|
34
|
-
"bytes":
|
|
33
|
+
"sha256": "683b5b08f827d8d695d8f1417798a260e03554b6b2723889320d645174fc2353",
|
|
34
|
+
"bytes": 5456
|
|
35
35
|
},
|
|
36
36
|
"application/services/compiler/resolve-bindings.d.ts": {
|
|
37
37
|
"sha256": "ef190f200228aedf5d27b3fd38c890c5a0613d3346bf0942ec0a7b51cc167d2e",
|
|
@@ -473,16 +473,16 @@
|
|
|
473
473
|
"sha256": "5fe866e54f796975dec5d8ba9983aefd86074db212d3fccd64eed04bc9f0b3da",
|
|
474
474
|
"bytes": 8011
|
|
475
475
|
},
|
|
476
|
-
"console-ui/assets/index-
|
|
477
|
-
"sha256": "
|
|
478
|
-
"bytes":
|
|
476
|
+
"console-ui/assets/index-CcgqczfJ.js": {
|
|
477
|
+
"sha256": "6498c94c7c61c70c6d70dd87c0ae2b2b40dc8065c951b46dc76c14d938975864",
|
|
478
|
+
"bytes": 768377
|
|
479
479
|
},
|
|
480
480
|
"console-ui/assets/index-DHrKiMCf.css": {
|
|
481
481
|
"sha256": "40290b50e21ee7e82433efe13b1aa31c1ea608bd057a5c4e324982f284bc928b",
|
|
482
482
|
"bytes": 60673
|
|
483
483
|
},
|
|
484
484
|
"console-ui/index.html": {
|
|
485
|
-
"sha256": "
|
|
485
|
+
"sha256": "b761f8f350b466ed5ecf4a34fdcfb766493d184eb426a8e9fb7183961cf7e488",
|
|
486
486
|
"bytes": 417
|
|
487
487
|
},
|
|
488
488
|
"console/standalone-console.d.ts": {
|
|
@@ -546,16 +546,16 @@
|
|
|
546
546
|
"bytes": 450
|
|
547
547
|
},
|
|
548
548
|
"coordinators/modes/full-pipeline.js": {
|
|
549
|
-
"sha256": "
|
|
550
|
-
"bytes":
|
|
549
|
+
"sha256": "b5dbd27a48558c4d4df17ec60edaa2f81db06045a867b3cc3f46f4b503833ba7",
|
|
550
|
+
"bytes": 20406
|
|
551
551
|
},
|
|
552
552
|
"coordinators/modes/implement-shared.d.ts": {
|
|
553
553
|
"sha256": "3203d8cb8a51dfe0cf88f3ab29d2dd5e0e60ae3b9c9dcc9f426a8581f55e71ff",
|
|
554
554
|
"bytes": 918
|
|
555
555
|
},
|
|
556
556
|
"coordinators/modes/implement-shared.js": {
|
|
557
|
-
"sha256": "
|
|
558
|
-
"bytes":
|
|
557
|
+
"sha256": "90a7f82afa4cd60507f008497cd1ded147381f5ba41bd012d3d84dd43e91f284",
|
|
558
|
+
"bytes": 14300
|
|
559
559
|
},
|
|
560
560
|
"coordinators/modes/implement.d.ts": {
|
|
561
561
|
"sha256": "23919c24d62a0bf15296a52fbc594cca8b1b34e6f8d98dcf7dede8d97ad4cabb",
|
|
@@ -582,20 +582,20 @@
|
|
|
582
582
|
"bytes": 1198
|
|
583
583
|
},
|
|
584
584
|
"coordinators/pipeline-run-context.d.ts": {
|
|
585
|
-
"sha256": "
|
|
586
|
-
"bytes":
|
|
585
|
+
"sha256": "cb3cb3fde2851472e6769e3934808675175e003e7f8a56579b7807b4e780d9a5",
|
|
586
|
+
"bytes": 70743
|
|
587
587
|
},
|
|
588
588
|
"coordinators/pipeline-run-context.js": {
|
|
589
589
|
"sha256": "5489319764a0dbd1b037521e014784fab518c4ff4f9137e045129e6845793e55",
|
|
590
590
|
"bytes": 4790
|
|
591
591
|
},
|
|
592
592
|
"coordinators/pr-review.d.ts": {
|
|
593
|
-
"sha256": "
|
|
594
|
-
"bytes":
|
|
593
|
+
"sha256": "3ec27f65d802ee25366ea8f053a0d55cd4cb23fa3ffc7fdb5f2c0d743baac64e",
|
|
594
|
+
"bytes": 4620
|
|
595
595
|
},
|
|
596
596
|
"coordinators/pr-review.js": {
|
|
597
|
-
"sha256": "
|
|
598
|
-
"bytes":
|
|
597
|
+
"sha256": "5abceda849923e37e19c447dfd9b88e7d5fb81586f7c2217effde6d89b7ba6b9",
|
|
598
|
+
"bytes": 32439
|
|
599
599
|
},
|
|
600
600
|
"coordinators/routing/route-task.d.ts": {
|
|
601
601
|
"sha256": "6661d21e5cfbc9dffbfd8c2f9aaaf0e30a3251997a2c69c6a1b09929343e30e3",
|
|
@@ -2630,12 +2630,12 @@
|
|
|
2630
2630
|
"bytes": 1242
|
|
2631
2631
|
},
|
|
2632
2632
|
"v2/durable-core/schemas/artifacts/discovery-handoff.d.ts": {
|
|
2633
|
-
"sha256": "
|
|
2634
|
-
"bytes":
|
|
2633
|
+
"sha256": "425c0b1fcb69311e0e42f06359fecb8d935128ea35a53e081d93e43f5ff9d8fb",
|
|
2634
|
+
"bytes": 2672
|
|
2635
2635
|
},
|
|
2636
2636
|
"v2/durable-core/schemas/artifacts/discovery-handoff.js": {
|
|
2637
|
-
"sha256": "
|
|
2638
|
-
"bytes":
|
|
2637
|
+
"sha256": "e76c73a9028a4106aec647e7d9862cae66b9eb36066a46cf4f4634e9d7f7b7d6",
|
|
2638
|
+
"bytes": 1748
|
|
2639
2639
|
},
|
|
2640
2640
|
"v2/durable-core/schemas/artifacts/index.d.ts": {
|
|
2641
2641
|
"sha256": "016e3d46d2eac61e12caf851f8b9d46512b2a3a186bbab7d672127f7f48eb168",
|
|
@@ -3474,12 +3474,12 @@
|
|
|
3474
3474
|
"bytes": 1701
|
|
3475
3475
|
},
|
|
3476
3476
|
"v2/usecases/console-service.js": {
|
|
3477
|
-
"sha256": "
|
|
3478
|
-
"bytes":
|
|
3477
|
+
"sha256": "be260ca0de913b08f1693a61ac8106d61449f999ce7f9e2c1127d7351bc5238e",
|
|
3478
|
+
"bytes": 40956
|
|
3479
3479
|
},
|
|
3480
3480
|
"v2/usecases/console-types.d.ts": {
|
|
3481
|
-
"sha256": "
|
|
3482
|
-
"bytes":
|
|
3481
|
+
"sha256": "385e4c316de5b3884532d14ba77a0a06e88b91873cb346bf339723aaf01c5d93",
|
|
3482
|
+
"bytes": 8184
|
|
3483
3483
|
},
|
|
3484
3484
|
"v2/usecases/console-types.js": {
|
|
3485
3485
|
"sha256": "d43aa81f5bc89faa359e0f97c814ba25155591ff078fbb9bfd40f8c7c9683230",
|
|
@@ -28,6 +28,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
28
28
|
path: string;
|
|
29
29
|
relevance: string;
|
|
30
30
|
}>, "many">>;
|
|
31
|
+
selectionTier: z.ZodOptional<z.ZodEnum<["strong_recommendation", "provisional_recommendation", "insufficient_signal"]>>;
|
|
31
32
|
}, "strict", z.ZodTypeAny, {
|
|
32
33
|
kind: "wr.discovery_handoff";
|
|
33
34
|
version: 1;
|
|
@@ -44,6 +45,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
44
45
|
path: string;
|
|
45
46
|
relevance: string;
|
|
46
47
|
}[] | undefined;
|
|
48
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
47
49
|
}, {
|
|
48
50
|
kind: "wr.discovery_handoff";
|
|
49
51
|
version: 1;
|
|
@@ -60,6 +62,7 @@ export declare const DiscoveryHandoffArtifactV1Schema: z.ZodObject<{
|
|
|
60
62
|
path: string;
|
|
61
63
|
relevance: string;
|
|
62
64
|
}[] | undefined;
|
|
65
|
+
selectionTier?: "strong_recommendation" | "provisional_recommendation" | "insufficient_signal" | undefined;
|
|
63
66
|
}>;
|
|
64
67
|
export type DiscoveryHandoffArtifactV1 = z.infer<typeof DiscoveryHandoffArtifactV1Schema>;
|
|
65
68
|
export declare function isDiscoveryHandoffArtifact(artifact: unknown): artifact is {
|
|
@@ -22,6 +22,7 @@ exports.DiscoveryHandoffArtifactV1Schema = zod_1.z
|
|
|
22
22
|
path: zod_1.z.string().min(1).max(300),
|
|
23
23
|
relevance: zod_1.z.string().min(1).max(150),
|
|
24
24
|
})).max(10).optional(),
|
|
25
|
+
selectionTier: zod_1.z.enum(['strong_recommendation', 'provisional_recommendation', 'insufficient_signal']).optional(),
|
|
25
26
|
})
|
|
26
27
|
.strict();
|
|
27
28
|
function isDiscoveryHandoffArtifact(artifact) {
|
|
@@ -736,6 +736,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
736
736
|
const sessionHealth = health.isOk() && health.value.kind === 'healthy' ? 'healthy' : 'corrupt';
|
|
737
737
|
const sortedEventsRes = (0, sorted_event_log_js_1.asSortedEventLog)(events);
|
|
738
738
|
const sessionTitle = sortedEventsRes.isOk() ? deriveSessionTitle(sortedEventsRes.value) : null;
|
|
739
|
+
const runContextRes = sortedEventsRes.isOk() ? (0, run_context_js_1.projectRunContextV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
739
740
|
const detailTriggerSource = (() => {
|
|
740
741
|
if (sortedEventsRes.isOk()) {
|
|
741
742
|
for (const e of sortedEventsRes.value) {
|
|
@@ -744,13 +745,23 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
744
745
|
}
|
|
745
746
|
}
|
|
746
747
|
}
|
|
747
|
-
const
|
|
748
|
-
const isAutonomous = contextRes.isOk() && Object.values(contextRes.value.byRunId).some((runCtx) => runCtx.context['is_autonomous'] === 'true');
|
|
748
|
+
const isAutonomous = runContextRes.isOk() && Object.values(runContextRes.value.byRunId).some((runCtx) => runCtx.context['is_autonomous'] === 'true');
|
|
749
749
|
return isAutonomous ? 'daemon' : 'mcp';
|
|
750
750
|
})();
|
|
751
|
+
const injectedContext = (() => {
|
|
752
|
+
if (!runContextRes.isOk())
|
|
753
|
+
return undefined;
|
|
754
|
+
for (const runCtx of Object.values(runContextRes.value.byRunId)) {
|
|
755
|
+
const summary = runCtx.context['assembledContextSummary'];
|
|
756
|
+
if (typeof summary === 'string' && summary.trim().length > 0) {
|
|
757
|
+
return { assembledContextSummary: summary };
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
return undefined;
|
|
761
|
+
})();
|
|
751
762
|
const dagRes = (0, run_dag_js_1.projectRunDagV2)(events);
|
|
752
763
|
if (dagRes.isErr()) {
|
|
753
|
-
return { sessionId, sessionTitle, health: sessionHealth, runs: [], metrics: null, repoRoot: null, triggerSource: detailTriggerSource };
|
|
764
|
+
return { sessionId, sessionTitle, health: sessionHealth, runs: [], metrics: null, repoRoot: null, triggerSource: detailTriggerSource, ...(injectedContext !== undefined ? { injectedContext } : {}) };
|
|
754
765
|
}
|
|
755
766
|
const statusRes = sortedEventsRes.isOk() ? (0, run_status_signals_js_1.projectRunStatusSignalsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
756
767
|
const gapsRes = sortedEventsRes.isOk() ? (0, gaps_js_1.projectGapsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
|
|
@@ -819,7 +830,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
|
|
|
819
830
|
skippedSteps: skippedStepsMap[run.runId] ?? [],
|
|
820
831
|
};
|
|
821
832
|
});
|
|
822
|
-
return { sessionId, sessionTitle, health: sessionHealth, runs, metrics: null, repoRoot: null, triggerSource: detailTriggerSource };
|
|
833
|
+
return { sessionId, sessionTitle, health: sessionHealth, runs, metrics: null, repoRoot: null, triggerSource: detailTriggerSource, ...(injectedContext !== undefined ? { injectedContext } : {}) };
|
|
823
834
|
}
|
|
824
835
|
function deriveRunStatus(isBlocked, hasUnresolvedCriticalGaps, isComplete) {
|
|
825
836
|
if (isBlocked)
|
|
@@ -97,6 +97,9 @@ export interface ConsoleSessionDetail {
|
|
|
97
97
|
readonly metrics: SessionMetricsV2 | null;
|
|
98
98
|
readonly repoRoot: string | null;
|
|
99
99
|
readonly triggerSource: 'daemon' | 'mcp';
|
|
100
|
+
readonly injectedContext?: {
|
|
101
|
+
readonly assembledContextSummary: string;
|
|
102
|
+
};
|
|
100
103
|
}
|
|
101
104
|
export type ConsoleValidationOutcome = 'pass' | 'fail';
|
|
102
105
|
export interface ConsoleValidationResult {
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -194,46 +194,27 @@ The delivery pipeline was extracted into `delivery-pipeline.ts` with explicit st
|
|
|
194
194
|
|
|
195
195
|
### Context injection bugs: double-injection, byte-slice truncation, workspaceRules[0] drop (Apr 30, 2026)
|
|
196
196
|
|
|
197
|
-
**Status:
|
|
197
|
+
**Status: done** | Shipped in PR #946 (fix/etienneb/context-injection-bugs, auto-merge enabled)
|
|
198
198
|
|
|
199
199
|
**Score: 13** | Cor:3 Cap:1 Eff:3 Lev:3 Con:3 | Blocked: no
|
|
200
200
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
1. **Double-injection (`session-context.ts:117-119`):** `trigger.context` is JSON-serialized in full into the initial user message. Since coordinators write `assembledContextSummary` *into* `trigger.context`, the assembled context appears twice -- once in the system prompt (8KB cap applied) and once in the initial user message (uncapped). These diverge when the content exceeds 8KB.
|
|
204
|
-
|
|
205
|
-
2. **Byte-slice truncation (`system-prompt.ts:200-202`):** `assembledContextSummary` is truncated by raw byte index (`ctxStr.slice(0, 8192)`), which splits mid-sentence, mid-section, and can produce malformed UTF-8. The section-aware `buildBudgetedOutput()` pattern already exists in `src/coordinators/context-assembly.ts` and handles this correctly.
|
|
206
|
-
|
|
207
|
-
3. **`workspaceRules[0]` silent drop (`session-context.ts:106`):** `ContextBundle.workspaceRules` is typed as `ContextRule[]` but only `[0]` is consumed. All additional workspace context rules are silently dropped. The type implies per-file rules are supported; the consumer silently ignores them.
|
|
208
|
-
|
|
209
|
-
**Also in scope:** introduce `WorkflowContextSlots` typed fields on `WorkflowTrigger` (or a companion type) for system-managed context fields (`assembledContextSummary`, `priorSessionNotes`, `gitDiffStat`). This eliminates the stringly-typed `trigger.context['assembledContextSummary']` access pattern and is a prerequisite for the universal enricher (see next item). Scope Phase 0 changes to consumption sites only (`buildSystemPrompt`, `buildSessionContext`); coordinator write sites migrate in Phase 1.
|
|
210
|
-
|
|
211
|
-
**Done looks like:** no `trigger.context` JSON dump in `initialPrompt`; `assembledContextSummary` truncated at section boundaries; all `workspaceRules` entries injected; `WorkflowContextSlots` typed fields replace stringly-typed access in consumption sites.
|
|
201
|
+
All three bugs fixed. `WorkflowContextSlots` typed interface + `extractContextSlots()` introduced in `src/daemon/types.ts`. `buildSystemPrompt` refactored to pipeline of pure section functions. `truncateToByteLimit` uses Buffer/surrogate-safe walk-back.
|
|
212
202
|
|
|
213
203
|
---
|
|
214
204
|
|
|
215
205
|
### Universal context enricher for all session entry points (Apr 30, 2026)
|
|
216
206
|
|
|
217
|
-
**Status:
|
|
218
|
-
|
|
219
|
-
**Score: 11** | Cor:1 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: yes (needs context injection bugs fixed first)
|
|
220
|
-
|
|
221
|
-
Today 4 of 6 session entry points receive zero assembled context: raw webhook triggers, direct dispatch, `spawn_agent` children, and crash-recovered sessions never get cross-session notes or git diff state. Only coordinator-spawned sessions (via `pr-review.ts` or the adaptive pipeline) get assembled context -- and even then only through opt-in coordinator logic, not structural injection.
|
|
222
|
-
|
|
223
|
-
There is no single layer that all dispatch paths share where assembly can run universally. Coordinators that care must call assembly explicitly; everything else gets nothing. This means every new entry point or coordinator is another opportunity to forget assembly.
|
|
207
|
+
**Status: done** | Shipped in PR #947 (feat/etienneb/workflow-enricher, auto-merge enabled, depends on #946)
|
|
224
208
|
|
|
225
|
-
**
|
|
209
|
+
**Score: 11** | Cor:1 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: no
|
|
226
210
|
|
|
227
|
-
|
|
211
|
+
`WorkflowEnricher` service in `src/daemon/workflow-enricher.ts`. Fires for root sessions (`spawnDepth === 0`) inside `runWorkflow()` before `buildPreAgentSession()`. `PriorNotesPolicy` discriminated type controls notes injection. 1s timeout with partial fallback on `listRecentSessions`. `EnricherResult` threaded as typed value through call chain -- trigger never mutated. All 6 entry points covered.
|
|
228
212
|
|
|
229
|
-
**
|
|
230
|
-
- Where exactly does the enricher inject: inside `runWorkflow()` before `buildPreAgentSession()`, or inside `buildPreAgentSession()` itself? The latter is cleaner but changes the pre-agent phase boundary.
|
|
231
|
-
- `listRecentSessions` must have a 1s wall-clock timeout with partial-result fallback. Without it, large session stores silently slow all session startups. This is a spec requirement, not optional.
|
|
232
|
-
- `spawn_agent` children don't get enriched (they'd trigger redundant assembly for deeply nested trees). Is there a case where children should optionally enrich? Candidate: an `inheritParentContext: boolean` flag in the `spawn_agent` tool schema.
|
|
213
|
+
**Pilot test gate still pending:** before declaring full success, verify agents reference prior notes in turn-1 reasoning in at least one real session.
|
|
233
214
|
|
|
234
215
|
---
|
|
235
216
|
|
|
236
|
-
### MemoryStore: indexed session history and
|
|
217
|
+
### MemoryStore: indexed session history as a coordinator and enricher dependency (Apr 30, 2026)
|
|
237
218
|
|
|
238
219
|
**Status: idea** | Priority: medium
|
|
239
220
|
|
|
@@ -241,16 +222,17 @@ There is no single layer that all dispatch paths share where assembly can run un
|
|
|
241
222
|
|
|
242
223
|
The session event log is rich -- it records goals, step notes, artifacts, delivered commits, git state, and phase handoffs. But querying it requires a full directory scan and per-session event projection on every call. `LocalSessionSummaryProviderV2` does this today and is used in exactly one place (the PR-review coordinator). Every other consumer either skips it or re-implements a slower version.
|
|
243
224
|
|
|
244
|
-
|
|
225
|
+
**Design:** A `MemoryStore` port backed by `~/.workrail/memory.db` (SQLite, WAL mode), indexed by `finalizeSession()` as fire-and-forget after each session completes. Replaces the current full directory scan with an indexed query -- O(log n + k) for "recent sessions for this workspace" instead of O(n) full scan. Query kinds v1: `recent_sessions` (workspace-scoped, indexed on `(workspace_hash, completed_at DESC)`), `sessions_by_goal_keywords` (requires full-text index or O(n) scan). Consumed by the WorkflowEnricher and coordinator pre-dispatch paths, not by agents directly.
|
|
245
226
|
|
|
246
|
-
**
|
|
227
|
+
**Why not a mid-session agent tool:** context assembly belongs in the layer that dispatches the session -- the coordinator and enricher know what workspace they're spawning into and can assemble context deterministically before the first turn. Leaving retrieval to the agent requires the LLM to make a judgment call about its own context needs mid-session, burns turns, and produces inconsistent results. If an agent needs something that wasn't pre-loaded, that's a gap in the assembly step, not a signal to give agents a retrieval tool.
|
|
247
228
|
|
|
248
|
-
Phase 2b (separate): index phase artifacts via a new `phase_artifact_appended` session event kind -- bridges the
|
|
229
|
+
Phase 2b (separate): index phase artifacts via a new `phase_artifact_appended` session event kind -- bridges the PipelineRunContext silo into the session event log. Requires engine schema review.
|
|
249
230
|
|
|
250
231
|
**Things to hash out:**
|
|
251
|
-
- SQLite native compilation may fail in some
|
|
252
|
-
- `
|
|
253
|
-
-
|
|
232
|
+
- SQLite native compilation may fail in some environments (Docker, Alpine). Mitigation: `@sqlite.org/sqlite-wasm` (pure WASM) or make MemoryStore fully optional -- daemon works without it, enricher falls back to the slow scan.
|
|
233
|
+
- `sessions_by_goal_keywords` without a full-text index is still O(n). Is keyword search needed in v1, or is recency-scoped `recent_sessions` sufficient to start?
|
|
234
|
+
- `phase_artifact_appended` schema change: new event kind vs reuse existing artifact channel with new content type. Different backward-compatibility implications -- needs engine team input before Phase 2b starts.
|
|
235
|
+
- **The ideal vs achievable tension:** ideally all context is assembled before the first turn and the agent never has to fetch more. Whether that's achievable depends on whether the relevant context is predictable from the trigger payload. For structured tasks (PR review, known issue) it usually is. For open-ended discovery or tasks with ambiguous scope, the needed context only becomes clear as the agent reads code -- you can't fully front-load it. One candidate: a context-gathering sub-agent spawned before the main session that reads the workspace and returns a structured context bundle to the coordinator, which then assembles it into the main session's pre-load. This has its own issues: it adds latency (a full extra session before the real work starts), risks gathering the wrong things (the sub-agent doesn't know what the main agent will need), and may just push the "what context do I need?" judgment to an earlier LLM call rather than eliminating it. Worth tracking as a design direction before deciding whether to invest in mid-session retrieval infrastructure at all.
|
|
254
236
|
|
|
255
237
|
---
|
|
256
238
|
|
|
@@ -273,6 +255,35 @@ Today, validating this requires manually reading raw session transcripts, which
|
|
|
273
255
|
|
|
274
256
|
---
|
|
275
257
|
|
|
258
|
+
### Operator preference memory: WorkTrain learns and retains operator-specific preferences (Apr 30, 2026)
|
|
259
|
+
|
|
260
|
+
**Status: idea** | Priority: medium
|
|
261
|
+
|
|
262
|
+
**Score: 9** | Cor:1 Cap:2 Eff:2 Lev:2 Con:2 | Blocked: no
|
|
263
|
+
|
|
264
|
+
WorkTrain runs fully autonomously but has no persistent memory of operator preferences -- things like "always squash before merging", "don't open PRs without a linked issue", "prefer functional patterns in new files", or "this workspace uses tabs not spaces." Every session starts from the same generic `daemon-soul.md` baseline. Preferences discovered or stated in one session don't carry forward.
|
|
265
|
+
|
|
266
|
+
Claude Code solves this for human-in-the-loop sessions via its memory system (feedback, user, project entries written by the AI mid-conversation). WorkTrain needs an equivalent, but the mechanism is fundamentally different because (a) there is no human watching the session to correct or confirm, and (b) opening up an interactive channel into an autonomous pipeline introduces risk that has to be carefully scoped.
|
|
267
|
+
|
|
268
|
+
Candidate input mechanisms (not mutually exclusive):
|
|
269
|
+
|
|
270
|
+
1. **MR/PR review comments** -- when a human reviewer requests changes or comments on a WorkTrain PR, that signal is authoritative feedback. WorkTrain already monitors PRs post-review (see backlog entry on root cause analysis). Extracting preference-relevant comments ("always add a test for this pattern", "don't use this API directly") and persisting them is a natural extension.
|
|
271
|
+
|
|
272
|
+
2. **`worktrain tell`** -- the existing CLI command queues a message to the daemon. Could be extended to a `worktrain remember "..."` variant that writes directly to a workspace-scoped preferences store, bypassing the session queue entirely.
|
|
273
|
+
|
|
274
|
+
3. **Explicit preference file** -- a `~/.workrail/operator-preferences.md` (or per-workspace variant) that the operator edits directly, injected into every session alongside `daemon-soul.md`. Lower friction than building a learning mechanism; higher friction than automatic inference.
|
|
275
|
+
|
|
276
|
+
4. **Inferred from repeated corrections** -- if WorkTrain makes the same kind of mistake N times across sessions (same type of review finding, same escalation reason), it automatically surfaces a draft preference for operator approval before persisting.
|
|
277
|
+
|
|
278
|
+
**Things to hash out:**
|
|
279
|
+
- What is the storage format -- append-only structured log, a single evolving markdown file, or a SQLite table? The answer affects how preferences are queried and how conflicts between preferences are resolved.
|
|
280
|
+
- How does a persisted preference get *removed or updated*? Stale preferences can be worse than none -- "always use library X" becomes harmful when X is deprecated.
|
|
281
|
+
- What is the trust model for inferred preferences vs explicitly stated ones? A preference extracted from a PR comment should carry different weight than one inferred from repeated behavior.
|
|
282
|
+
- Does this interact with `daemon-soul.md`? Soul covers behavioral philosophy; preferences cover workspace/operator-specific constraints. They're different concerns but both end up in the system prompt -- precedence and load order matter.
|
|
283
|
+
- The fully-closed-pipeline concern is real: mechanisms 1 and 4 operate without human intervention during sessions, which is the correct design. Mechanism 2 requires the operator to pull a lever (acceptable). Mechanism 3 is fully manual (always safe). Any mechanism that *pauses a session mid-run to ask a question* would break the autonomous contract and should not be explored here.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
276
287
|
### Per-run retrospective: structured learning from pipeline outcomes (Apr 30, 2026)
|
|
277
288
|
|
|
278
289
|
**Status: idea** | Priority: medium
|
|
@@ -687,19 +698,17 @@ The autonomous workflow runner (`worktrain daemon`). Completely separate from th
|
|
|
687
698
|
|
|
688
699
|
### Living work context: shared knowledge document that accumulates across the full pipeline (Apr 30, 2026)
|
|
689
700
|
|
|
690
|
-
**Status: partial** | Core infra shipped May 5, 2026 (PR #939).
|
|
701
|
+
**Status: partial** | Core infra shipped May 5, 2026 (PR #939). All three original gaps now addressed; one residual gap deferred to Phase 2.
|
|
691
702
|
|
|
692
703
|
**Score: 13** | Cor:3 Cap:3 Eff:2 Lev:3 Con:2 | Blocked: no
|
|
693
704
|
|
|
694
|
-
**Shipped (PR #939):** `ShapingHandoffArtifactV1` + `CodingHandoffArtifactV1` + enriched `DiscoveryHandoffArtifactV1`, `PhaseHandoffArtifact` union, `buildContextSummary()` pure function with per-phase selection, `PipelineRunContext` per-run JSON with `PhaseResult<T>`, crash recovery via `active-run.json` pointer, phase quality gates (fallback escalates, partial warns), persistence failure escalation, 4 workflow authoring changes, adversarial behavioral test (AC 21), `contractRef` validation test.
|
|
705
|
+
**Shipped (PR #939):** `ShapingHandoffArtifactV1` + `CodingHandoffArtifactV1` + enriched `DiscoveryHandoffArtifactV1`, `PhaseHandoffArtifact` union, `buildContextSummary()` pure function with per-phase selection, `PipelineRunContext` per-run JSON with `PhaseResult<T>`, crash recovery via `active-run.json` pointer, phase quality gates (fallback escalates, partial warns), persistence failure escalation, 4 workflow authoring changes, adversarial behavioral test (AC 21), `contractRef` validation test.
|
|
695
706
|
|
|
696
|
-
**
|
|
707
|
+
**Gap #1 -- fixed (PR #948):** Contract test added: `tests/unit/context-chain-contract.test.ts` pins the seam between `buildContextSummary()` coordinator output and `buildSessionContext()` daemon input across all 4 phase transitions.
|
|
697
708
|
|
|
698
|
-
|
|
709
|
+
**Gap #2 -- fixed (PR #952):** The actual gap was narrower than originally described: QUICK_REVIEW/REVIEW_ONLY do invoke `runPrReviewCoordinator` with a `contextAssembler` wired. The real issue was the **fix agent spawn** in `runFixAgentLoop()` was not forwarding `reviewSpawnContext` -- fixed with one line. Residual: the `github_prs_poll` direct dispatch path bypasses the coordinator entirely; fix agents from that path still start cold. Deferred to Phase 2 (MemoryStore pre-assembly).
|
|
699
710
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
3. **No operator visibility into injected context.** The "Prior Context" section in an agent's system prompt is invisible from the console. An operator has no way to see what context was injected into a session without reading raw conversation logs. The console should surface this -- at minimum, whether the session had prior context and how many bytes.
|
|
711
|
+
**Gap #3 -- fixed (PR #948):** Console session detail view now surfaces an **Injected Context** card when `assembledContextSummary` is present in the session's `context_set` event.
|
|
703
712
|
|
|
704
713
|
When a multi-agent pipeline runs -- discovery → shaping → coding → review → fix → re-review -- no agent has a complete picture of what came before it. The coding agent has the goal. The review agent has the code. The fix agent has the findings. None of them have the accumulated context from the full pipeline: why this approach was chosen over alternatives, what was ruled out, what constraints were discovered, what architectural decisions were made, what edge cases were handled, what the review found and why.
|
|
705
714
|
|
|
@@ -1946,6 +1955,27 @@ Surface in: `worktrain status`, `worktrain health <sessionId>`, console session
|
|
|
1946
1955
|
Coordinator design patterns for WorkTrain's autonomous pipeline.
|
|
1947
1956
|
|
|
1948
1957
|
|
|
1958
|
+
### Agents must not perform delivery actions -- only the coordinator's delivery layer can (Apr 30, 2026)
|
|
1959
|
+
|
|
1960
|
+
**Status: idea** | Priority: high
|
|
1961
|
+
|
|
1962
|
+
**Score: 13** | Cor:3 Cap:2 Eff:2 Lev:3 Con:3 | Blocked: no
|
|
1963
|
+
|
|
1964
|
+
Daemon agents currently have unrestricted access to `gh` and `git` via the `Bash` tool. There is nothing preventing an agent from running `gh pr create`, `gh pr merge --squash --auto`, `git push --force`, or any other delivery action inside its session. These actions should be exclusively the coordinator delivery layer's responsibility -- they happen after the session completes, after all quality gates pass, through explicit coordinator scripts. Agents that perform them autonomously bypass every gate that was designed to protect the pipeline.
|
|
1965
|
+
|
|
1966
|
+
The problem is architectural: delivery actions are not separated from agent capabilities. An agent that calls `gh pr merge` mid-session has merged before the coordinator's review routing, before CI has a chance to run, before any post-session quality check fires. This is not a hypothetical -- a sufficiently "helpful" agent will try to complete the job it was given, which includes delivery.
|
|
1967
|
+
|
|
1968
|
+
The correct invariant: delivery actions (open PR, merge PR, enable auto-merge, push to main, post to external systems) are only reachable through the coordinator's `autoCommit`, `autoOpenPR`, and delivery pipeline scripts -- not through the agent's Bash tool. The agent's job ends when it calls `complete_step` on the final step. Everything after that is coordinator-owned.
|
|
1969
|
+
|
|
1970
|
+
**Things to hash out:**
|
|
1971
|
+
- How is "delivery action" defined precisely enough to enforce? `gh pr create` is delivery; `gh pr view` is read-only. `git push origin feature-branch` is delivery; `git status` is not. The boundary is write-to-external-system.
|
|
1972
|
+
- Can this be enforced at the tool level (block specific shell commands in the Bash tool) or does it require a capability-based architecture (agents get a restricted Bash that can't reach delivery commands)?
|
|
1973
|
+
- `daemon-soul.md` could document this as a rule, but that relies on LLM compliance -- not enforcement. What is the structural mechanism?
|
|
1974
|
+
- How does this interact with workflows that intentionally ask the agent to run delivery scripts (e.g. a workflow step that says "commit your changes")? Those may be legitimate. The distinction is agent-initiated delivery vs coordinator-authorized delivery.
|
|
1975
|
+
- Should the coordinator pass a `deliveryAllowed: false` flag that the daemon enforces in the Bash tool wrapper? Or is this a workflow authoring constraint?
|
|
1976
|
+
|
|
1977
|
+
---
|
|
1978
|
+
|
|
1949
1979
|
### Event-driven agent coordination (coordinator as event bus)
|
|
1950
1980
|
|
|
1951
1981
|
**Status: idea** | Priority: high
|
package/package.json
CHANGED
|
@@ -34,8 +34,8 @@
|
|
|
34
34
|
{
|
|
35
35
|
"id": "step-break-claim",
|
|
36
36
|
"title": "Step 2: Find the Strongest Counter-Argument",
|
|
37
|
-
"prompt": "Find the strongest case against the current claim.\n\nChallenge it by asking:\n- What is the strongest counter-argument or competing explanation?\n- What evidence could be interpreted differently?\n- What hidden assumption is carrying too much weight?\n- What would a sharp skeptic say first?\n\nOptimize for the single strongest attack, not a long list of weak objections.",
|
|
38
|
-
"agentRole": "You are a sharp skeptic trying to overturn the current favorite with the strongest available attack.",
|
|
37
|
+
"prompt": "Find the strongest case against the current claim.\n\nChallenge it by asking:\n- What is the strongest counter-argument or competing explanation?\n- What evidence could be interpreted differently?\n- What hidden assumption is carrying too much weight?\n- What would a sharp skeptic say first?\n- Construct the strongest possible case for a different answer: what would it look like if the current claim is wrong and a competing explanation is right?\n\nOptimize for the single strongest attack, not a long list of weak objections.",
|
|
38
|
+
"agentRole": "You are a sharp skeptic trying to overturn the current favorite with the strongest available attack -- including constructing the strongest case for an alternative answer, not just finding flaws in the current one.",
|
|
39
39
|
"requireConfirmation": false
|
|
40
40
|
},
|
|
41
41
|
{
|