@exaudeus/workrail 3.9.1 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RESUME_PREVIEW_CONTRACT = exports.REHYDRATE_RETRIEVAL_CONTRACT = void 0;
4
+ exports.createBranchSummarySegment = createBranchSummarySegment;
5
+ exports.createDownstreamRecapSegment = createDownstreamRecapSegment;
6
+ exports.createAncestryRecapSegment = createAncestryRecapSegment;
7
+ exports.createFunctionDefinitionsSegment = createFunctionDefinitionsSegment;
8
+ exports.createSessionTitlePreviewSegment = createSessionTitlePreviewSegment;
9
+ exports.createRecapPreviewSegment = createRecapPreviewSegment;
10
+ exports.compareRetrievalPackSegments = compareRetrievalPackSegments;
11
+ exports.orderRetrievalPackSegments = orderRetrievalPackSegments;
12
+ exports.renderRetrievalPackSections = renderRetrievalPackSections;
13
+ exports.renderBudgetedResumePreview = renderBudgetedResumePreview;
14
+ exports.renderBudgetedRehydrateRecovery = renderBudgetedRehydrateRecovery;
15
+ const constants_js_1 = require("../constants.js");
16
// Ordered tier definitions for the rehydrate recovery surface.
// `priority` 0 is most important and sorts first. `retention` governs budget
// behavior in renderBudgetedRehydrateRecovery: 'tail' tiers may be dropped
// wholesale when over budget, 'core' tiers are never dropped (only globally
// trimmed as a last resort).
const REHYDRATE_TIER_DEFINITIONS = [
    {
        tier: 'structural_context',
        purpose: 'Orient the agent to branch shape and preferred continuation path.',
        priority: 0,
        retention: 'core',
    },
    {
        tier: 'durable_recap',
        purpose: 'Surface durable notes captured from explicit recap outputs.',
        priority: 1,
        retention: 'core',
    },
    {
        tier: 'reference_material',
        purpose: 'Surface authored workflow definitions referenced by the current step.',
        priority: 2,
        retention: 'tail',
    },
];
// Public contract describing how rehydrate recovery content is selected,
// ordered, and truncated. Consumed by the tier-lookup helpers and by
// renderBudgetedRehydrateRecovery below.
exports.REHYDRATE_RETRIEVAL_CONTRACT = {
    surface: 'rehydrate',
    tiers: REHYDRATE_TIER_DEFINITIONS,
    truncation: {
        mode: 'drop_lower_tiers_then_global_utf8_trim',
        budgetScope: 'shared_recovery_prompt',
        antiReconstructionRule: 'select_order_and_compress_explicit_facts_only',
    },
};
45
// Ordered tier definitions for the resume-preview surface. Each tier carries
// its own per-tier byte cap (`maxBytes`) applied by excerptAroundFocus in
// renderBudgetedResumePreview before the overall preview budget is enforced.
const RESUME_PREVIEW_TIER_DEFINITIONS = [
    {
        tier: 'identity_context',
        purpose: 'Surface the best concise identity hint for the session.',
        priority: 0,
        maxBytes: 320,
    },
    {
        tier: 'durable_recap',
        purpose: 'Surface durable recap text, focused around the user query when possible.',
        priority: 1,
        maxBytes: 1600,
    },
];
// Public contract for resume previews; the overall byte budget comes from the
// shared constants module.
exports.RESUME_PREVIEW_CONTRACT = {
    surface: 'resume_preview',
    tiers: RESUME_PREVIEW_TIER_DEFINITIONS,
    budgetBytes: constants_js_1.MAX_RESUME_PREVIEW_BYTES,
};
64
// Shared UTF-8 codec instances used by the byte-budget helpers below.
// Note: the decoder is non-fatal, so undecodable bytes become U+FFFD.
const encoder = new TextEncoder();
const decoder = new TextDecoder('utf-8');
66
/**
 * Priority of a rehydrate tier per the contract.
 * Unknown tiers sort last (MAX_SAFE_INTEGER).
 */
function getTierPriority(tier) {
    const def = exports.REHYDRATE_RETRIEVAL_CONTRACT.tiers.find((entry) => entry.tier === tier);
    return def?.priority ?? Number.MAX_SAFE_INTEGER;
}
/**
 * Retention class of a rehydrate tier per the contract.
 * Unknown tiers default to 'tail' (droppable under budget pressure).
 */
function getTierRetention(tier) {
    const def = exports.REHYDRATE_RETRIEVAL_CONTRACT.tiers.find((entry) => entry.tier === tier);
    return def?.retention ?? 'tail';
}
/**
 * Priority of a resume-preview tier per the contract.
 * Unknown tiers sort last (MAX_SAFE_INTEGER).
 */
function getResumePreviewTierPriority(tier) {
    const def = exports.RESUME_PREVIEW_CONTRACT.tiers.find((entry) => entry.tier === tier);
    return def?.priority ?? Number.MAX_SAFE_INTEGER;
}
/**
 * Per-tier byte cap for resume previews.
 * Unknown tiers fall back to the whole preview budget.
 */
function getResumePreviewTierMaxBytes(tier) {
    const def = exports.RESUME_PREVIEW_CONTRACT.tiers.find((entry) => entry.tier === tier);
    return def?.maxBytes ?? exports.RESUME_PREVIEW_CONTRACT.budgetBytes;
}
78
/**
 * Deterministic code-unit string comparison (no locale rules), suitable as
 * a stable sort tiebreaker.
 * @returns {number} -1, 0, or 1
 */
function compareAscii(a, b) {
    if (a < b)
        return -1;
    if (a > b)
        return 1;
    return 0;
}
81
/**
 * Trims a byte array so it does not end in the middle of a multi-byte UTF-8
 * sequence (e.g. after a hard byte-budget slice).
 *
 * Fix over the previous version: a cut that landed immediately AFTER a lead
 * byte (zero trailing continuation bytes, e.g. [0x61, 0xC3]) used to be
 * returned unchanged, leaving a dangling lead byte that decodes to U+FFFD.
 * The length check below now covers that case because the `cont === 0`
 * early-exit was removed.
 *
 * @param {Uint8Array} bytes - candidate prefix of a UTF-8 byte stream
 * @returns {Uint8Array} a subarray view ending on a clean boundary (no copy
 *   except the degenerate all-continuation case)
 */
function trimToUtf8Boundary(bytes) {
    const n = bytes.length;
    if (n === 0)
        return bytes;
    // Count trailing continuation bytes (10xxxxxx). A valid sequence has at
    // most 3 of them, so inspecting the last 4 bytes is sufficient.
    let cont = 0;
    for (let i = n - 1; i >= 0 && i >= n - 4; i--) {
        if ((bytes[i] & 0xC0) === 0x80) {
            cont++;
        }
        else {
            break;
        }
    }
    const leadByteIndex = n - cont - 1;
    if (leadByteIndex < 0) {
        // Nothing but continuation bytes: no complete character to keep.
        return new Uint8Array(0);
    }
    // Total sequence length implied by the lead byte's high bits.
    const leadByte = bytes[leadByteIndex];
    const expectedLen = (leadByte & 0x80) === 0 ? 1 :
        (leadByte & 0xE0) === 0xC0 ? 2 :
            (leadByte & 0xF0) === 0xE0 ? 3 :
                (leadByte & 0xF8) === 0xF0 ? 4 :
                    0; // invalid lead byte
    const actualLen = cont + 1;
    // Incomplete (or invalid) final sequence: drop it entirely.
    if (expectedLen === 0 || expectedLen !== actualLen) {
        return bytes.subarray(0, leadByteIndex);
    }
    return bytes;
}
113
/**
 * Truncates text to at most maxBytes UTF-8 bytes without splitting a
 * multi-byte character. Returns the input string unchanged when it already
 * fits; negative budgets are treated as zero.
 */
function truncateUtf8(text, maxBytes) {
    const encoded = encoder.encode(text);
    if (encoded.length <= maxBytes) {
        return text;
    }
    const clipped = encoded.subarray(0, Math.max(0, maxBytes));
    return decoder.decode(trimToUtf8Boundary(clipped));
}
120
/**
 * Builds the truncation marker plus a one-line explanation appended when
 * recovery content was cut. Names the number of dropped tiers when whole
 * tiers were omitted; otherwise uses a generic budget message.
 */
function buildOmissionSuffix(omittedTierCount) {
    let omissionLine;
    if (omittedTierCount > 0) {
        const plural = omittedTierCount === 1 ? '' : 's';
        omissionLine = `\nOmitted ${omittedTierCount} lower-priority tier${plural} due to budget constraints.`;
    }
    else {
        omissionLine = '\nOmitted recovery content due to budget constraints.';
    }
    return `${constants_js_1.TRUNCATION_MARKER}${omissionLine}`;
}
/**
 * Trims recovery text so that text + omission suffix fits within
 * RECOVERY_BUDGET_BYTES, then appends the suffix.
 */
function trimFinalRecoveryText(text, omittedTierCount) {
    const suffix = buildOmissionSuffix(omittedTierCount);
    const suffixBytes = encoder.encode(suffix).length;
    const budgetForText = constants_js_1.RECOVERY_BUDGET_BYTES - suffixBytes;
    return truncateUtf8(text, budgetForText) + suffix;
}
132
/**
 * Trims, lower-cases, and de-duplicates focus terms, dropping terms shorter
 * than 3 characters (too noisy to match on). First-seen order is preserved.
 */
function normalizePreviewFocusTerms(focusTerms) {
    const unique = new Set();
    for (const raw of focusTerms) {
        const term = raw.trim().toLowerCase();
        if (term.length >= 3) {
            unique.add(term);
        }
    }
    return [...unique];
}
135
/**
 * Earliest index in text at which any focus term occurs, or -1 when none
 * match. The haystack is lower-cased; terms are assumed to be already
 * normalized (see normalizePreviewFocusTerms).
 */
function findFocusIndex(text, focusTerms) {
    if (focusTerms.length === 0)
        return -1;
    const haystack = text.toLowerCase();
    let best = -1;
    for (const term of focusTerms) {
        const idx = haystack.indexOf(term);
        if (idx !== -1 && (best === -1 || idx < best)) {
            best = idx;
        }
    }
    return best;
}
148
/**
 * Produces an excerpt of text within maxBytes. When a focus term matches,
 * the excerpt is a window around the first match ('...' marks elided ends);
 * otherwise it is a plain front truncation with '...' appended if cut.
 * The final UTF-8 trim may shorten the tail slightly.
 */
function excerptAroundFocus(text, maxBytes, focusTerms) {
    const focusIndex = findFocusIndex(text, focusTerms);
    if (focusIndex === -1) {
        const head = truncateUtf8(text, maxBytes);
        return head.length < text.length ? `${head}...` : head;
    }
    // Window shape: roughly a quarter of the budget (in chars, min 80)
    // before the match and twice that after it.
    const contextChars = Math.max(80, Math.floor(maxBytes / 4));
    const start = Math.max(0, focusIndex - contextChars);
    const end = Math.min(text.length, focusIndex + contextChars * 2);
    const core = text.slice(start, end).trim();
    const leading = start > 0 ? '...' : '';
    const trailing = end < text.length ? '...' : '';
    return truncateUtf8(`${leading}${core}${trailing}`, maxBytes);
}
163
/**
 * Shared constructor for segment objects: trims the body and returns null
 * for empty content so callers can filter blanks out.
 */
function buildTrimmedSegment(fields, body) {
    const trimmed = body.trim();
    if (trimmed.length === 0) {
        return null;
    }
    return { ...fields, body: trimmed };
}
/** Rehydrate segment: deterministic branch-shape summary. */
function createBranchSummarySegment(body) {
    return buildTrimmedSegment({
        kind: 'branch_summary',
        tier: 'structural_context',
        source: 'deterministic_structure',
        title: 'Branch Summary',
    }, body);
}
/** Rehydrate segment: recap of the preferred downstream branch. */
function createDownstreamRecapSegment(body) {
    return buildTrimmedSegment({
        kind: 'downstream_recap',
        tier: 'structural_context',
        source: 'explicit_durable_fact',
        title: 'Downstream Recap (Preferred Branch)',
    }, body);
}
/** Rehydrate segment: recap inherited from ancestor steps. */
function createAncestryRecapSegment(body) {
    return buildTrimmedSegment({
        kind: 'ancestry_recap',
        tier: 'durable_recap',
        source: 'explicit_durable_fact',
        title: 'Ancestry Recap',
    }, body);
}
/** Rehydrate segment: authored workflow function definitions. */
function createFunctionDefinitionsSegment(body) {
    return buildTrimmedSegment({
        kind: 'function_definitions',
        tier: 'reference_material',
        source: 'workflow_definition',
        title: 'Function Definitions',
    }, body);
}
/** Resume-preview segment: session title hint (no title field). */
function createSessionTitlePreviewSegment(body) {
    return buildTrimmedSegment({
        kind: 'session_title_preview',
        tier: 'identity_context',
        source: 'persisted_context',
    }, body);
}
/** Resume-preview segment: durable recap text (no title field). */
function createRecapPreviewSegment(body) {
    return buildTrimmedSegment({
        kind: 'recap_preview',
        tier: 'durable_recap',
        source: 'explicit_durable_fact',
    }, body);
}
233
/**
 * Deterministic ordering for rehydrate retrieval-pack segments: by contract
 * tier priority, then title, then body (code-unit comparisons throughout).
 */
function compareRetrievalPackSegments(a, b) {
    const byTier = getTierPriority(a.tier) - getTierPriority(b.tier);
    if (byTier !== 0) {
        return byTier;
    }
    const byTitle = compareAscii(a.title, b.title);
    return byTitle !== 0 ? byTitle : compareAscii(a.body, b.body);
}
/** Returns segments sorted per compareRetrievalPackSegments; input untouched. */
function orderRetrievalPackSegments(segments) {
    const copy = [...segments];
    copy.sort(compareRetrievalPackSegments);
    return copy;
}
/** Renders each segment as a '### Title\nbody' markdown section, in order. */
function renderRetrievalPackSections(segments) {
    const ordered = orderRetrievalPackSegments(segments);
    return ordered.map((segment) => `### ${segment.title}\n${segment.body}`);
}
248
/**
 * Ordering for resume-preview segments: contract tier priority, then body.
 */
function compareResumePreviewSegments(a, b) {
    const byTier = getResumePreviewTierPriority(a.tier) - getResumePreviewTierPriority(b.tier);
    return byTier !== 0 ? byTier : compareAscii(a.body, b.body);
}
/**
 * Renders the resume preview under the contract budget.
 * Each segment is excerpted to its tier's byte cap (focused around the
 * normalized query terms when possible), empty excerpts are dropped, the
 * rest are joined with blank lines, and the whole string is trimmed to the
 * overall preview budget.
 * @returns {{ text: string, includedTiers: string[] }} preview text plus the
 *   de-duplicated tiers that contributed non-empty content
 */
function renderBudgetedResumePreview(args) {
    const ordered = [...args.segments].sort(compareResumePreviewSegments);
    if (ordered.length === 0) {
        return { text: '', includedTiers: [] };
    }
    const focusTerms = normalizePreviewFocusTerms(args.focusTerms ?? []);
    const excerpts = ordered.map((segment) => ({
        tier: segment.tier,
        text: excerptAroundFocus(segment.body, getResumePreviewTierMaxBytes(segment.tier), focusTerms),
    }));
    const nonEmpty = excerpts.filter((entry) => entry.text.length > 0);
    const joined = nonEmpty.map((entry) => entry.text).join('\n\n');
    return {
        text: truncateUtf8(joined, exports.RESUME_PREVIEW_CONTRACT.budgetBytes),
        includedTiers: [...new Set(nonEmpty.map((entry) => entry.tier))],
    };
}
272
/**
 * Renders the rehydrate recovery prompt under RECOVERY_BUDGET_BYTES.
 *
 * Strategy (matches REHYDRATE_RETRIEVAL_CONTRACT.truncation.mode):
 *  1. Order segments deterministically and group them by contract tier.
 *  2. Render header + all non-empty tiers.
 *  3. While over budget, drop the lowest-priority tier whose retention is
 *     'tail' and re-render; 'core' tiers are never dropped.
 *  4. If still over budget (or everything rendered empty), fall back to a
 *     global UTF-8 trim with an explanatory omission suffix.
 *
 * @param {{ header: string, segments: Array }} args
 * @returns {{ text: string, includedTiers: string[], omittedTierCount: number,
 *   truncatedWithinTier: boolean }}
 */
function renderBudgetedRehydrateRecovery(args) {
    const ordered = orderRetrievalPackSegments(args.segments);
    if (ordered.length === 0) {
        return { text: '', includedTiers: [], omittedTierCount: 0, truncatedWithinTier: false };
    }
    // Group rendered '### Title\nbody' sections by tier, in contract order.
    const tiersInOrder = exports.REHYDRATE_RETRIEVAL_CONTRACT.tiers.map((tier) => tier.tier);
    const sectionsByTier = new Map(tiersInOrder.map((tier) => [tier, ordered.filter((segment) => segment.tier === tier).map((segment) => `### ${segment.title}\n${segment.body}`)]));
    // Renders header + sections for the given tier subset; '' when no sections.
    const renderFromTiers = (tiers) => {
        const sections = tiers.flatMap((tier) => sectionsByTier.get(tier) ?? []);
        return sections.length === 0 ? '' : `${args.header}\n\n${sections.join('\n\n')}`;
    };
    const initiallyIncludedTiers = tiersInOrder.filter((tier) => (sectionsByTier.get(tier) ?? []).length > 0);
    let includedTiers = initiallyIncludedTiers;
    let recoveryText = renderFromTiers(includedTiers);
    // Drop 'tail' tiers from the back until within budget or none remain.
    while (encoder.encode(recoveryText).length > constants_js_1.RECOVERY_BUDGET_BYTES) {
        // Index (from the end) of the last droppable tier; -1 if only 'core' left.
        const droppableTierIndex = [...includedTiers]
            .reverse()
            .findIndex((tier) => getTierRetention(tier) === 'tail');
        if (droppableTierIndex === -1) {
            break;
        }
        const actualIndex = includedTiers.length - 1 - droppableTierIndex;
        includedTiers = includedTiers.filter((_, index) => index !== actualIndex);
        recoveryText = renderFromTiers(includedTiers);
    }
    const omittedTierCount = initiallyIncludedTiers.length - includedTiers.length;
    // A suffix is needed when tiers were dropped, the text still exceeds the
    // budget (global trim pending), or nothing survived tier-dropping.
    const needsSuffix = omittedTierCount > 0 || encoder.encode(recoveryText).length > constants_js_1.RECOVERY_BUDGET_BYTES || includedTiers.length === 0;
    // Empty render: fall back to the bare header, reporting every tier omitted.
    const finalText = recoveryText.length === 0
        ? trimFinalRecoveryText(args.header, initiallyIncludedTiers.length)
        : !needsSuffix
            ? recoveryText
            : trimFinalRecoveryText(recoveryText, omittedTierCount);
    return {
        text: finalText,
        includedTiers,
        omittedTierCount,
        // True when content had to be cut inside the surviving tiers (global
        // trim) rather than only by whole-tier drops.
        truncatedWithinTier: encoder.encode(recoveryText).length > constants_js_1.RECOVERY_BUDGET_BYTES || includedTiers.length === 0,
    };
}
@@ -1,4 +1,5 @@
1
1
  import type { SessionId, WorkflowHash, WorkflowId } from '../durable-core/ids/index.js';
2
+ export { MAX_RESUME_PREVIEW_BYTES } from '../durable-core/constants.js';
2
3
  export type RecapSnippet = string & {
3
4
  readonly __brand: 'RecapSnippet';
4
5
  };
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.MAX_RESUME_CANDIDATES = void 0;
3
+ exports.MAX_RESUME_CANDIDATES = exports.MAX_RESUME_PREVIEW_BYTES = void 0;
4
4
  exports.asRecapSnippet = asRecapSnippet;
5
5
  exports.normalizeToTokens = normalizeToTokens;
6
6
  exports.allQueryTokensMatch = allQueryTokensMatch;
@@ -11,7 +11,10 @@ exports.computeQueryRelevanceScore = computeQueryRelevanceScore;
11
11
  exports.assignTier = assignTier;
12
12
  exports.rankResumeCandidates = rankResumeCandidates;
13
13
  const constants_js_1 = require("../durable-core/constants.js");
14
- const MAX_SNIPPET_BYTES = 1024;
14
+ const retrieval_contract_js_1 = require("../durable-core/domain/retrieval-contract.js");
15
+ var constants_js_2 = require("../durable-core/constants.js");
16
+ Object.defineProperty(exports, "MAX_RESUME_PREVIEW_BYTES", { enumerable: true, get: function () { return constants_js_2.MAX_RESUME_PREVIEW_BYTES; } });
17
+ const MAX_SNIPPET_BYTES = constants_js_1.MAX_RESUME_PREVIEW_BYTES;
15
18
  function asRecapSnippet(raw) {
16
19
  const stripped = raw.endsWith(constants_js_1.TRUNCATION_MARKER)
17
20
  ? raw.slice(0, -constants_js_1.TRUNCATION_MARKER.length)
@@ -145,29 +148,14 @@ function collectMatchReasons(summary, query, tier) {
145
148
  return reasons;
146
149
  }
147
150
  function buildPreviewSnippet(summary, query) {
148
- const previewSource = buildSearchableSessionText(summary);
149
- if (!previewSource)
151
+ const segments = [
152
+ summary.sessionTitle ? (0, retrieval_contract_js_1.createSessionTitlePreviewSegment)(summary.sessionTitle) : null,
153
+ summary.recapSnippet ? (0, retrieval_contract_js_1.createRecapPreviewSegment)(summary.recapSnippet) : null,
154
+ ].filter((segment) => segment !== null);
155
+ if (segments.length === 0)
150
156
  return '';
151
- const queryTokens = query.freeTextQuery ? [...normalizeToTokens(query.freeTextQuery)] : [];
152
- if (queryTokens.length === 0)
153
- return summary.recapSnippet ?? previewSource;
154
- const lower = previewSource.toLowerCase();
155
- let bestIndex = -1;
156
- for (const token of queryTokens) {
157
- if (token.length < 3)
158
- continue;
159
- const idx = lower.indexOf(token);
160
- if (idx !== -1 && (bestIndex === -1 || idx < bestIndex))
161
- bestIndex = idx;
162
- }
163
- if (bestIndex === -1)
164
- return summary.recapSnippet ?? previewSource;
165
- const start = Math.max(0, bestIndex - 100);
166
- const end = Math.min(previewSource.length, bestIndex + 180);
167
- const slice = previewSource.slice(start, end).trim();
168
- const prefix = start > 0 ? '...' : '';
169
- const suffix = end < previewSource.length ? '...' : '';
170
- return `${prefix}${slice}${suffix}`;
157
+ const focusTerms = query.freeTextQuery ? [...normalizeToTokens(query.freeTextQuery)] : [];
158
+ return (0, retrieval_contract_js_1.renderBudgetedResumePreview)({ segments, focusTerms }).text;
171
159
  }
172
160
  function deriveConfidence(tier, reasons) {
173
161
  if (tier.kind === 'matched_exact_id' || tier.kind === 'matched_notes')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "3.9.1",
3
+ "version": "3.10.0",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -20,7 +20,7 @@
20
20
  },
21
21
  "description": {
22
22
  "type": "string",
23
- "description": "What this workflow accomplishes",
23
+ "description": "What this workflow accomplishes. Prefer clear user-facing language for bundled/user-facing workflows; schema does not enforce prose style.",
24
24
  "minLength": 1,
25
25
  "maxLength": 512
26
26
  },
@@ -42,7 +42,7 @@
42
42
  },
43
43
  "clarificationPrompts": {
44
44
  "type": "array",
45
- "description": "Questions to ask upfront to resolve ambiguities",
45
+ "description": "Questions to ask upfront to resolve ambiguities. Prefer direct, user-grounded wording over abstract framework narration.",
46
46
  "items": {
47
47
  "type": "string",
48
48
  "minLength": 1,
@@ -67,7 +67,7 @@
67
67
  },
68
68
  "metaGuidance": {
69
69
  "type": "array",
70
- "description": "Persistent behavioral rules surfaced on start and resume. Not repeated on every step advance. For external document pointers, use 'references' instead.",
70
+ "description": "Persistent behavioral rules surfaced on start and resume. Not repeated on every step advance. Use this to define quality bars and anti-failure guidance without rigidly scripting every thought. For external document pointers, use 'references' instead.",
71
71
  "items": {
72
72
  "type": "string",
73
73
  "minLength": 1,
@@ -127,7 +127,7 @@
127
127
  },
128
128
  "features": {
129
129
  "type": "array",
130
- "description": "Compiler features to apply to this workflow (e.g. wr.features.memory_context). Features inject content into promptBlocks at compile time.",
130
+ "description": "Compiler features to apply to this workflow (e.g. wr.features.memory_context). Features inject content into promptBlocks at compile time and work best when the workflow uses structured promptBlocks instead of relying entirely on raw prompt prose.",
131
131
  "items": {
132
132
  "type": "string",
133
133
  "minLength": 1,
@@ -137,7 +137,7 @@
137
137
  },
138
138
  "extensionPoints": {
139
139
  "type": "array",
140
- "description": "Bounded cognitive slots that users can customize via .workrail/bindings.json. Each slot is referenced in step prompts via {{wr.bindings.slotId}} and resolved at compile time.",
140
+ "description": "Bounded cognitive slots that users can customize via .workrail/bindings.json. Each slot is referenced in step prompts via {{wr.bindings.slotId}} and resolved at compile time. Use extension points for bounded cognitive units, not for core orchestration or final synthesis ownership.",
141
141
  "items": {
142
142
  "$ref": "#/$defs/extensionPoint"
143
143
  },
@@ -163,7 +163,7 @@
163
163
  "$defs": {
164
164
  "extensionPoint": {
165
165
  "type": "object",
166
- "description": "A bounded cognitive slot that can be customized via .workrail/bindings.json",
166
+ "description": "A bounded cognitive slot that can be customized via .workrail/bindings.json. Good uses include candidate generation, review, and validation passes; poor uses include replacing the parent workflow's core orchestration contract.",
167
167
  "properties": {
168
168
  "slotId": {
169
169
  "type": "string",
@@ -283,7 +283,7 @@
283
283
  "properties": {
284
284
  "id": { "$ref": "#/$defs/stepId", "description": "Unique identifier for the step" },
285
285
  "title": { "type": "string", "minLength": 1, "maxLength": 128 },
286
- "prompt": { "type": "string", "minLength": 1, "maxLength": 8192 },
286
+ "prompt": { "type": "string", "minLength": 1, "maxLength": 8192, "description": "Traditional single-string prompt. Use when structure is simple; prefer promptBlocks when you need stronger quality bars without over-scripting cognition." },
287
287
  "promptBlocks": { "$ref": "#/$defs/promptBlocks" },
288
288
  "agentRole": { "type": "string", "minLength": 10, "maxLength": 1024 },
289
289
  "guidance": { "type": "array", "items": { "type": "string" } },
@@ -19,19 +19,22 @@
19
19
  "DEFAULT BEHAVIOR: self-execute with tools. Only ask for missing external artifacts, permissions, or business context you cannot resolve yourself.",
20
20
  "V2 DURABILITY: use output.notesMarkdown and explicit `continue_workflow` context keys as durable workflow state. Do NOT rely on the review document as required workflow memory.",
21
21
  "ARTIFACT STRATEGY: `reviewDocPath` is an optional human-facing artifact only. Create or update it only when it materially improves handoff or readability. Workflow truth lives in notes and explicit context fields.",
22
+ "NOTES QUALITY: notes should work for both a human reader and a future agent resuming later. For important phases, make clear what was learned, what was decided, what remains uncertain, and what should happen next.",
22
23
  "OWNERSHIP & DELEGATION: the main agent owns truth, synthesis, severity calibration, recommendation, and final handoff. Delegate only bounded reviewer or validation work through the WorkRail Executor.",
23
24
  "SUBAGENT SYNTHESIS: treat reviewer-family and validator output as evidence, not conclusions. State your current hypothesis before delegation, then say what was confirmed, what was new, what you reject, and what changed your mind.",
24
25
  "PARALLELISM: parallelize independent cognition; serialize canonical synthesis, coverage-ledger updates, recommendation decisions, and document writes.",
25
26
  "REVIEW MODEL: first build shared understanding, then freeze a neutral fact packet, then let reviewer families challenge it in parallel, then reconcile contradictions explicitly.",
26
27
  "COVERAGE LEDGER: explicitly track review domains as `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`. Do not finalize with unresolved material gaps unless you name them clearly.",
27
28
  "TRIGGERS: WorkRail can only react to explicit fields. Use structural fields such as `contextUnknownCount`, `criticalSurfaceTouched`, `coverageUncertainCount`, `contradictionCount`, `falsePositiveRiskCount`, `blindSpotCount`, and `needsSimulation`.",
29
+ "BOUNDARY DISCIPLINE: attempt to determine the real review target and the likely ancestor-relative review surface. If that confidence remains weak, continue with downgraded confidence and disclose the limitation clearly instead of pretending certainty.",
30
+ "SOURCE DISCOVERY: opportunistically recover PR context, ticket/docs context, and repo/user policy context from the strongest available sources. Missing sources should usually lower confidence and be disclosed, not block the workflow.",
28
31
  "BOUNDARY: do not post comments, approve, reject, or merge unless the user explicitly asks. Produce findings, recommendation, and handoff material only."
29
32
  ],
30
33
  "steps": [
31
34
  {
32
35
  "id": "phase-0-understand-and-classify",
33
- "title": "Phase 0: Understand & Classify",
34
- "prompt": "Build understanding and classify the review in one pass.\n\nStep 1 — Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Explore:\nUse tools to build the minimum complete understanding needed to review accurately. Read independent files in parallel when possible.\n\nGather:\n- MR title and purpose, if discoverable\n- ticket or acceptance-criteria context when available\n- changed files overview and changed-file count\n- module roots, call chain highlights, public contracts, impacted consumers, and repo patterns that matter\n- explicit unknowns, likely blind spots, and whether author intent remains unclear\n- whether any critical surface is touched\n\nStep 3 — Classify after exploration:\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nStep 4 — Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and understanding still feels incomplete, and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 5 — Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. 
Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- if the review target is missing entirely, ask only for that missing artifact\n- classify AFTER exploring, not before",
36
+ "title": "Phase 0: Locate, Bound, Enrich & Classify",
37
+ "prompt": "Build the review foundation in one pass.\n\nStep 1 — Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Locate and bound the review target:\nAttempt to determine the strongest available review target and boundary.\n\nAttempt to establish:\n- `reviewTargetKind` from the strongest available source such as PR/MR, branch, patch, diff, or local working tree changes\n- `reviewTargetSource` describing where the target came from\n- likely PR/MR identity when available (`prUrl`, `prNumber`)\n- likely base / ancestor reference (`baseCandidate`, `mergeBaseRef`) when available\n- whether the branch may include inherited or out-of-scope changes\n- `boundaryConfidence`: High / Medium / Low\n\nDo not over-prescribe your own investigation path. Use the strongest available evidence and record uncertainty honestly.\n\nStep 3 — Enrich with context:\nRecover the strongest available intent and policy context from whatever sources are actually available.\n\nAttempt to recover:\n- MR title and purpose\n- ticket / issue / acceptance context (`ticketRefs`, `ticketContext`)\n- supporting docs / specs / rollout context (`supportingDocsFound`)\n- repo or user policy/convention context when it is likely to affect review judgment (`policySourcesFound`)\n- `contextConfidence`: High / Medium / Low\n\nStep 4 Review-surface hygiene:\nClassify the visible change into a minimal review surface.\n\nSet:\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- `likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n\nThe goal is not a giant ledger. 
The goal is to avoid treating every visible changed file as equally worthy of deep review by default.\n\nStep 5 — Classify the review:\nAfter exploration, classify the work.\n\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `shapeProfile`: choose the best primary label from `isolated_change`, `crosscutting_change`, `mechanically_noisy_change`, or `ambiguous_boundary`\n- `changeTypeProfile`: choose the best primary label from `general_code_change`, `api_contract_change`, `data_model_or_migration`, `security_sensitive`, or `test_only`\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n- `needsBoundaryFollowup`: true / false\n- `needsContextFollowup`: true / false\n- `needsReviewerBundle`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nMinimal routing guidance:\n- if `boundaryConfidence = Low`, bias toward boundary/context follow-up before strong recommendation confidence\n- if `changeTypeProfile = api_contract_change`, bias toward contract/consumer/backward-compatibility scrutiny\n- if `changeTypeProfile = data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny\n- if `changeTypeProfile = security_sensitive`, bias toward adversarial/runtime-risk scrutiny and lower tolerance for weak evidence\n- if `changeTypeProfile = test_only`, bias toward stronger false-positive suppression\n- if `shapeProfile = mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings\n\nStep 6 — Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and context remains incomplete, and delegation is available, spawn TWO WorkRail 
Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 7 — Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nFallback behavior:\n- if PR/MR is not found but a branch/diff is inspectable, continue with downgraded context confidence and disclose missing PR context later\n- if the branch is inspectable but merge-base / ancestor remains ambiguous, continue with downgraded boundary confidence, set `needsBoundaryFollowup = true`, and disclose the uncertainty later\n- if ticket or supporting docs are missing, continue with downgraded context confidence and avoid overclaiming intent-sensitive findings\n- if only a patch/diff is available, continue if it is inspectable, but keep lower confidence on intent/boundary-dependent conclusions\n- if the review target itself is missing, ask only for that missing artifact and stop\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewTargetKind`\n- `reviewTargetSource`\n- `prUrl`\n- `prNumber`\n- `baseCandidate`\n- `mergeBaseRef`\n- `boundaryConfidence`\n- `contextConfidence`\n- `mrTitle`\n- `mrPurpose`\n- `ticketRefs`\n- `ticketContext`\n- `supportingDocsFound`\n- `policySourcesFound`\n- `accessibleContextSources`\n- `missingContextSources`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `shapeProfile`\n- `changeTypeProfile`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `needsBoundaryFollowup`\n- `needsContextFollowup`\n- `needsReviewerBundle`\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- 
`likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before\n- before leaving this phase, either establish the likely review boundary or explicitly record why you could not",
35
38
  "requireConfirmation": {
36
39
  "or": [
37
40
  { "var": "reviewMode", "equals": "THOROUGH" },
@@ -58,12 +61,13 @@
58
61
  "Keep `recommendationHypothesis` as a secondary hypothesis to challenge, not a frame to defend."
59
62
  ],
60
63
  "procedure": [
61
- "Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, and explicit open unknowns.",
64
+ "Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, review target and review-surface summary, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, discovered ticket/doc/policy context, accessible and missing context sources, and explicit open unknowns.",
62
65
  "Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
63
66
  "Perform a preliminary self-review from the fact packet before choosing reviewer families.",
64
67
  "Reviewer family options: `correctness_invariants`, `patterns_architecture`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
65
68
  "Selection guidance: QUICK = no bundle by default unless ambiguity still feels material; STANDARD = 3 families by default; THOROUGH = 5 families by default.",
66
69
  "Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial.",
70
+ "Routing guidance: for `api_contract_change`, bias toward contract / consumer / backward-compatibility scrutiny; for `data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny; for `security_sensitive`, bias toward runtime-risk scrutiny and lower tolerance for weak evidence; for `test_only`, bias toward stronger false-positive suppression; for `mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings.",
67
71
  "Set `coverageUncertainCount` as the number of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`.",
68
72
  "Initialize `contradictionCount`, `blindSpotCount`, and `falsePositiveRiskCount` to `0` if no reviewer-family bundle will run."
69
73
  ],
@@ -191,8 +195,9 @@
191
195
  "Before delegating, state: what is your current recommendation, where are you least confident, and what finding would most likely change your mind now?",
192
196
  "Mode-adaptive validation: QUICK = self-validate and optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` if a serious uncertainty remains; STANDARD = if validation is required and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and either `routine-execution-simulation` or `routine-plan-analysis`; THOROUGH = if validation is required and delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation` when needed, and `routine-plan-analysis`.",
193
197
  "After receiving validator output, explicitly synthesize what was confirmed, what was new, what appears weak, and whether your recommendation changed.",
198
+ "Perform a compact confidence assessment using these dimensions: `boundaryConfidence`, `intentConfidence`, `evidenceConfidence`, `coverageConfidence`, and `consensusConfidence`. Rate each as High / Medium / Low, explain each in one sentence, and then derive final recommendation confidence with these rules: if boundary is Low, final confidence is Low; else if evidence is Low, final confidence is Low; else if 2 or more dimensions are Medium, final confidence is Medium; else if all key dimensions are High, final confidence is High. Unresolved disagreement can only lower confidence, never raise it.",
194
199
  "Compute `docCompletenessConcernCount` by counting one concern for each material packaging gap: missing rationale for any Critical or Major finding, missing ready-to-post MR comment for any Critical or Major finding, recommendation mismatch with canonical findings, still-uncertain / contradicted / needs-followup coverage domains not summarized clearly, or any missing required final section needed for actionability.",
195
- "Set these keys in the next `continue_workflow` call's `context` object: `validatorConsensusLevel`, `validationSummary`, `recommendationConfidenceBand`, `docCompletenessConcernCount`."
200
+ "Set these keys in the next `continue_workflow` call's `context` object: `intentConfidence`, `evidenceConfidence`, `coverageConfidence`, `consensusConfidence`, `confidenceAssessmentSummary`, `validatorConsensusLevel`, `validationSummary`, `recommendationConfidenceBand`, `docCompletenessConcernCount`."
196
201
  ],
197
202
  "verify": [
198
203
  "If 2+ validators still raise serious concerns, confidence is downgraded and synthesis is reopened.",
@@ -210,7 +215,7 @@
210
215
  {
211
216
  "id": "phase-6-final-handoff",
212
217
  "title": "Phase 6: Final Handoff",
213
- "prompt": "Provide the final MR review handoff.\n\nInclude:\n- MR title and purpose\n- review mode used\n- final recommendation and confidence band\n- counts of Critical / Major / Minor / Nit findings\n- top findings with rationale\n- strongest remaining areas of uncertainty, if any\n- summary of the coverage ledger, especially any still-uncertain domains\n- ready-to-post MR comments summary\n- any validation outcomes a human reviewer should see\n- path to the full human-facing review artifact (`reviewDocPath`) only if one was created\n\nRules:\n- the final recommendation assists a human reviewer; it does not replace them\n- if `reviewDocPath` exists, treat it as a human-facing companion artifact only\n- do not post comments, approve, reject, or merge unless the user explicitly asks",
218
+ "prompt": "Provide the final MR review handoff.\n\nInclude:\n- MR title and purpose\n- review mode used\n- final recommendation and confidence band\n- confidence assessment summary, including the most important reason confidence was capped if it was not High\n- counts of Critical / Major / Minor / Nit findings\n- top findings with rationale\n- strongest remaining areas of uncertainty, if any\n- summary of the coverage ledger, especially any still-uncertain domains\n- ready-to-post MR comments summary\n- any validation outcomes a human reviewer should see\n- review environment status:\n - what review target/context sources were successfully used\n - what important sources were missing or ambiguous\n - boundary confidence and context confidence\n - how those limits affected the review\n- path to the full human-facing review artifact (`reviewDocPath`) only if one was created\n\nRules:\n- the final recommendation assists a human reviewer; it does not replace them\n- if `reviewDocPath` exists, treat it as a human-facing companion artifact only\n- be explicit when missing PR/ticket/doc/boundary context limited confidence\n- do not post comments, approve, reject, or merge unless the user explicitly asks",
214
219
  "requireConfirmation": true
215
220
  }
216
221
  ]